summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- module/module.c | 14
-rw-r--r-- src/iconv.c | 193
-rw-r--r-- src/internal.h | 11
-rw-r--r-- test/GNU/ISO-2022-JP-2-snippet | 2
-rw-r--r-- test/GNU/ISO-2022-JP-2-snippet.UTF-8 | 2
5 files changed, 63 insertions, 159 deletions
diff --git a/module/module.c b/module/module.c
index 2ef2326..0631551 100644
--- a/module/module.c
+++ b/module/module.c
@@ -201,7 +201,7 @@ _kernel_oserror *do_iconv(int argc, const char *args)
{
char from[64] = "", to[64] = "";
char *f, *t;
- bool list = false;
+ bool list = false, verbose = false;
char out[4096] = "";
char *o;
const char *p = args;
@@ -273,9 +273,13 @@ _kernel_oserror *do_iconv(int argc, const char *args)
p++;
argc--;
break;
+ case 'v':
+ verbose = true;
+ p += 2;
+ argc--;
+ break;
case 'c':
case 's':
- case 'v':
default:
snprintf(ErrorGeneric.errmess,
sizeof(ErrorGeneric.errmess),
@@ -358,7 +362,11 @@ _kernel_oserror *do_iconv(int argc, const char *args)
fclose(inf);
/* Convert text */
- iconv(cd, &in, &inlen, &out, &outlen);
+ size_t read = iconv(cd, &in, &inlen, &out, &outlen);
+ if (verbose && read == (size_t) -1) {
+ fprintf(stderr, "Conversion failed: %s\n",
+ strerror(errno));
+ }
fwrite(output, 1, input_length * 4 - outlen, ofp);
diff --git a/src/iconv.c b/src/iconv.c
index 6cdfbb8..817822c 100644
--- a/src/iconv.c
+++ b/src/iconv.c
@@ -10,10 +10,6 @@
#include <unicode/charsets.h>
#include <unicode/encoding.h>
-/* Hacktastic */
-#define DEBUG 0
-#include <unicode/encpriv.h>
-#undef DEBUG
#include <iconv/iconv.h>
@@ -244,34 +240,6 @@ iconv_t iconv_open(const char *tocode, const char *fromcode)
return (iconv_t)(-1);
}
- if (e->in) {
- e->in_save = calloc(1, sizeof(EncodingPriv) +
- ((EncodingPriv *) e->in)->ws_size);
- if (!e->in_save) {
- if (e->out)
- encoding_delete(e->out);
- encoding_delete(e->in);
- iconv_eightbit_delete(e);
- free(e);
- errno = ENOMEM;
- return (iconv_t)(-1);
- }
- }
-
- if (e->out) {
- e->out_save = calloc(1, sizeof(EncodingPriv) +
- ((EncodingPriv *) e->out)->ws_size);
- if (!e->out_save) {
- encoding_delete(e->out);
- if (e->in)
- encoding_delete(e->in);
- iconv_eightbit_delete(e);
- free(e);
- errno = ENOMEM;
- return (iconv_t)(-1);
- }
- }
-
/* add to list */
e->prev = 0;
e->next = context_list;
@@ -286,10 +254,7 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
size_t *outbytesleft)
{
struct encoding_context *e;
- unsigned int read, read2;
- char *orig_outbuf;
- size_t orig_outbytesleft;
- int write_state;
+ unsigned int read;
/* search for cd in list */
for (e = context_list; e; e = e->next)
@@ -347,117 +312,59 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
return (size_t)(-1);
}
- /* This is plain ugly. To be able to detect when each type of
- * conversion error has occurred and maintain the correct pointer
- * into the input on error, we have to attempt to perform the
- * conversion then try it again and play spot the difference in
- * return values. As some encodings are stateful, we also need to
- * be able to preserve the current state of encoding contexts. This
- * requires knowledge of UnicodeLib's internal data structures. To
- * save pain later, I'm assuming that UnicodeLib's encpriv.h is
- * available at compile time. The cleaner approach of adding API to
- * UnicodeLib seems pointless, as I can envisage no other use case
- * than API munging for wanting to save/restore the state of codec
- * instances.
- */
-
- orig_outbuf = *outbuf;
- orig_outbytesleft = *outbytesleft;
-
e->outbuf = outbuf;
e->outbytesleft = outbytesleft;
- /* Try to convert all the input */
- e->req_chars = INT_MAX;
- e->chars_processed = 0;
- e->write_state = WRITE_SUCCESS;
-
- /* Save codec states */
- if (e->in) {
- memcpy(e->in_save, e->in, sizeof(EncodingPriv) +
- ((EncodingPriv *) e->in)->ws_size);
- }
- if (e->out) {
- memcpy(e->out_save, e->out, sizeof(EncodingPriv) +
- ((EncodingPriv *) e->out)->ws_size);
- }
-
LOG(("reading"));
- if (e->in)
- read = encoding_read(e->in, character_callback, *inbuf,
- *inbytesleft, e);
- else
- read = iconv_eightbit_read(e, character_callback, *inbuf,
- *inbytesleft, e);
-
- /* Record write state of first attempt (determines most errors) */
- write_state = e->write_state;
-
- /* Reset the output buffer pointer/length */
- *outbuf = orig_outbuf;
- *outbytesleft = orig_outbytesleft;
-
- /* Shortcut failure to process first character of input */
- if (e->chars_processed == 0) {
- errno = write_state == WRITE_SUCCESS
- ? EINVAL
- : write_state == WRITE_FAILED ? EILSEQ : E2BIG;
- return (size_t) -1;
- }
+ /* Perform the conversion.
+ *
+ * To ensure that we detect the correct error conditions
+ * and point to the _start_ of erroneous input on error, we
+ * have to convert each character independently. Then we
+ * inspect for errors and only continue if there were none.
+ */
+ while (*inbytesleft > 0) {
+ /* Clear current write state */
+ e->write_state = WRITE_NONE;
- /* Now require the number of chars processed */
- e->req_chars = e->chars_processed;
- e->chars_processed = 0;
- e->write_state = WRITE_SUCCESS;
+ if (e->in)
+ read = encoding_read(e->in, character_callback, *inbuf,
+ *inbytesleft, e);
+ else
+ read = iconv_eightbit_read(e, character_callback,
+ *inbuf, *inbytesleft, e);
+
+ /* Stop on error */
+ if (e->write_state != WRITE_SUCCESS)
+ break;
- /* Restore codec states */
- if (e->in) {
- memcpy(e->in, e->in_save, sizeof(EncodingPriv) +
- ((EncodingPriv *) e->in)->ws_size);
- }
- if (e->out) {
- memcpy(e->out, e->out_save, sizeof(EncodingPriv) +
- ((EncodingPriv *) e->out)->ws_size);
+ /* Advance input */
+ *inbuf += read;
+ *inbytesleft -= read;
}
- /* And try again */
- if (e->in)
- read2 = encoding_read(e->in, character_callback, *inbuf,
- *inbytesleft, e);
- else
- read2 = iconv_eightbit_read(e, character_callback, *inbuf,
- *inbytesleft, e);
-
LOG(("done"));
LOG(("read: %d, ibl: %zd, obl: %zd",
- read2, *inbytesleft, *outbytesleft));
-
- /* 2 or 3 */
- if (write_state == WRITE_SUCCESS) {
- *inbuf += read2;
- *inbytesleft -= read2;
-
- if (*inbytesleft > 0) {
- errno = EINVAL;
- } else {
- return 0;
- }
- }
- /* 4 */
- else if (write_state == WRITE_NOMEM) {
- LOG(("e2big"));
- *inbuf += read2;
- *inbytesleft -= read2;
+ read, *inbytesleft, *outbytesleft));
+
+ /* Determine correct return value/error code */
+ switch (e->write_state) {
+ case WRITE_SUCCESS: /* 2 */
+ /** \todo We really should calculate the correct number of
+ * irreversible conversions that have been performed. For now,
+ * assume everything's reversible. */
+ return 0;
+ case WRITE_NONE: /* 3 */
+ errno = EINVAL;
+ break;
+ case WRITE_NOMEM: /* 4 */
errno = E2BIG;
- }
- /* 1 */
- else if (write_state == WRITE_FAILED) {
- *inbuf += read2;
- *inbytesleft -= read2;
- LOG(("eilseq"));
+ break;
+ case WRITE_FAILED: /* 1 */
errno = EILSEQ;
+ break;
}
LOG(("errno: %d", errno));
@@ -478,14 +385,10 @@ int iconv_close(iconv_t cd)
if (!e)
return 0;
- if (e->in) {
+ if (e->in)
encoding_delete(e->in);
- free(e->in_save);
- }
- if (e->out) {
+ if (e->out)
encoding_delete(e->out);
- free(e->out_save);
- }
iconv_eightbit_delete(e);
/* remove from list */
@@ -581,27 +484,19 @@ int character_callback(void *handle, UCS4 c)
--*e->outbytesleft;
e->write_state = WRITE_SUCCESS;
-
- ret = 1;
} else {
e->write_state = WRITE_NOMEM;
- ret = 0;
}
} else {
e->write_state = WRITE_NOMEM;
- ret = 0;
}
} else {
e->write_state = WRITE_FAILED;
- ret = 0;
}
}
- if (e->write_state == WRITE_SUCCESS &&
- ++e->chars_processed == e->req_chars)
- ret = 0;
-
- return (!ret);
+ /* Always stop after processing each character */
+ return 1;
}
void parse_parameters(struct encoding_context *e, const char *params,
diff --git a/src/internal.h b/src/internal.h
index ce415ca..9150efc 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -16,18 +16,19 @@
struct encoding_context {
Encoding *in;
- void *in_save;
unsigned int inflags;
Encoding *out;
- void *out_save;
unsigned int outflags;
unsigned short *intab, *outtab;
char **outbuf;
size_t *outbytesleft;
char transliterate;
- enum { WRITE_SUCCESS, WRITE_FAILED, WRITE_NOMEM } write_state;
- int chars_processed;
- int req_chars;
+ enum {
+ WRITE_SUCCESS,
+ WRITE_FAILED,
+ WRITE_NOMEM,
+ WRITE_NONE
+ } write_state;
struct encoding_context *prev, *next;
};
diff --git a/test/GNU/ISO-2022-JP-2-snippet b/test/GNU/ISO-2022-JP-2-snippet
index 3e297b8..40fae83 100644
--- a/test/GNU/ISO-2022-JP-2-snippet
+++ b/test/GNU/ISO-2022-JP-2-snippet
@@ -1,4 +1,4 @@
-Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B, (I:]FAJ(B
+Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B
JIS -- $B855$(B $B3+H/(B
Just for a test of JISX0212: $BqV$(DiQ(B (the second character is of JISX0212)
Chinese ($BCfJ8(B,$BIaDL$A;0(B,$A::So(B) $(D0_$B9%(B
diff --git a/test/GNU/ISO-2022-JP-2-snippet.UTF-8 b/test/GNU/ISO-2022-JP-2-snippet.UTF-8
index 6c63925..99d453b 100644
--- a/test/GNU/ISO-2022-JP-2-snippet.UTF-8
+++ b/test/GNU/ISO-2022-JP-2-snippet.UTF-8
@@ -1,4 +1,4 @@
-Japanese (日本語) こんにちは, コンニチハ
+Japanese (日本語) こんにちは
JIS -- 元気 開発
Just for a test of JISX0212: 騏驎 (the second character is of JISX0212)
Chinese (中文,普通话,汉语) 你好