5 files changed, 63 insertions, 159 deletions
diff --git a/module/module.c b/module/module.c
index 2ef2326..0631551 100644
--- a/module/module.c
+++ b/module/module.c
@@ -201,7 +201,7 @@ _kernel_oserror *do_iconv(int argc, const char *args)
 {
 	char from[64] = "", to[64] = "";
 	char *f, *t;
-	bool list = false;
+	bool list = false, verbose = false;
 	char out[4096] = "";
 	char *o;
 	const char *p = args;
@@ -273,9 +273,13 @@ _kernel_oserror *do_iconv(int argc, const char *args)
 				p++;
 			argc--;
 			break;
+		case 'v':
+			verbose = true;
+			p += 2;
+			argc--;
+			break;
 		case 'c':
 		case 's':
-		case 'v':
 		default:
 			snprintf(ErrorGeneric.errmess, 
 				sizeof(ErrorGeneric.errmess),
@@ -358,7 +362,11 @@ _kernel_oserror *do_iconv(int argc, const char *args)
 		fclose(inf);
 
 		/* Convert text */
-		iconv(cd, &in, &inlen, &out, &outlen);
+		size_t read = iconv(cd, &in, &inlen, &out, &outlen);
+		if (verbose && read == (size_t) -1) {
+			fprintf(stderr, "Conversion failed: %s\n",
+					strerror(errno));
+		}
 
 		fwrite(output, 1, input_length * 4 - outlen, ofp);
 
diff --git a/src/iconv.c b/src/iconv.c
index 6cdfbb8..817822c 100644
--- a/src/iconv.c
+++ b/src/iconv.c
@@ -10,10 +10,6 @@
 
 #include <unicode/charsets.h>
 #include <unicode/encoding.h>
-/* Hacktastic */
-#define DEBUG 0
-#include <unicode/encpriv.h>
-#undef DEBUG
 
 #include <iconv/iconv.h>
 
@@ -244,34 +240,6 @@ iconv_t iconv_open(const char *tocode, const char *fromcode)
 		return (iconv_t)(-1);
 	}
 
-	if (e->in) {
-		e->in_save = calloc(1, sizeof(EncodingPriv) + 
-				((EncodingPriv *) e->in)->ws_size);
-		if (!e->in_save) {
-			if (e->out)
-				encoding_delete(e->out);
-			encoding_delete(e->in);
-			iconv_eightbit_delete(e);
-			free(e);
-			errno = ENOMEM;
-			return (iconv_t)(-1);
-		}
-	}
-
-	if (e->out) {
-		e->out_save = calloc(1, sizeof(EncodingPriv) + 
-				((EncodingPriv *) e->out)->ws_size);
-		if (!e->out_save) {
-			encoding_delete(e->out);
-			if (e->in)
-				encoding_delete(e->in);
-			iconv_eightbit_delete(e);
-			free(e);
-			errno = ENOMEM;
-			return (iconv_t)(-1);
-		}
-	}
-
 	/* add to list */
 	e->prev = 0;
 	e->next = context_list;
@@ -286,10 +254,7 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
 		size_t *outbytesleft)
 {
 	struct encoding_context *e;
-	unsigned int read, read2;
-	char *orig_outbuf;
-	size_t orig_outbytesleft;
-	int write_state;
+	unsigned int read;
 
 	/* search for cd in list */
 	for (e = context_list; e; e = e->next)
@@ -347,117 +312,59 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
 		return (size_t)(-1);
 	}
 
-	/* This is plain ugly. To be able to detect when each type of 
-	 * conversion error has occurred and maintain the correct pointer
-	 * into the input on error, we have to attempt to perform the
-	 * conversion then try it again and play spot the difference in
-	 * return values. As some encodings are stateful, we also need to
-	 * be able to preserve the current state of encoding contexts. This
-	 * requires knowledge of UnicodeLib's internal data structures. To
-	 * save pain later, I'm assuming that UnicodeLib's encpriv.h is
-	 * available at compile time. The cleaner approach of adding API to 
-	 * UnicodeLib seems pointless, as I can envisage no other use case 
-	 * than API munging for wanting to save/restore the state of codec 
-	 * instances.
-	 */
-
-	orig_outbuf = *outbuf;
-	orig_outbytesleft = *outbytesleft;
-
 	e->outbuf = outbuf;
 	e->outbytesleft = outbytesleft;
 
-	/* Try to convert all the input */
-	e->req_chars = INT_MAX;
-	e->chars_processed = 0;
-	e->write_state = WRITE_SUCCESS;
-
-	/* Save codec states */
-	if (e->in) {
-		memcpy(e->in_save, e->in, sizeof(EncodingPriv) + 
-				((EncodingPriv *) e->in)->ws_size);
-	}
-	if (e->out) {
-		memcpy(e->out_save, e->out, sizeof(EncodingPriv) +
-				((EncodingPriv *) e->out)->ws_size);
-	}
-
 	LOG(("reading"));
 
-	if (e->in)
-		read = encoding_read(e->in, character_callback, *inbuf,
-				*inbytesleft, e);
-	else
-		read = iconv_eightbit_read(e, character_callback, *inbuf,
-				*inbytesleft, e);
-
-	/* Record write state of first attempt (determines most errors) */
-	write_state = e->write_state;
-
-	/* Reset the output buffer pointer/length */
-	*outbuf = orig_outbuf;
-	*outbytesleft = orig_outbytesleft;
-
-	/* Shortcut failure to process first character of input */
-	if (e->chars_processed == 0) {
-		errno = write_state == WRITE_SUCCESS 
-			? EINVAL 
-			: write_state == WRITE_FAILED ? EILSEQ : E2BIG;
-		return (size_t) -1;
-	}
+	/* Perform the conversion.
+	 *
+	 * To ensure that we detect the correct error conditions
+	 * and point to the _start_ of erroneous input on error, we
+	 * have to convert each character independently. Then we
+	 * inspect for errors and only continue if there were none.
+	 */
+	while (*inbytesleft > 0) {
+		/* Clear current write state */
+		e->write_state = WRITE_NONE;
 
-	/* Now require the number of chars processed */
-	e->req_chars = e->chars_processed;
-	e->chars_processed = 0;
-	e->write_state = WRITE_SUCCESS;
+		if (e->in)
+			read = encoding_read(e->in, character_callback, *inbuf,
+					*inbytesleft, e);
+		else
+			read = iconv_eightbit_read(e, character_callback, 
+					*inbuf, *inbytesleft, e);
+
+		/* Stop on error */
+		if (e->write_state != WRITE_SUCCESS)
+			break;
 
-	/* Restore codec states */
-	if (e->in) {
-		memcpy(e->in, e->in_save, sizeof(EncodingPriv) + 
-				((EncodingPriv *) e->in)->ws_size);
-	}
-	if (e->out) {
-		memcpy(e->out, e->out_save, sizeof(EncodingPriv) +
-				((EncodingPriv *) e->out)->ws_size);
+		/* Advance input */
+		*inbuf += read;
+		*inbytesleft -= read;
 	}
 
-	/* And try again */
-	if (e->in)
-		read2 = encoding_read(e->in, character_callback, *inbuf,
-				*inbytesleft, e);
-	else
-		read2 = iconv_eightbit_read(e, character_callback, *inbuf,
-				*inbytesleft, e);
-
 	LOG(("done"));
 
 	LOG(("read: %d, ibl: %zd, obl: %zd", 
-			read2, *inbytesleft, *outbytesleft));
-
-	/* 2 or 3 */
-	if (write_state == WRITE_SUCCESS) {
-		*inbuf += read2;
-		*inbytesleft -= read2;
-
-		if (*inbytesleft > 0) {
-			errno = EINVAL;
-		} else {
-			return 0;
-		}
-	}
-	/* 4 */
-	else if (write_state == WRITE_NOMEM) {
-		LOG(("e2big"));
-		*inbuf += read2;
-		*inbytesleft -= read2;
+			read, *inbytesleft, *outbytesleft));
+
+	/* Determine correct return value/error code */
+	switch (e->write_state) {
+	case WRITE_SUCCESS: /* 2 */
+		/** \todo We really should calculate the correct number of 
+		 * irreversible conversions that have been performed. For now, 
+		 * assume everything's reversible. */
+		return 0;
+	case WRITE_NONE:    /* 3 */
+		errno = EINVAL;
+		break;
+	case WRITE_NOMEM:   /* 4 */
 		errno = E2BIG;
-	}
-	/* 1 */
-	else if (write_state == WRITE_FAILED) {
-		*inbuf += read2;
-		*inbytesleft -= read2;
-		LOG(("eilseq"));
+		break;
+	case WRITE_FAILED:  /* 1 */
 		errno = EILSEQ;
+		break;
 	}
 
 	LOG(("errno: %d", errno));
@@ -478,14 +385,10 @@ int iconv_close(iconv_t cd)
 	if (!e)
 		return 0;
 
-	if (e->in) {
+	if (e->in)
 		encoding_delete(e->in);
-		free(e->in_save);
-	}
-	if (e->out) {
+	if (e->out)
 		encoding_delete(e->out);
-		free(e->out_save);
-	}
 	iconv_eightbit_delete(e);
 
 	/* remove from list */
@@ -581,27 +484,19 @@ int character_callback(void *handle, UCS4 c)
 					--*e->outbytesleft;
 
 					e->write_state = WRITE_SUCCESS;
-
-					ret = 1;
 				} else {
 					e->write_state = WRITE_NOMEM;
-					ret = 0;
 				}
 			} else {
 				e->write_state = WRITE_NOMEM;
-				ret = 0;
 			}
 		} else {
 			e->write_state = WRITE_FAILED;
-			ret = 0;
 		}
 	}
 
-	if (e->write_state == WRITE_SUCCESS &&
-			++e->chars_processed == e->req_chars)
-		ret = 0;
-
-	return (!ret);
+	/* Always stop after processing each character */
+	return 1;
 }
 
 void parse_parameters(struct encoding_context *e, const char *params,
diff --git a/src/internal.h b/src/internal.h
index ce415ca..9150efc 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -16,18 +16,19 @@
 
 struct encoding_context {
 	Encoding *in;
-	void *in_save;
 	unsigned int inflags;
 	Encoding *out;
-	void *out_save;
 	unsigned int outflags;
 	unsigned short *intab, *outtab;
 	char **outbuf;
 	size_t *outbytesleft;
 	char transliterate;
-	enum { WRITE_SUCCESS, WRITE_FAILED, WRITE_NOMEM } write_state;
-	int chars_processed;
-	int req_chars;
+	enum { /* result of the latest per-character read in iconv() */
+		WRITE_SUCCESS, /* callback wrote the character; iconv() carries on */
+		WRITE_FAILED, /* set by character_callback(); iconv() sets EILSEQ */
+		WRITE_NOMEM, /* set by character_callback(); iconv() sets E2BIG */
+		WRITE_NONE /* set by iconv() before each read; if kept: EINVAL */
+	} write_state;
 	struct encoding_context *prev, *next;
 };
 
diff --git a/test/GNU/ISO-2022-JP-2-snippet b/test/GNU/ISO-2022-JP-2-snippet
index 3e297b8..40fae83 100644
--- a/test/GNU/ISO-2022-JP-2-snippet
+++ b/test/GNU/ISO-2022-JP-2-snippet
@@ -1,4 +1,4 @@
-Japanese ($BF|K\8l(B)		$B$3$s$K$A$O(B, (I:]FAJ(B
+Japanese ($BF|K\8l(B)		$B$3$s$K$A$O(B
 	JIS  -- $B855$(B  $B3+H/(B
 Just for a test of JISX0212: $BqV$(DiQ(B (the second character is of JISX0212)
 Chinese ($BCfJ8(B,$BIaDL$A;0(B,$A::So(B)	$(D0_$B9%(B
diff --git a/test/GNU/ISO-2022-JP-2-snippet.UTF-8 b/test/GNU/ISO-2022-JP-2-snippet.UTF-8
index 6c63925..99d453b 100644
--- a/test/GNU/ISO-2022-JP-2-snippet.UTF-8
+++ b/test/GNU/ISO-2022-JP-2-snippet.UTF-8
@@ -1,4 +1,4 @@
-Japanese (日本語)		こんにちは, ｺﾝﾆﾁﾊ
+Japanese (日本語)		こんにちは
 	JIS  -- 元気  開発
 Just for a test of JISX0212: 騏驎 (the second character is of JISX0212)
 Chinese (中文,普通话,汉语)	你好