7 files changed, 279 insertions, 6 deletions
diff --git a/src/charset/codecs/codec_8859.c b/src/charset/codecs/codec_8859.c
index ef63d4f..5384729 100644
--- a/src/charset/codecs/codec_8859.c
+++ b/src/charset/codecs/codec_8859.c
@@ -563,10 +563,10 @@ parserutils_error charset_8859_to_ucs4(charset_8859_codec *c,
 	if (*s < 0x80) {
 		out = *s;
 	} else if (*s >= 0xA0) {
-		if (c->table[*s] == 0xFFFF)
+		if (c->table[*s - 0xA0] == 0xFFFF)
 			return PARSERUTILS_INVALID;
 
-		out = c->table[*s];
+		out = c->table[*s - 0xA0];
 	} else {
 		return PARSERUTILS_INVALID;
 	}
diff --git a/src/charset/codecs/codec_ext8.c b/src/charset/codecs/codec_ext8.c
index d85f87b..5d21024 100644
--- a/src/charset/codecs/codec_ext8.c
+++ b/src/charset/codecs/codec_ext8.c
@@ -557,10 +557,10 @@ parserutils_error charset_ext8_to_ucs4(charset_ext8_codec *c,
 	if (*s < 0x80) {
 		out = *s;
 	} else {
-		if (c->table[*s] == 0xFFFF)
+		if (c->table[*s - 0x80] == 0xFFFF)
 			return PARSERUTILS_INVALID;
 
-		out = c->table[*s];
+		out = c->table[*s - 0x80];
 	}
 
 	*ucs4 = out;
diff --git a/test/INDEX b/test/INDEX
index 927a240..a78aee5 100644
--- a/test/INDEX
+++ b/test/INDEX
@@ -7,6 +7,7 @@ parserutils	Library initialisation/finalisation
 aliases		Encoding alias handling
 cscodec-utf8	UTF-8 charset codec implementation	cscodec-utf8
 cscodec-utf16	UTF-16 charset codec implementation	cscodec-utf16
+cscodec-ext8	Extended 8bit charset codec		cscodec-ext8
 dict		Dictionary handling
 rbtree		Red-black tree implementation
 filter		Input stream filtering
diff --git a/test/Makefile b/test/Makefile
index a6a1161..1aaf0c0 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -35,8 +35,8 @@ d              := $(DIR)
 override CFLAGS := $(CFLAGS) -I$(TOP)/src/ -I$(d)
 
 # Tests
-TESTS_$(d) := aliases cscodec-utf8 cscodec-utf16 charset dict filter \
-	inputstream parserutils rbtree
+TESTS_$(d) := aliases cscodec-ext8 cscodec-utf8 cscodec-utf16 charset \
+	dict filter inputstream parserutils rbtree
 TESTS_$(d) := $(TESTS_$(d)) regression/cscodec-segv regression/filter-segv \
 	regression/stream-nomem
 
diff --git a/test/cscodec-ext8.c b/test/cscodec-ext8.c
new file mode 100644
index 0000000..14c641f
--- /dev/null
+++ b/test/cscodec-ext8.c
@@ -0,0 +1,267 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "charset/charset.h"
+#include <parserutils/charset/codec.h>
+
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+typedef struct line_ctx {	/* driver state; reaches handle_line() as pw */
+	parserutils_charset_codec *codec;	/* set by the "#enc" directive */
+
+	size_t buflen;		/* capacity of buf, from parse_filesize() */
+	size_t bufused;		/* bytes of input data collected in buf */
+	uint8_t *buf;		/* input data for the current testcase */
+	size_t explen;		/* capacity of exp (set equal to buflen) */
+	size_t expused;		/* bytes of expected output collected in exp */
+	uint8_t *exp;		/* expected output; second half of buf's block */
+
+	bool hadenc;		/* set once an "#enc" directive was handled */
+	bool indata;		/* lines are currently appended to buf */
+	bool inexp;		/* lines are currently appended to exp */
+
+	parserutils_error exp_ret;	/* return code the codec call must give */
+
+	enum { ENCODE, DECODE, BOTH } dir;	/* BOTH: decode, then re-encode */
+} line_ctx;
+
+static bool handle_line(const char *data, size_t datalen, void *pw);
+static void run_test(line_ctx *ctx);
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	UNUSED(pw); /* the private word is not used by this allocator */
+	/* Allocation callback passed to the parserutils calls: realloc() */
+	return realloc(ptr, len);
+}
+
+/* Test driver: <aliases_file> initialises the library, <filename> is the
+ * test data file whose lines are fed to handle_line(). */
+int main(int argc, char **argv)
+{
+	line_ctx ctx;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) ==
+			PARSERUTILS_OK);
+	assert(parserutils_charset_codec_create("NATS-SEFI-ADD",
+			myrealloc, NULL) == NULL);
+
+	ctx.buflen = parse_filesize(argv[2]);
+	if (ctx.buflen == 0)
+		return 1;
+
+	/* One allocation: first half is input data, second half expected */
+	ctx.buf = malloc(2 * ctx.buflen);
+	if (ctx.buf == NULL) {
+		printf("Failed allocating %u bytes\n",
+				(unsigned int) ctx.buflen);
+		return 1;
+	}
+	ctx.exp = ctx.buf + ctx.buflen;
+	ctx.explen = ctx.buflen;
+
+	/* Give every field a defined value: codec and dir are only assigned
+	 * by "#enc" / "#data" directives, which a data file might omit */
+	ctx.codec = NULL;
+	ctx.dir = BOTH;
+	ctx.buf[0] = '\0';
+	ctx.exp[0] = '\0';
+	ctx.bufused = 0;
+	ctx.expused = 0;
+	ctx.hadenc = false;
+	ctx.indata = false;
+	ctx.inexp = false;
+	ctx.exp_ret = PARSERUTILS_OK;
+
+	assert(parse_testfile(argv[2], handle_line, &ctx) == true);
+	/* Without an "#enc" directive there is no codec to test or destroy */
+	assert(ctx.codec != NULL);
+
+	/* and run final test */
+	if (ctx.bufused > 0 && ctx.buf[ctx.bufused - 1] == '\n')
+		ctx.bufused -= 1;
+	if (ctx.expused > 0 && ctx.exp[ctx.expused - 1] == '\n')
+		ctx.expused -= 1;
+	run_test(&ctx);
+
+	free(ctx.buf);
+	parserutils_charset_codec_destroy(ctx.codec);
+	assert(parserutils_charset_finalise(myrealloc, NULL) == PARSERUTILS_OK);
+	printf("PASS\n");
+	return 0;
+}
+
+/* parse_testfile() callback: acts on "#data", "#expected", "#reset" and "#enc"
+ * directives; other lines are appended to the active buffer. Returns true. */
+bool handle_line(const char *data, size_t datalen, void *pw)
+{
+	line_ctx *ctx = (line_ctx *) pw;
+
+	if (data[0] == '#') {
+		if (ctx->inexp) {
+			/* This marks end of testcase, so run it. A section
+			 * may be empty, so test before reading its last byte */
+			if (ctx->bufused > 0 &&
+					ctx->buf[ctx->bufused - 1] == '\n')
+				ctx->bufused -= 1;
+			if (ctx->expused > 0 &&
+					ctx->exp[ctx->expused - 1] == '\n')
+				ctx->expused -= 1;
+			run_test(ctx);
+
+			ctx->buf[0] = '\0';
+			ctx->exp[0] = '\0';
+			ctx->bufused = 0;
+			ctx->expused = 0;
+			ctx->exp_ret = PARSERUTILS_OK;
+		}
+
+		if (strncasecmp(data+1, "data", 4) == 0) {
+			parserutils_charset_codec_optparams params;
+			const char *ptr = data + 6;
+
+			ctx->indata = true;
+			ctx->inexp = false;
+
+			if (strncasecmp(ptr, "decode", 6) == 0)
+				ctx->dir = DECODE;
+			else if (strncasecmp(ptr, "encode", 6) == 0)
+				ctx->dir = ENCODE;
+			else
+				ctx->dir = BOTH;
+			/* Step over a six-character direction word + separator */
+			ptr += 7;
+
+			if (strncasecmp(ptr, "LOOSE", 5) == 0) {
+				params.error_mode.mode =
+					PARSERUTILS_CHARSET_CODEC_ERROR_LOOSE;
+			} else if (strncasecmp(ptr, "STRICT", 6) == 0) {
+				params.error_mode.mode =
+					PARSERUTILS_CHARSET_CODEC_ERROR_STRICT;
+			} else {
+				params.error_mode.mode =
+					PARSERUTILS_CHARSET_CODEC_ERROR_TRANSLIT;
+			}
+
+			assert(parserutils_charset_codec_setopt(ctx->codec,
+				PARSERUTILS_CHARSET_CODEC_ERROR_MODE,
+				&params) == PARSERUTILS_OK);
+		} else if (strncasecmp(data+1, "expected", 8) == 0) {
+			ctx->indata = false;
+			ctx->inexp = true;
+
+			ctx->exp_ret = parserutils_error_from_string(data + 10,
+					datalen - 10 - 1 /* \n */);
+		} else if (strncasecmp(data+1, "reset", 5) == 0) {
+			ctx->indata = false;
+			ctx->inexp = false;
+
+			parserutils_charset_codec_reset(ctx->codec);
+		} else if (strncasecmp(data+1, "enc", 3) == 0) {
+			const char *enc = data + 5;
+			const char *end;
+
+			/* Name runs to the next whitespace; stay inside the
+			 * line and keep isspace()'s argument non-negative */
+			for (end = enc; end < data + datalen &&
+					!isspace((unsigned char) *end); end++)
+				;
+
+			char enc_name[end - enc + 1];
+			memcpy(enc_name, enc, end - enc);
+			enc_name[end - enc] = 0;
+
+			ctx->codec = parserutils_charset_codec_create(enc_name,
+					myrealloc, NULL);
+			assert(ctx->codec != NULL);
+			ctx->hadenc = true;
+		}
+	} else {
+		if (ctx->indata) {
+			memcpy(ctx->buf + ctx->bufused, data, datalen);
+			ctx->bufused += datalen;
+		}
+		if (ctx->inexp) {
+			memcpy(ctx->exp + ctx->expused, data, datalen);
+			ctx->expused += datalen;
+		}
+	}
+
+	return true;
+}
+
+/* Run the codec over ctx->buf in the direction given by ctx->dir and check
+ * the return code, consumed/produced lengths and output against ctx->exp. */
+void run_test(line_ctx *ctx)
+{
+	static int testnum;
+	size_t destlen = ctx->bufused * 4;
+	uint8_t dest[destlen > 0 ? destlen : 1]; /* VLA size must be > 0 */
+	uint8_t *pdest = dest;
+	const uint8_t *psrc = ctx->buf;
+	size_t srclen = ctx->bufused;
+	size_t i;
+
+	if (ctx->dir == DECODE) {
+		assert(parserutils_charset_codec_decode(ctx->codec,
+				&psrc, &srclen,
+				&pdest, &destlen) == ctx->exp_ret);
+	} else if (ctx->dir == ENCODE) {
+		assert(parserutils_charset_codec_encode(ctx->codec,
+				&psrc, &srclen,
+				&pdest, &destlen) == ctx->exp_ret);
+	} else {
+		/* BOTH: decode buf into temp, then encode temp into dest */
+		size_t templen = ctx->bufused * 4;
+		uint8_t temp[templen > 0 ? templen : 1]; /* as for dest */
+		uint8_t *ptemp = temp;
+		const uint8_t *ptemp2;
+		size_t templen2;
+
+		assert(parserutils_charset_codec_decode(ctx->codec,
+				&psrc, &srclen,
+				&ptemp, &templen) == ctx->exp_ret);
+		/* \todo currently there is no way to specify the number of
+		   consumed & produced data in case of a deliberate bad input
+		   data set.  */
+		if (ctx->exp_ret == PARSERUTILS_OK)
+			assert(temp + (ctx->bufused * 4 - templen) == ptemp);
+
+		ptemp2 = temp;
+		templen2 = ctx->bufused * 4 - templen;
+		assert(parserutils_charset_codec_encode(ctx->codec,
+				&ptemp2, &templen2,
+				&pdest, &destlen) == ctx->exp_ret);
+		if (ctx->exp_ret == PARSERUTILS_OK) {
+			assert(templen2 == 0);
+			assert(temp + (ctx->bufused * 4 - templen) == ptemp2);
+		}
+	}
+	if (ctx->exp_ret == PARSERUTILS_OK) {
+		assert(srclen == 0);
+		assert(ctx->buf + ctx->bufused == psrc);
+		assert(dest + (ctx->bufused * 4 - destlen) == pdest);
+	}
+
+	/* Dump only the bytes the codec produced: dest holds nothing valid
+	 * beyond pdest, and expused may exceed the size of dest */
+	printf("%d: Read '", ++testnum);
+	for (i = 0; i < (size_t) (pdest - dest); i++)
+		printf("%02x ", (unsigned int) dest[i]);
+	printf("' Expected '");
+	for (i = 0; i < ctx->expused; i++)
+		printf("%02x ", (unsigned int) ctx->exp[i]);
+	printf("'\n");
+
+	assert(pdest == dest + ctx->expused);
+	assert(memcmp(dest, ctx->exp, ctx->expused) == 0);
+}
+
diff --git a/test/data/cscodec-ext8/INDEX b/test/data/cscodec-ext8/INDEX
new file mode 100644
index 0000000..51f04bd
--- /dev/null
+++ b/test/data/cscodec-ext8/INDEX
@@ -0,0 +1,5 @@
+# Index file for charset codec tests
+#
+# Test			Description
+
+simple.dat		Simple tests, designed to validate testdriver
diff --git a/test/data/cscodec-ext8/simple.dat b/test/data/cscodec-ext8/simple.dat
new file mode 100644
index 0000000..8b11d40
--- /dev/null
+++ b/test/data/cscodec-ext8/simple.dat