summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2008-09-05 10:50:57 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2008-09-05 10:50:57 +0000
commitcd73772c52b6f15a5355f016d309d28c4fb61343 (patch)
tree0b06ecaa190c705f45d156a5163aa144465388ab
parenta461f2657f586c3fe765ad0f77f53c5a6927b04d (diff)
downloadlibparserutils-cd73772c52b6f15a5355f016d309d28c4fb61343.tar.gz
libparserutils-cd73772c52b6f15a5355f016d309d28c4fb61343.tar.bz2
ISO-8859-n test data
svn path=/trunk/libparserutils/; revision=5247
-rw-r--r--test/INDEX1
-rw-r--r--test/Makefile4
-rw-r--r--test/cscodec-8859.c268
-rw-r--r--test/data/cscodec-8859/1.datbin0 -> 2346 bytes
-rw-r--r--test/data/cscodec-8859/10.datbin0 -> 2347 bytes
-rw-r--r--test/data/cscodec-8859/11.datbin0 -> 2267 bytes
-rw-r--r--test/data/cscodec-8859/13.datbin0 -> 2347 bytes
-rw-r--r--test/data/cscodec-8859/14.datbin0 -> 2347 bytes
-rw-r--r--test/data/cscodec-8859/15.datbin0 -> 2347 bytes
-rw-r--r--test/data/cscodec-8859/16.datbin0 -> 2347 bytes
-rw-r--r--test/data/cscodec-8859/2.datbin0 -> 2346 bytes
-rw-r--r--test/data/cscodec-8859/3.datbin0 -> 2266 bytes
-rw-r--r--test/data/cscodec-8859/4.datbin0 -> 2346 bytes
-rw-r--r--test/data/cscodec-8859/5.datbin0 -> 2346 bytes
-rw-r--r--test/data/cscodec-8859/6.datbin0 -> 1896 bytes
-rw-r--r--test/data/cscodec-8859/7.datbin0 -> 2316 bytes
-rw-r--r--test/data/cscodec-8859/8.datbin0 -> 1986 bytes
-rw-r--r--test/data/cscodec-8859/9.datbin0 -> 2346 bytes
-rw-r--r--test/data/cscodec-8859/INDEX19
19 files changed, 290 insertions, 2 deletions
diff --git a/test/INDEX b/test/INDEX
index a78aee5..ef92b47 100644
--- a/test/INDEX
+++ b/test/INDEX
@@ -8,6 +8,7 @@ aliases Encoding alias handling
cscodec-utf8 UTF-8 charset codec implementation cscodec-utf8
cscodec-utf16 UTF-16 charset codec implementation cscodec-utf16
cscodec-ext8 Extended 8bit charset codec cscodec-ext8
+cscodec-8859 ISO-8859-n codec cscodec-8859
dict Dictionary handling
rbtree Red-black tree implementation
filter Input stream filtering
diff --git a/test/Makefile b/test/Makefile
index 1aaf0c0..a8177be 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -35,8 +35,8 @@ d := $(DIR)
override CFLAGS := $(CFLAGS) -I$(TOP)/src/ -I$(d)
# Tests
-TESTS_$(d) := aliases cscodec-ext8 cscodec-utf8 cscodec-utf16 charset \
- dict filter inputstream parserutils rbtree
+TESTS_$(d) := aliases cscodec-8859 cscodec-ext8 cscodec-utf8 cscodec-utf16 \
+ charset dict filter inputstream parserutils rbtree
TESTS_$(d) := $(TESTS_$(d)) regression/cscodec-segv regression/filter-segv \
regression/stream-nomem
diff --git a/test/cscodec-8859.c b/test/cscodec-8859.c
new file mode 100644
index 0000000..6378cd4
--- /dev/null
+++ b/test/cscodec-8859.c
@@ -0,0 +1,268 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "charset/charset.h"
+#include <parserutils/charset/codec.h>
+
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+typedef struct line_ctx { /* State carried from one line of the test data file to the next */
+ parserutils_charset_codec *codec; /* Codec under test; created when a "#enc" line is handled */
+
+ size_t buflen; /* Capacity of buf in bytes (size of the test data file) */
+ size_t bufused; /* Bytes of input data accumulated in buf so far */
+ uint8_t *buf; /* Input data for the current testcase */
+ size_t explen; /* Capacity of exp in bytes */
+ size_t expused; /* Bytes of expected output accumulated in exp so far */
+ uint8_t *exp; /* Expected output for the current testcase */
+
+ bool hadenc; /* Set once a "#enc" line has been handled */
+ bool indata; /* Lines are currently being appended to buf */
+ bool inexp; /* Lines are currently being appended to exp */
+
+ parserutils_error exp_ret; /* Return code the codec call is expected to give */
+
+ enum { ENCODE, DECODE, BOTH } dir; /* Which codec operation(s) run_test performs */
+} line_ctx;
+
+static bool handle_line(const char *data, size_t datalen, void *pw);
+static void run_test(line_ctx *ctx);
+
+/**
+ * Allocation callback passed to the parserutils charset API; a thin
+ * wrapper around realloc().
+ *
+ * \param ptr  Existing allocation to resize, or NULL
+ * \param len  Requested size, in bytes
+ * \param pw   Client data (not used)
+ * \return Whatever realloc(ptr, len) returns
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *result = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return result;
+}
+
+/**
+ * Test driver: initialise the charset layer, feed the test data file
+ * through handle_line() and run every testcase it describes.
+ *
+ * \param argc  Argument count; must be 3
+ * \param argv  argv[1] is the aliases file, argv[2] the test data file
+ * \return 0 on success, 1 on a usage, allocation or test data error
+ */
+int main(int argc, char **argv)
+{
+	line_ctx ctx;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) ==
+			PARSERUTILS_OK);
+
+	/* Creating a codec for this charset name is expected to fail */
+	assert(parserutils_charset_codec_create("NATS-SEFI-ADD",
+			myrealloc, NULL) == NULL);
+
+	ctx.buflen = parse_filesize(argv[2]);
+	if (ctx.buflen == 0)
+		return 1;
+
+	/* One allocation, split in half: input data first, expected
+	 * output second */
+	ctx.buf = malloc(2 * ctx.buflen);
+	if (ctx.buf == NULL) {
+		/* Report the size that was actually requested */
+		printf("Failed allocating %u bytes\n",
+				(unsigned int) (2 * ctx.buflen));
+		return 1;
+	}
+
+	ctx.exp = ctx.buf + ctx.buflen;
+	ctx.explen = ctx.buflen;
+
+	ctx.buf[0] = '\0';
+	ctx.exp[0] = '\0';
+	ctx.bufused = 0;
+	ctx.expused = 0;
+	ctx.hadenc = false;
+	ctx.indata = false;
+	ctx.inexp = false;
+	ctx.exp_ret = PARSERUTILS_OK;
+	/* Give these defined values too: a data file lacking "#enc" or
+	 * "#data" lines would otherwise leave them indeterminate, yet
+	 * they are read by handle_line(), run_test() and the cleanup
+	 * below */
+	ctx.codec = NULL;
+	ctx.dir = BOTH;
+
+	assert(parse_testfile(argv[2], handle_line, &ctx) == true);
+
+	/* Without a codec there is nothing to test or destroy */
+	if (ctx.hadenc == false) {
+		printf("No #enc line found in %s\n", argv[2]);
+		free(ctx.buf);
+		return 1;
+	}
+
+	/* and run final test */
+	if (ctx.bufused > 0 && ctx.buf[ctx.bufused - 1] == '\n')
+		ctx.bufused -= 1;
+
+	if (ctx.expused > 0 && ctx.exp[ctx.expused - 1] == '\n')
+		ctx.expused -= 1;
+
+	run_test(&ctx);
+
+	free(ctx.buf);
+
+	parserutils_charset_codec_destroy(ctx.codec);
+
+	assert(parserutils_charset_finalise(myrealloc, NULL) ==
+			PARSERUTILS_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+/**
+ * Test whether the text at ptr begins with word, ignoring case and never
+ * looking beyond end.
+ *
+ * \param ptr   Position within the line
+ * \param end   One past the last byte of the line
+ * \param word  NUL-terminated keyword to look for
+ * \return true if the keyword is a prefix of the text at ptr
+ */
+static bool line_has_prefix(const char *ptr, const char *end,
+		const char *word)
+{
+	size_t len = strlen(word);
+
+	return (size_t) (end - ptr) >= len &&
+			strncasecmp(ptr, word, len) == 0;
+}
+
+/**
+ * Step over the word at ptr and the whitespace that follows it.
+ *
+ * \param ptr  Position within the line
+ * \param end  One past the last byte of the line
+ * \return Start of the next word, or end if there is none
+ */
+static const char *line_skip_word(const char *ptr, const char *end)
+{
+	/* <ctype.h> functions need an unsigned char value */
+	while (ptr < end && !isspace((unsigned char) *ptr))
+		ptr++;
+	while (ptr < end && isspace((unsigned char) *ptr))
+		ptr++;
+
+	return ptr;
+}
+
+/**
+ * Process one line of the test data file.
+ *
+ * A line starting with '#' is a directive ("#data", "#expected", "#reset"
+ * or "#enc"); if an expected-output section was being read, the testcase
+ * gathered so far is run first. Any other line is appended to the input
+ * or expected-output buffer, depending on the section being read.
+ *
+ * \param data     Line contents
+ * \param datalen  Length of the line, in bytes
+ * \param pw       Pointer to the line_ctx for this run
+ * \return true, always
+ */
+bool handle_line(const char *data, size_t datalen, void *pw)
+{
+	line_ctx *ctx = (line_ctx *) pw;
+	const char *line_end = data + datalen;
+
+	if (data[0] == '#') {
+		if (ctx->inexp) {
+			/* This marks end of testcase, so run it */
+
+			/* Drop one trailing newline from each buffer. Either
+			 * may be empty, so check before indexing, as main()
+			 * does for the final testcase */
+			if (ctx->bufused > 0 &&
+					ctx->buf[ctx->bufused - 1] == '\n')
+				ctx->bufused -= 1;
+
+			if (ctx->expused > 0 &&
+					ctx->exp[ctx->expused - 1] == '\n')
+				ctx->expused -= 1;
+
+			run_test(ctx);
+
+			ctx->buf[0] = '\0';
+			ctx->exp[0] = '\0';
+			ctx->bufused = 0;
+			ctx->expused = 0;
+			ctx->exp_ret = PARSERUTILS_OK;
+		}
+
+		if (line_has_prefix(data + 1, line_end, "data")) {
+			parserutils_charset_codec_optparams params;
+			/* Direction word follows "#data " */
+			const char *ptr = datalen > 6 ? data + 6 : line_end;
+
+			ctx->indata = true;
+			ctx->inexp = false;
+
+			if (line_has_prefix(ptr, line_end, "decode"))
+				ctx->dir = DECODE;
+			else if (line_has_prefix(ptr, line_end, "encode"))
+				ctx->dir = ENCODE;
+			else
+				ctx->dir = BOTH;
+
+			/* Move to the error mode word. Skipping by word
+			 * rather than by a fixed seven bytes also works
+			 * when the direction is not a six letter word */
+			ptr = line_skip_word(ptr, line_end);
+
+			if (line_has_prefix(ptr, line_end, "LOOSE")) {
+				params.error_mode.mode =
+					PARSERUTILS_CHARSET_CODEC_ERROR_LOOSE;
+			} else if (line_has_prefix(ptr, line_end, "STRICT")) {
+				params.error_mode.mode =
+					PARSERUTILS_CHARSET_CODEC_ERROR_STRICT;
+			} else {
+				params.error_mode.mode =
+					PARSERUTILS_CHARSET_CODEC_ERROR_TRANSLIT;
+			}
+
+			assert(parserutils_charset_codec_setopt(ctx->codec,
+				PARSERUTILS_CHARSET_CODEC_ERROR_MODE,
+				&params) == PARSERUTILS_OK);
+		} else if (line_has_prefix(data + 1, line_end, "expected")) {
+			ctx->indata = false;
+			ctx->inexp = true;
+
+			/* NOTE(review): assumes the line is at least 11
+			 * bytes ("#expected " plus a newline); a shorter
+			 * line makes the length below wrap - confirm how
+			 * parserutils_error_from_string treats that */
+			ctx->exp_ret = parserutils_error_from_string(data + 10,
+					datalen - 10 - 1 /* \n */);
+		} else if (line_has_prefix(data + 1, line_end, "reset")) {
+			ctx->indata = false;
+			ctx->inexp = false;
+
+			parserutils_charset_codec_reset(ctx->codec);
+		} else if (line_has_prefix(data + 1, line_end, "enc")) {
+			/* The charset name follows "#enc " and runs to the
+			 * next whitespace or the end of the line */
+			size_t start = datalen < 5 ? datalen : 5;
+			size_t stop = start;
+
+			while (stop < datalen &&
+					!isspace((unsigned char) data[stop]))
+				stop++;
+
+			char enc_name[stop - start + 1];
+			memcpy(enc_name, data + start, stop - start);
+			enc_name[stop - start] = 0;
+
+			/* A further "#enc" replaces the codec: release the
+			 * previous one so that it is not leaked */
+			if (ctx->hadenc)
+				parserutils_charset_codec_destroy(ctx->codec);
+
+			ctx->codec = parserutils_charset_codec_create(enc_name,
+					myrealloc, NULL);
+			assert(ctx->codec != NULL);
+
+			ctx->hadenc = true;
+		}
+	} else {
+		if (ctx->indata) {
+			memcpy(ctx->buf + ctx->bufused, data, datalen);
+			ctx->bufused += datalen;
+		}
+		if (ctx->inexp) {
+			memcpy(ctx->exp + ctx->expused, data, datalen);
+			ctx->expused += datalen;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * Run the testcase currently held in the context and check the outcome.
+ *
+ * Depending on ctx->dir the input is decoded, encoded, or decoded and
+ * then re-encoded. The codec's return code is checked against
+ * ctx->exp_ret and the bytes produced are checked against ctx->exp.
+ *
+ * \param ctx  Testcase state: buf/bufused hold the input, exp/expused
+ *             the expected output
+ */
+void run_test(line_ctx *ctx)
+{
+	static int testnum;
+	/* Output space offered to the codec: four bytes per input byte */
+	const size_t capacity = ctx->bufused * 4;
+	size_t destlen = capacity;
+	/* A variable length array needs at least one element, and
+	 * capacity is zero when the testcase has no input */
+	uint8_t dest[capacity > 0 ? capacity : 1];
+	uint8_t *pdest = dest;
+	const uint8_t *psrc = ctx->buf;
+	size_t srclen = ctx->bufused;
+	size_t produced;
+	size_t i;
+
+	if (ctx->dir == DECODE) {
+		assert(parserutils_charset_codec_decode(ctx->codec,
+				&psrc, &srclen,
+				&pdest, &destlen) == ctx->exp_ret);
+	} else if (ctx->dir == ENCODE) {
+		assert(parserutils_charset_codec_encode(ctx->codec,
+				&psrc, &srclen,
+				&pdest, &destlen) == ctx->exp_ret);
+	} else {
+		size_t templen = capacity;
+		uint8_t temp[capacity > 0 ? capacity : 1];
+		uint8_t *ptemp = temp;
+		const uint8_t *ptemp2;
+		size_t templen2;
+
+		assert(parserutils_charset_codec_decode(ctx->codec,
+				&psrc, &srclen,
+				&ptemp, &templen) == ctx->exp_ret);
+		/* \todo currently there is no way to specify the number of
+		   consumed & produced data in case of a deliberate bad input
+		   data set. */
+		if (ctx->exp_ret == PARSERUTILS_OK) {
+			assert(temp + (capacity - templen) == ptemp);
+		}
+
+		/* Feed the decoder's output back through the encoder */
+		ptemp2 = temp;
+		templen2 = capacity - templen;
+		assert(parserutils_charset_codec_encode(ctx->codec,
+				&ptemp2, &templen2,
+				&pdest, &destlen) == ctx->exp_ret);
+		if (ctx->exp_ret == PARSERUTILS_OK) {
+			assert(templen2 == 0);
+			assert(temp + (capacity - templen) == ptemp2);
+		}
+	}
+	if (ctx->exp_ret == PARSERUTILS_OK) {
+		assert(srclen == 0);
+		assert(ctx->buf + ctx->bufused == psrc);
+		assert(dest + (capacity - destlen) == pdest);
+		assert(capacity - destlen == ctx->expused);
+	}
+
+	/* Dump only what the codec wrote: the rest of dest is
+	 * uninitialised and expused may exceed the size of dest. The
+	 * assertion after the dump requires both counts to be equal */
+	produced = (size_t) (pdest - dest);
+
+	printf("%d: Read '", ++testnum);
+	for (i = 0; i < produced; i++) {
+		printf("%c%c ", "0123456789abcdef"[(dest[i] >> 4) & 0xf],
+				"0123456789abcdef"[dest[i] & 0xf]);
+	}
+	printf("' Expected '");
+	for (i = 0; i < ctx->expused; i++) {
+		printf("%c%c ", "0123456789abcdef"[(ctx->exp[i] >> 4) & 0xf],
+				"0123456789abcdef"[ctx->exp[i] & 0xf]);
+	}
+	printf("'\n");
+
+	assert(pdest == dest + ctx->expused);
+	assert(memcmp(dest, ctx->exp, ctx->expused) == 0);
+}
+
diff --git a/test/data/cscodec-8859/1.dat b/test/data/cscodec-8859/1.dat
new file mode 100644
index 0000000..34aa0de
--- /dev/null
+++ b/test/data/cscodec-8859/1.dat
Binary files differ
diff --git a/test/data/cscodec-8859/10.dat b/test/data/cscodec-8859/10.dat
new file mode 100644
index 0000000..5f446dd
--- /dev/null
+++ b/test/data/cscodec-8859/10.dat
Binary files differ
diff --git a/test/data/cscodec-8859/11.dat b/test/data/cscodec-8859/11.dat
new file mode 100644
index 0000000..35fb645
--- /dev/null
+++ b/test/data/cscodec-8859/11.dat
Binary files differ
diff --git a/test/data/cscodec-8859/13.dat b/test/data/cscodec-8859/13.dat
new file mode 100644
index 0000000..bf8a360
--- /dev/null
+++ b/test/data/cscodec-8859/13.dat
Binary files differ
diff --git a/test/data/cscodec-8859/14.dat b/test/data/cscodec-8859/14.dat
new file mode 100644
index 0000000..900a482
--- /dev/null
+++ b/test/data/cscodec-8859/14.dat
Binary files differ
diff --git a/test/data/cscodec-8859/15.dat b/test/data/cscodec-8859/15.dat
new file mode 100644
index 0000000..d2705a4
--- /dev/null
+++ b/test/data/cscodec-8859/15.dat
Binary files differ
diff --git a/test/data/cscodec-8859/16.dat b/test/data/cscodec-8859/16.dat
new file mode 100644
index 0000000..ca17530
--- /dev/null
+++ b/test/data/cscodec-8859/16.dat
Binary files differ
diff --git a/test/data/cscodec-8859/2.dat b/test/data/cscodec-8859/2.dat
new file mode 100644
index 0000000..593f8a5
--- /dev/null
+++ b/test/data/cscodec-8859/2.dat
Binary files differ
diff --git a/test/data/cscodec-8859/3.dat b/test/data/cscodec-8859/3.dat
new file mode 100644
index 0000000..489ccc9
--- /dev/null
+++ b/test/data/cscodec-8859/3.dat
Binary files differ
diff --git a/test/data/cscodec-8859/4.dat b/test/data/cscodec-8859/4.dat
new file mode 100644
index 0000000..a36bca7
--- /dev/null
+++ b/test/data/cscodec-8859/4.dat
Binary files differ
diff --git a/test/data/cscodec-8859/5.dat b/test/data/cscodec-8859/5.dat
new file mode 100644
index 0000000..6d2cb19
--- /dev/null
+++ b/test/data/cscodec-8859/5.dat
Binary files differ
diff --git a/test/data/cscodec-8859/6.dat b/test/data/cscodec-8859/6.dat
new file mode 100644
index 0000000..eee176d
--- /dev/null
+++ b/test/data/cscodec-8859/6.dat
Binary files differ
diff --git a/test/data/cscodec-8859/7.dat b/test/data/cscodec-8859/7.dat
new file mode 100644
index 0000000..43664fc
--- /dev/null
+++ b/test/data/cscodec-8859/7.dat
Binary files differ
diff --git a/test/data/cscodec-8859/8.dat b/test/data/cscodec-8859/8.dat
new file mode 100644
index 0000000..51dfc58
--- /dev/null
+++ b/test/data/cscodec-8859/8.dat
Binary files differ
diff --git a/test/data/cscodec-8859/9.dat b/test/data/cscodec-8859/9.dat
new file mode 100644
index 0000000..179046a
--- /dev/null
+++ b/test/data/cscodec-8859/9.dat
Binary files differ
diff --git a/test/data/cscodec-8859/INDEX b/test/data/cscodec-8859/INDEX
new file mode 100644
index 0000000..5a14e8f
--- /dev/null
+++ b/test/data/cscodec-8859/INDEX
@@ -0,0 +1,19 @@
+# Index file for charset codec tests
+#
+# Test Description
+
+1.dat ISO-8859-1
+2.dat ISO-8859-2
+3.dat ISO-8859-3
+4.dat ISO-8859-4
+5.dat ISO-8859-5
+6.dat ISO-8859-6
+7.dat ISO-8859-7
+8.dat ISO-8859-8
+9.dat ISO-8859-9
+10.dat ISO-8859-10
+11.dat ISO-8859-11
+13.dat ISO-8859-13
+14.dat ISO-8859-14
+15.dat ISO-8859-15
+16.dat ISO-8859-16