From 2777a04ed2ba4fd36138b991d66a32a283361f7e Mon Sep 17 00:00:00 2001
From: John Mark Bell <jmb@netsurf-browser.org>
Date: Thu, 1 May 2008 16:34:46 +0000
Subject: Import parser construction utility library

svn path=/trunk/libparserutils/; revision=4111
---
 test/INDEX                       |  15 ++
 test/Makefile                    |  80 +++++++++
 test/README                      |  84 +++++++++
 test/aliases.c                   |  62 +++++++
 test/charset.c                   |  31 ++++
 test/cscodec.c                   | 232 +++++++++++++++++++++++++
 test/data/Aliases                | 302 +++++++++++++++++++++++++++++++++
 test/data/cscodec/INDEX          |   6 +
 test/data/cscodec/UTF-8-test.txt | Bin 0 -> 41013 bytes
 test/data/cscodec/simple.dat     | Bin 0 -> 1109 bytes
 test/data/input/INDEX            |   5 +
 test/data/input/UTF-8-test.txt   | Bin 0 -> 20334 bytes
 test/filter.c                    | 357 +++++++++++++++++++++++++++++++++++++++
 test/inputstream.c               |  97 +++++++++++
 test/parserutils.c               |  30 ++++
 test/regression/cscodec-segv.c   |  38 +++++
 test/regression/filter-segv.c    |  39 +++++
 test/regression/stream-nomem.c   |  94 +++++++++++
 test/testrunner.pl               | 167 ++++++++++++++++++
 test/testutils.h                 | 123 ++++++++++++++
 20 files changed, 1762 insertions(+)
 create mode 100644 test/INDEX
 create mode 100644 test/Makefile
 create mode 100644 test/README
 create mode 100644 test/aliases.c
 create mode 100644 test/charset.c
 create mode 100644 test/cscodec.c
 create mode 100644 test/data/Aliases
 create mode 100644 test/data/cscodec/INDEX
 create mode 100644 test/data/cscodec/UTF-8-test.txt
 create mode 100644 test/data/cscodec/simple.dat
 create mode 100644 test/data/input/INDEX
 create mode 100644 test/data/input/UTF-8-test.txt
 create mode 100644 test/filter.c
 create mode 100644 test/inputstream.c
 create mode 100644 test/parserutils.c
 create mode 100644 test/regression/cscodec-segv.c
 create mode 100644 test/regression/filter-segv.c
 create mode 100644 test/regression/stream-nomem.c
 create mode 100644 test/testrunner.pl
 create mode 100644 test/testutils.h

(limited to 'test')

diff --git a/test/INDEX b/test/INDEX
new file mode 100644
index 0000000..772c82f
--- /dev/null
+++ b/test/INDEX
@@ -0,0 +1,15 @@
+# Index for testcases
+#
+# Test		Description				DataDir
+
+charset		Charset initialisation/finalisation
+parserutils	Library initialisation/finalisation
+aliases		Encoding alias handling
+cscodec		Charset codec implementation		cscodec
+filter		Input stream filtering
+inputstream	Inputstream handling			input
+
+# Regression tests
+regression/cscodec-segv	Segfault in charset codecs
+regression/filter-segv	Segfault in input filtering
+regression/stream-nomem	Inputstream buffer expansion
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..2ed0b44
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,80 @@
+# Child makefile fragment
+#
+# Toolchain is provided by top-level makefile
+#
+# Variables provided by top-level makefile
+#
+# COMPONENT		The name of the component
+# EXPORT		The location of the export directory
+# TOP			The location of the source tree root
+# RELEASEDIR		The place to put release objects
+# DEBUGDIR		The place to put debug objects
+#
+# do_include		Canned command sequence to include a child makefile
+#
+# Variables provided by parent makefile:
+#
+# DIR			The name of the directory we're in, relative to $(TOP)
+#
+# Variables we can manipulate:
+#
+# ITEMS_CLEAN		The list of items to remove for "make clean"
+# ITEMS_DISTCLEAN	The list of items to remove for "make distclean"
+# TARGET_TESTS		The list of target names to run for "make test"
+#
+# SOURCES		The list of sources to build for $(COMPONENT)
+#
+# Plus anything from the toolchain
+
+# Push parent directory onto the directory stack
+sp             := $(sp).x
+dirstack_$(sp) := $(d)
+d              := $(DIR)
+
+# Extend toolchain settings
+override CFLAGS := $(CFLAGS) -I$(TOP)/src/ -I$(d)
+
+# Tests
+TESTS_$(d) := aliases cscodec charset filter inputstream parserutils
+TESTS_$(d) := $(TESTS_$(d)) regression/cscodec-segv regression/filter-segv \
+	regression/stream-nomem
+
+# Items for top-level makefile to use
+ITEMS_CLEAN := $(ITEMS_CLEAN) \
+	$(addprefix $(d), $(addsuffix $(EXEEXT), $(TESTS_$(d)))) \
+	$(addprefix $(d), $(addsuffix .gcda, $(TESTS_$(d)))) \
+	$(addprefix $(d), $(addsuffix .gcno, $(TESTS_$(d))))
+ITEMS_DISTCLEAN := $(ITEMS_DISTCLEAN) $(d)log
+
+# Targets for top-level makefile to run
+TARGET_TESTS := $(TARGET_TESTS) test_$(d)
+
+# Now we get to hack around so that we know what directory we're in.
+# $(d) no longer exists when running the commands for a target, so we can't
+# simply use it verbatim. Assigning to a variable doesn't really help, as
+# there's no guarantee that someone else hasn't overridden that variable.
+# So, what we do is make the target depend on $(d), then pick it out of the
+# dependency list when running commands. This isn't pretty, but is effective.
+test_$(d): $(d) $(addprefix $(d), $(TESTS_$(d)))
+	@$(PERL) $(TOP)/$<testrunner.pl $(TOP)/$< $(EXEEXT)
+
+# Build rules for each test binary -- they all depend on the debug library
+define compile_test
+$(2): $$(TOP)/$$(COMPONENT)-debug.a $(1)
+	@$$(ECHO) $$(ECHOFLAGS) "==> $(1)"
+	@$$(CC) -c -g $$(DEBUGCFLAGS) -o $$@.o $(1)
+	@$$(LD) -g -o $$@ $$@.o $$(LDFLAGS) -lparserutils-debug
+	@$$(RM) $$(RMFLAGS) $$@.o
+
+endef
+
+$(eval $(foreach TEST,$(addprefix $(d), $(TESTS_$(d))), \
+	$(call compile_test,$(addsuffix .c, $(TEST)),$(TEST))))
+
+# Now include any children we may have
+MAKE_INCLUDES := $(wildcard $(d)*/Makefile)
+$(eval $(foreach INC, $(MAKE_INCLUDES), $(call do_include,$(INC))))
+
+# Finally, pop off the directory stack
+d  := $(dirstack_$(sp))
+sp := $(basename $(sp))
diff --git a/test/README b/test/README
new file mode 100644
index 0000000..7e41abf
--- /dev/null
+++ b/test/README
@@ -0,0 +1,84 @@
+LibParserUtils testcases
+========================
+
+Testcases for LibParserUtils are self-contained binaries which test various
+parts of the parser utility library. These may make use of external data files
+to drive the testing.
+
+Testcase command lines
+----------------------
+
+Testcase command lines are in a unified format, thus:
+
+ 	<aliases_file> [ <data_file> ]
+
+The aliases file parameter will always be specified (as it is required for
+the library to work at all).
+
+The data file parameter is optional and may be provided on a test-by-test
+basis.
+
+Testcase output
+---------------
+
+Testcases may output anything at all to stdout. The final line of the 
+output must begin with either PASS or FAIL (case sensitive), indicating 
+the success status of the test.
+
+Test Index
+----------
+
+The test sources directory contains a file, named INDEX, which provides an
+index of all available test binaries. Any new test applications should be
+added to this index as they are created.
+
+The test index file format is as follows:
+
+	file         = *line
+
+	line         = ( entry / comment / blank ) LF
+
+	entry        = testname 1*HTAB description [ 1*HTAB datadir ]
+	comment      = "#" *non-newline
+	blank        = 0<OCTET>
+
+	testname     = 1*non-reserved
+	description  = 1*non-reserved
+	datadir      = 1*non-reserved
+
+	non-newline  = VCHAR / WSP
+	non-reserved = VCHAR / SP
+
+Each entry contains a mandatory binary name and description followed by 
+an optional data directory specifier. The data directory specifier is 
+used to state the name of the directory containing data files for the 
+test name. This directory will be searched for within the "data" 
+directory in the source tree. 
+
+If a data directory is specified, the test binary will be invoked for
+each data file listed within the data directory INDEX, passing the 
+filename as the second parameter (<data_file>, above).
+
+Data Index
+----------
+
+Each test data directory contains a file, named INDEX, which provides an 
+index of all available test data files.
+
+The data index file format is as follows:
+
+	file         = *line
+
+	line         = ( entry / comment / blank ) LF
+
+	entry        = dataname 1*HTAB description
+	comment      = "#" *non-newline
+	blank        = 0<OCTET>
+
+	dataname     = 1*non-reserved
+	description  = 1*non-reserved
+
+	non-newline  = VCHAR / WSP
+	non-reserved = VCHAR / SP
+
+Each entry contains a mandatory data file name and description.
diff --git a/test/aliases.c b/test/aliases.c
new file mode 100644
index 0000000..dff31c6
--- /dev/null
+++ b/test/aliases.c
@@ -0,0 +1,62 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "charset/aliases.h"
+
+#include "testutils.h"
+
+extern void parserutils_charset_aliases_dump(void);
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	UNUSED(pw);
+	/* Allocator callback given to the library: plain realloc(), pw unused */
+	return realloc(ptr, len);
+}
+
+int main (int argc, char **argv)
+{
+	parserutils_charset_aliases_canon *c;
+
+	if (argc != 2) {
+		printf("Usage: %s <filename>\n", argv[0]);
+		return 1;
+	}
+	/* Everything below depends on the aliases file having loaded */
+	assert(parserutils_charset_aliases_create(argv[1], myrealloc, NULL) ==
+			PARSERUTILS_OK);
+	parserutils_charset_aliases_dump();
+
+	c = parserutils_charset_alias_canonicalise("moose", 5);
+	if (c) {
+		printf("FAIL - found invalid encoding 'moose'\n");
+		return 1;
+	}
+	/* The Aliases file spells this csINVARIANT: lookups must fold case */
+	c = parserutils_charset_alias_canonicalise("csinvariant", 11);
+	if (c) {
+		printf("%s %d\n", c->name, c->mib_enum);
+	} else {
+		printf("FAIL - failed finding encoding 'csinvariant'\n");
+		return 1;
+	}
+	/* A canonical name (NATS-SEFI-ADD) must resolve as well as an alias */
+	c = parserutils_charset_alias_canonicalise("nats-sefi-add", 13);
+	if (c) {
+		printf("%s %d\n", c->name, c->mib_enum);
+	} else {
+		printf("FAIL - failed finding encoding 'nats-sefi-add'\n");
+		return 1;
+	}
+	/* Look up the MIB enum from the name, then the name from the MIB enum */
+	printf("%d\n", parserutils_charset_mibenum_from_name(c->name,
+			strlen(c->name)));
+
+	printf("%s\n", parserutils_charset_mibenum_to_name(c->mib_enum));
+
+	parserutils_charset_aliases_destroy(myrealloc, NULL);
+
+	printf("PASS\n");
+
+	return 0;
+}
diff --git a/test/charset.c b/test/charset.c
new file mode 100644
index 0000000..a793e7e
--- /dev/null
+++ b/test/charset.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "charset/charset.h"
+
+#include "testutils.h"
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	UNUSED(pw);
+	/* Allocator callback given to the library: plain realloc(), pw unused */
+	return realloc(ptr, len);
+}
+
+int main(int argc, char **argv)
+{
+	if (argc != 2) {
+		printf("Usage: %s <filename>\n", argv[0]);
+		return 1;
+	}
+	/* Initialise the charset layer from the file named in argv[1]... */
+	assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) == 
+			PARSERUTILS_OK);
+	/* ...then finalise it straight away; both calls must report success */
+	assert (parserutils_charset_finalise(myrealloc, NULL) == 
+			PARSERUTILS_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
diff --git a/test/cscodec.c b/test/cscodec.c
new file mode 100644
index 0000000..d3b1b76
--- /dev/null
+++ b/test/cscodec.c
@@ -0,0 +1,232 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "charset/charset.h"
+#include <parserutils/charset/codec.h>
+
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+typedef struct line_ctx {
+	parserutils_charset_codec *codec;
+
+	size_t buflen;
+	size_t bufused;
+	uint8_t *buf;
+	size_t explen;
+	size_t expused;
+	uint8_t *exp;
+
+	bool indata;
+	bool inexp;
+
+	parserutils_error exp_ret;
+
+	enum { ENCODE, DECODE, BOTH } dir;
+} line_ctx;
+
+static bool handle_line(const char *data, size_t datalen, void *pw);
+static void run_test(line_ctx *ctx);
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	UNUSED(pw);
+	/* Allocator callback given to the library: plain realloc(), pw unused */
+	return realloc(ptr, len);
+}
+
+int main(int argc, char **argv)
+{
+	line_ctx ctx;
+	/* Runs each testcase in argv[2] through a UTF-8 codec */
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) ==
+			PARSERUTILS_OK);
+
+	assert(parserutils_charset_codec_create("NATS-SEFI-ADD",
+			myrealloc, NULL) == NULL);
+
+	ctx.codec = parserutils_charset_codec_create("UTF-8", myrealloc, NULL);
+	assert(ctx.codec != NULL);
+
+	ctx.buflen = parse_filesize(argv[2]);
+	if (ctx.buflen == 0)
+		return 1;
+
+	ctx.buf = malloc(2 * ctx.buflen);
+	if (ctx.buf == NULL) {
+		printf("Failed allocating %u bytes\n",
+				(unsigned int) (2 * ctx.buflen));
+		return 1;
+	}
+	/* First half holds input data, second half the expected output */
+	ctx.exp = ctx.buf + ctx.buflen;
+	ctx.explen = ctx.buflen;
+
+	ctx.buf[0] = '\0';
+	ctx.exp[0] = '\0';
+	ctx.bufused = 0;
+	ctx.expused = 0;
+	ctx.indata = false;
+	ctx.inexp = false;
+	ctx.exp_ret = PARSERUTILS_OK;
+	ctx.dir = BOTH;	/* run_test() reads dir even if no #data line set it */
+	assert(parse_testfile(argv[2], handle_line, &ctx) == true);
+
+	/* and run final test */
+	if (ctx.bufused > 0 && ctx.buf[ctx.bufused - 1] == '\n')
+		ctx.bufused -= 1;
+
+	if (ctx.expused > 0 && ctx.exp[ctx.expused - 1] == '\n')
+		ctx.expused -= 1;
+
+	run_test(&ctx);
+
+	free(ctx.buf);
+
+	parserutils_charset_codec_destroy(ctx.codec);
+
+	assert(parserutils_charset_finalise(myrealloc, NULL) ==
+			PARSERUTILS_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+bool handle_line(const char *data, size_t datalen, void *pw)
+{
+	line_ctx *ctx = (line_ctx *) pw;
+	/* '#' starts a directive line; anything else is testcase payload */
+	if (data[0] == '#') {
+		if (ctx->inexp) {
+			/* This marks end of testcase, so run it */
+			if (ctx->bufused > 0 &&
+					ctx->buf[ctx->bufused - 1] == '\n')
+				ctx->bufused -= 1;
+			if (ctx->expused > 0 &&
+					ctx->exp[ctx->expused - 1] == '\n')
+				ctx->expused -= 1;
+
+			run_test(ctx);
+
+			ctx->buf[0] = '\0';
+			ctx->exp[0] = '\0';
+			ctx->bufused = 0;
+			ctx->expused = 0;
+			ctx->exp_ret = PARSERUTILS_OK;
+		}
+
+		if (strncasecmp(data+1, "data", 4) == 0) {
+			parserutils_charset_codec_optparams params;
+			const char *ptr = data + 6;
+			/* "#data <direction> <error mode>" */
+			ctx->indata = true;
+			ctx->inexp = false;
+
+			if (strncasecmp(ptr, "decode", 6) == 0)
+				ctx->dir = DECODE;
+			else if (strncasecmp(ptr, "encode", 6) == 0)
+				ctx->dir = ENCODE;
+			else
+				ctx->dir = BOTH;
+
+			ptr += 7;
+
+			if (strncasecmp(ptr, "LOOSE", 5) == 0) {
+				params.error_mode.mode =
+					PARSERUTILS_CHARSET_CODEC_ERROR_LOOSE;
+				ptr += 6;
+			} else if (strncasecmp(ptr, "STRICT", 6) == 0) {
+				params.error_mode.mode =
+					PARSERUTILS_CHARSET_CODEC_ERROR_STRICT;
+				ptr += 7;
+			} else {
+				params.error_mode.mode =
+					PARSERUTILS_CHARSET_CODEC_ERROR_TRANSLIT;
+				ptr += 9;
+			}
+
+			assert(parserutils_charset_codec_setopt(ctx->codec,
+				PARSERUTILS_CHARSET_CODEC_ERROR_MODE,
+				(parserutils_charset_codec_optparams *) &params)
+				== PARSERUTILS_OK);
+		} else if (strncasecmp(data+1, "expected", 8) == 0) {
+			ctx->indata = false;
+			ctx->inexp = true;
+
+			ctx->exp_ret = parserutils_error_from_string(data + 10,
+					datalen - 10 - 1 /* \n */);
+		} else if (strncasecmp(data+1, "reset", 5) == 0) {
+			ctx->indata = false;
+			ctx->inexp = false;
+
+			parserutils_charset_codec_reset(ctx->codec);
+		}
+	} else {
+		if (ctx->indata) {
+			memcpy(ctx->buf + ctx->bufused, data, datalen);
+			ctx->bufused += datalen;
+		}
+		if (ctx->inexp) {
+			memcpy(ctx->exp + ctx->expused, data, datalen);
+			ctx->expused += datalen;
+		}
+	}
+
+	return true;
+}
+
+void run_test(line_ctx *ctx)
+{
+	/* Run the buffered testcase through the codec; check result and output */
+	static int testnum;
+	size_t destlen = ctx->bufused * 4;
+	uint8_t dest[destlen + 1];
+	uint8_t *pdest = dest;
+	const uint8_t *psrc = ctx->buf;
+	size_t srclen = ctx->bufused;
+	/* +1 avoids a zero-length VLA; zero-fill keeps unwritten bytes stable */
+	memset(dest, 0, sizeof dest);
+	if (ctx->dir == DECODE) {
+		assert(parserutils_charset_codec_decode(ctx->codec,
+				&psrc, &srclen,
+				&pdest, &destlen) == ctx->exp_ret);
+	} else if (ctx->dir == ENCODE) {
+		assert(parserutils_charset_codec_encode(ctx->codec,
+				&psrc, &srclen,
+				&pdest, &destlen) == ctx->exp_ret);
+	} else {
+		size_t templen = ctx->bufused * 4;
+		uint8_t temp[templen + 1];
+		uint8_t *ptemp = temp;
+		assert(parserutils_charset_codec_decode(ctx->codec,
+				&psrc, &srclen,
+				&ptemp, &templen) == ctx->exp_ret);
+		ptemp = temp;
+		templen = ctx->bufused * 4 - templen;
+		assert(parserutils_charset_codec_encode(ctx->codec,
+				(const uint8_t **) &ptemp, &templen,
+				&pdest, &destlen) == ctx->exp_ret);
+	}
+
+	printf("%d: Read '", ++testnum);
+	for (size_t i = 0; i < ctx->expused && i < sizeof dest; i++) {
+		printf("%c%c ", "0123456789abcdef"[(dest[i] >> 4) & 0xf],
+				"0123456789abcdef"[dest[i] & 0xf]);
+	}
+	printf("' Expected '");
+	for (size_t i = 0; i < ctx->expused; i++) {
+		printf("%c%c ", "0123456789abcdef"[(ctx->exp[i] >> 4) & 0xf],
+				"0123456789abcdef"[ctx->exp[i] & 0xf]);
+	}
+	printf("'\n");
+	assert(ctx->expused <= sizeof dest &&
+			memcmp(dest, ctx->exp, ctx->expused) == 0);
+}
+
diff --git a/test/data/Aliases b/test/data/Aliases
new file mode 100644
index 0000000..db61ff1
--- /dev/null
+++ b/test/data/Aliases
@@ -0,0 +1,302 @@
+# > Unicode:Files.Aliases
+# Mapping of character set encoding names to their canonical form
+#
+# Lines starting with a '#' are comments, blank lines are ignored.
+#
+# Based on http://www.iana.org/assignments/character-sets and
+# http://www.iana.org/assignments/ianacharset-mib
+#
+# Canonical Form	MIBenum		Aliases...
+#
+US-ASCII		3		iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII
+ISO-10646-UTF-1		27		csISO10646UTF1
+ISO_646.basic:1983	28		ref csISO646basic1983
+INVARIANT		29		csINVARIANT
+ISO_646.irv:1983	30		iso-ir-2 irv csISO2IntlRefVersion
+BS_4730			20		iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom
+NATS-SEFI		31		iso-ir-8-1 csNATSSEFI
+NATS-SEFI-ADD		32		iso-ir-8-2 csNATSSEFIADD
+NATS-DANO		33		iso-ir-9-1 csNATSDANO
+NATS-DANO-ADD		34		iso-ir-9-2 csNATSDANOADD
+SEN_850200_B		35		iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish
+SEN_850200_C		21		iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames
+KS_C_5601-1987		36		iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987
+ISO-2022-KR		37		csISO2022KR
+EUC-KR			38		csEUCKR EUCKR
+ISO-2022-JP		39		csISO2022JP
+ISO-2022-JP-2		40		csISO2022JP2
+ISO-2022-CN		104
+ISO-2022-CN-EXT		105
+JIS_C6220-1969-jp	41		JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp
+JIS_C6220-1969-ro	42		iso-ir-14 jp ISO646-JP csISO14JISC6220ro
+IT			22		iso-ir-15 ISO646-IT csISO15Italian
+PT			43		iso-ir-16 ISO646-PT csISO16Portuguese
+ES			23		iso-ir-17 ISO646-ES csISO17Spanish
+greek7-old		44		iso-ir-18 csISO18Greek7Old
+latin-greek		45		iso-ir-19 csISO19LatinGreek
+DIN_66003		24		iso-ir-21 de ISO646-DE csISO21German
+NF_Z_62-010_(1973)	46		iso-ir-25 ISO646-FR1 csISO25French
+Latin-greek-1		47		iso-ir-27 csISO27LatinGreek1
+ISO_5427		48		iso-ir-37 csISO5427Cyrillic
+JIS_C6226-1978		49		iso-ir-42 csISO42JISC62261978
+BS_viewdata		50		iso-ir-47 csISO47BSViewdata
+INIS			51		iso-ir-49 csISO49INIS
+INIS-8			52		iso-ir-50 csISO50INIS8
+INIS-cyrillic		53		iso-ir-51 csISO51INISCyrillic
+ISO_5427:1981		54		iso-ir-54 ISO5427Cyrillic1981
+ISO_5428:1980		55		iso-ir-55 csISO5428Greek
+GB_1988-80		56		iso-ir-57 cn ISO646-CN csISO57GB1988
+GB_2312-80		57		iso-ir-58 chinese csISO58GB231280
+NS_4551-1		25		iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1
+NS_4551-2		58		ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2
+NF_Z_62-010		26		iso-ir-69 ISO646-FR fr csISO69French
+videotex-suppl		59		iso-ir-70 csISO70VideotexSupp1
+PT2			60		iso-ir-84 ISO646-PT2 csISO84Portuguese2
+ES2			61		iso-ir-85 ISO646-ES2 csISO85Spanish2
+MSZ_7795.3		62		iso-ir-86 ISO646-HU hu csISO86Hungarian
+JIS_C6226-1983		63		iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208
+greek7			64		iso-ir-88 csISO88Greek7
+ASMO_449		65		ISO_9036 arabic7 iso-ir-89 csISO89ASMO449
+iso-ir-90		66		csISO90
+JIS_C6229-1984-a	67		iso-ir-91 jp-ocr-a csISO91JISC62291984a
+JIS_C6229-1984-b	68		iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b
+JIS_C6229-1984-b-add	69		iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd
+JIS_C6229-1984-hand	70		iso-ir-94 jp-ocr-hand csISO94JIS62291984hand
+JIS_C6229-1984-hand-add	71		iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd
+JIS_C6229-1984-kana	72		iso-ir-96 csISO96JISC62291984kana
+ISO_2033-1983		73		iso-ir-98 e13b csISO2033
+ANSI_X3.110-1983	74		iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS
+ISO-8859-1		4		iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1
+ISO-8859-2		5		iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2
+T.61-7bit		75		iso-ir-102 csISO102T617bit
+T.61-8bit		76		T.61 iso-ir-103 csISO103T618bit
+ISO-8859-3		6		iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3
+ISO-8859-4		7		iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4
+ECMA-cyrillic		77		iso-ir-111 KOI8-E csISO111ECMACyrillic
+CSA_Z243.4-1985-1	78		iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1
+CSA_Z243.4-1985-2	79		iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2
+CSA_Z243.4-1985-gr	80		iso-ir-123 csISO123CSAZ24341985gr
+ISO-8859-6		9		iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic
+ISO-8859-6-E		81		csISO88596E ISO_8859-6-E
+ISO-8859-6-I		82		csISO88596I ISO_8859-6-I
+ISO-8859-7		10		iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7
+T.101-G2		83		iso-ir-128 csISO128T101G2
+ISO-8859-8		11		iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8
+ISO-8859-8-E		84		csISO88598E ISO_8859-8-E
+ISO-8859-8-I		85		csISO88598I ISO_8859-8-I
+CSN_369103		86		iso-ir-139 csISO139CSN369103
+JUS_I.B1.002		87		iso-ir-141 ISO646-YU js yu csISO141JUSIB1002
+ISO_6937-2-add		14		iso-ir-142 csISOTextComm
+IEC_P27-1		88		iso-ir-143 csISO143IECP271
+ISO-8859-5		8		iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5
+JUS_I.B1.003-serb	89		iso-ir-146 serbian csISO146Serbian
+JUS_I.B1.003-mac	90		macedonian iso-ir-147 csISO147Macedonian
+ISO-8859-9		12		iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9
+greek-ccitt		91		iso-ir-150 csISO150 csISO150GreekCCITT
+NC_NC00-10:81		92		cuba iso-ir-151 ISO646-CU csISO151Cuba
+ISO_6937-2-25		93		iso-ir-152 csISO6937Add
+GOST_19768-74		94		ST_SEV_358-88 iso-ir-153 csISO153GOST1976874
+ISO_8859-supp		95		iso-ir-154 latin1-2-5 csISO8859Supp
+ISO_10367-box		96		iso-ir-155 csISO10367Box
+ISO-8859-10		13		iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10
+latin-lap		97		lap iso-ir-158 csISO158Lap
+JIS_X0212-1990		98		x0212 iso-ir-159 csISO159JISX02121990
+DS_2089			99		DS2089 ISO646-DK dk csISO646Danish
+us-dk			100		csUSDK
+dk-us			101		csDKUS
+JIS_X0201		15		X0201 csHalfWidthKatakana
+KSC5636			102		ISO646-KR csKSC5636
+ISO-10646-UCS-2		1000		csUnicode UCS-2 UCS2
+ISO-10646-UCS-4		1001		csUCS4 UCS-4 UCS4
+DEC-MCS			2008		dec csDECMCS
+hp-roman8		2004		roman8 r8 csHPRoman8
+macintosh		2027		mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN
+IBM037			2028		cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037
+IBM038			2029		EBCDIC-INT cp038 csIBM038
+IBM273			2030		CP273 csIBM273
+IBM274			2031		EBCDIC-BE CP274 csIBM274
+IBM275			2032		EBCDIC-BR cp275 csIBM275
+IBM277			2033		EBCDIC-CP-DK EBCDIC-CP-NO csIBM277
+IBM278			2034		CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278
+IBM280			2035		CP280 ebcdic-cp-it csIBM280
+IBM281			2036		EBCDIC-JP-E cp281 csIBM281
+IBM284			2037		CP284 ebcdic-cp-es csIBM284
+IBM285			2038		CP285 ebcdic-cp-gb csIBM285
+IBM290			2039		cp290 EBCDIC-JP-kana csIBM290
+IBM297			2040		cp297 ebcdic-cp-fr csIBM297
+IBM420			2041		cp420 ebcdic-cp-ar1 csIBM420
+IBM423			2042		cp423 ebcdic-cp-gr csIBM423
+IBM424			2043		cp424 ebcdic-cp-he csIBM424
+IBM437			2011		cp437 437 csPC8CodePage437
+IBM500			2044		CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500
+IBM775			2087		cp775 csPC775Baltic
+IBM850			2009		cp850 850 csPC850Multilingual
+IBM851			2045		cp851 851 csIBM851
+IBM852			2010		cp852 852 csPCp852
+IBM855			2046		cp855 855 csIBM855
+IBM857			2047		cp857 857 csIBM857
+IBM860			2048		cp860 860 csIBM860
+IBM861			2049		cp861 861 cp-is csIBM861
+IBM862			2013		cp862 862 csPC862LatinHebrew
+IBM863			2050		cp863 863 csIBM863
+IBM864			2051		cp864 csIBM864
+IBM865			2052		cp865 865 csIBM865
+IBM866			2086		cp866 866 csIBM866
+IBM868			2053		CP868 cp-ar csIBM868
+IBM869			2054		cp869 869 cp-gr csIBM869
+IBM870			2055		CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870
+IBM871			2056		CP871 ebcdic-cp-is csIBM871
+IBM880			2057		cp880 EBCDIC-Cyrillic csIBM880
+IBM891			2058		cp891 csIBM891
+IBM903			2059		cp903 csIBM903
+IBM904			2060		cp904 904 csIBBM904
+IBM905			2061		CP905 ebcdic-cp-tr csIBM905
+IBM918			2062		CP918 ebcdic-cp-ar2 csIBM918
+IBM1026			2063		CP1026 csIBM1026
+EBCDIC-AT-DE		2064		csIBMEBCDICATDE
+EBCDIC-AT-DE-A		2065		csEBCDICATDEA
+EBCDIC-CA-FR		2066		csEBCDICCAFR
+EBCDIC-DK-NO		2067		csEBCDICDKNO
+EBCDIC-DK-NO-A		2068		csEBCDICDKNOA
+EBCDIC-FI-SE		2069		csEBCDICFISE
+EBCDIC-FI-SE-A		2070		csEBCDICFISEA
+EBCDIC-FR		2071		csEBCDICFR
+EBCDIC-IT		2072		csEBCDICIT
+EBCDIC-PT		2073		csEBCDICPT
+EBCDIC-ES		2074		csEBCDICES
+EBCDIC-ES-A		2075		csEBCDICESA
+EBCDIC-ES-S		2076		csEBCDICESS
+EBCDIC-UK		2077		csEBCDICUK
+EBCDIC-US		2078		csEBCDICUS
+UNKNOWN-8BIT		2079		csUnknown8BiT
+MNEMONIC		2080		csMnemonic
+MNEM			2081		csMnem
+VISCII			2082		csVISCII
+VIQR			2083		csVIQR
+KOI8-R			2084		csKOI8R
+KOI8-U			2088
+IBM00858		2089		CCSID00858 CP00858 PC-Multilingual-850+euro
+IBM00924		2090		CCSID00924 CP00924 ebcdic-Latin9--euro
+IBM01140		2091		CCSID01140 CP01140 ebcdic-us-37+euro
+IBM01141		2092		CCSID01141 CP01141 ebcdic-de-273+euro
+IBM01142		2093		CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro
+IBM01143		2094		CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro
+IBM01144		2095		CCSID01144 CP01144 ebcdic-it-280+euro
+IBM01145		2096		CCSID01145 CP01145 ebcdic-es-284+euro
+IBM01146		2097		CCSID01146 CP01146 ebcdic-gb-285+euro
+IBM01147		2098		CCSID01147 CP01147 ebcdic-fr-297+euro
+IBM01148		2099		CCSID01148 CP01148 ebcdic-international-500+euro
+IBM01149		2100		CCSID01149 CP01149 ebcdic-is-871+euro
+Big5-HKSCS		2101
+IBM1047			2102		IBM-1047
+PTCP154			2103		csPTCP154 PT154 CP154 Cyrillic-Asian
+Amiga-1251		2104		Ami1251 Amiga1251 Ami-1251
+KOI7-switched		2105
+UNICODE-1-1		1010		csUnicode11
+SCSU			1011
+UTF-7			1012
+UTF-16BE		1013
+UTF-16LE		1014
+UTF-16			1015
+CESU-8			1016		csCESU-8
+UTF-32			1017
+UTF-32BE		1018
+UTF-32LE		1019
+BOCU-1			1020		csBOCU-1
+UNICODE-1-1-UTF-7	103		csUnicode11UTF7
+UTF-8			106		UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8
+ISO-8859-13		109		8859_13 ISO8859-13
+ISO-8859-14		110		iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14
+ISO-8859-15		111		ISO_8859-15 Latin-9 8859_15 ISO8859-15
+ISO-8859-16		112		iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10
+GBK			113		CP936 MS936 windows-936
+GB18030			114
+OSD_EBCDIC_DF04_15	115
+OSD_EBCDIC_DF03_IRV	116
+OSD_EBCDIC_DF04_1	117
+JIS_Encoding		16		csJISEncoding
+Shift_JIS		17		MS_Kanji csShiftJIS X-SJIS Shift-JIS
+EUC-JP			18		csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP
+Extended_UNIX_Code_Fixed_Width_for_Japanese	19		csEUCFixWidJapanese
+ISO-10646-UCS-Basic	1002		csUnicodeASCII
+ISO-10646-Unicode-Latin1	1003		csUnicodeLatin1 ISO-10646
+ISO-Unicode-IBM-1261	1005		csUnicodeIBM1261
+ISO-Unicode-IBM-1268	1006		csUnicodeIBM1268
+ISO-Unicode-IBM-1276	1007		csUnicodeIBM1276
+ISO-Unicode-IBM-1264	1008		csUnicodeIBM1264
+ISO-Unicode-IBM-1265	1009		csUnicodeIBM1265
+ISO-8859-1-Windows-3.0-Latin-1	2000		csWindows30Latin1
+ISO-8859-1-Windows-3.1-Latin-1	2001		csWindows31Latin1
+ISO-8859-2-Windows-Latin-2	2002		csWindows31Latin2
+ISO-8859-9-Windows-Latin-5	2003		csWindows31Latin5
+Adobe-Standard-Encoding	2005		csAdobeStandardEncoding
+Ventura-US		2006		csVenturaUS
+Ventura-International	2007		csVenturaInternational
+PC8-Danish-Norwegian	2012		csPC8DanishNorwegian
+PC8-Turkish		2014		csPC8Turkish
+IBM-Symbols		2015		csIBMSymbols
+IBM-Thai		2016		csIBMThai
+HP-Legal		2017		csHPLegal
+HP-Pi-font		2018		csHPPiFont
+HP-Math8		2019		csHPMath8
+Adobe-Symbol-Encoding	2020		csHPPSMath
+HP-DeskTop		2021		csHPDesktop
+Ventura-Math		2022		csVenturaMath
+Microsoft-Publishing	2023		csMicrosoftPublishing
+Windows-31J		2024		csWindows31J
+GB2312			2025		csGB2312 EUC-CN EUCCN CN-GB
+Big5			2026		csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE
+windows-1250		2250		CP1250 MS-EE
+windows-1251		2251		CP1251 MS-CYRL
+windows-1252		2252		CP1252 MS-ANSI
+windows-1253		2253		CP1253 MS-GREEK
+windows-1254		2254		CP1254 MS-TURK
+windows-1255		2255
+windows-1256		2256		CP1256 MS-ARAB
+windows-1257		2257		CP1257 WINBALTRIM
+windows-1258		2258
+TIS-620			2259
+HZ-GB-2312		2085
+
+# Additional encodings not defined by IANA
+
+# Arbitrary allocations
+#CP737			3001
+#CP853			3002
+#CP856			3003
+CP874			3004		WINDOWS-874
+#CP922			3005
+#CP1046			3006
+#CP1124			3007
+#CP1125			3008		WINDOWS-1125
+#CP1129			3009
+#CP1133			3010		IBM-CP1133
+#CP1161			3011		IBM-1161 IBM1161 CSIBM1161
+#CP1162			3012		IBM-1162 IBM1162 CSIBM1162
+#CP1163			3013		IBM-1163 IBM1163 CSIBM1163
+#GEORGIAN-ACADEMY	3014
+#GEORGIAN-PS		3015
+#KOI8-RU		3016
+#KOI8-T			3017
+#MACARABIC		3018		X-MAC-ARABIC MAC-ARABIC
+#MACCROATIAN		3019		X-MAC-CROATIAN MAC-CROATIAN
+#MACGREEK		3020		X-MAC-GREEK MAC-GREEK
+#MACHEBREW		3021		X-MAC-HEBREW MAC-HEBREW
+#MACICELAND		3022		X-MAC-ICELAND MAC-ICELAND
+#MACROMANIA		3023		X-MAC-ROMANIA MAC-ROMANIA
+#MACTHAI		3024		X-MAC-THAI MAC-THAI
+#MACTURKISH		3025		X-MAC-TURKISH MAC-TURKISH
+#MULELAO-1		3026
+
+# From Unicode Lib
+ISO-IR-182		4000
+ISO-IR-197		4002
+ISO-2022-JP-1		4008
+MACCYRILLIC		4009		X-MAC-CYRILLIC MAC-CYRILLIC
+MACUKRAINE		4010		X-MAC-UKRAINIAN MAC-UKRAINIAN
+MACCENTRALEUROPE	4011		X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN
+JOHAB			4012
+ISO-8859-11		4014		iso-ir-166 ISO_8859-11 ISO8859-11 8859_11
+X-CURRENT		4999		X-SYSTEM
+X-ACORN-LATIN1		5001
+X-ACORN-FUZZY		5002
diff --git a/test/data/cscodec/INDEX b/test/data/cscodec/INDEX
new file mode 100644
index 0000000..d6d338a
--- /dev/null
+++ b/test/data/cscodec/INDEX
@@ -0,0 +1,6 @@
+# Index file for charset codec tests
+#
+# Test			Description
+
+simple.dat		Simple tests, designed to validate testdriver
+UTF-8-test.txt		Markus Kuhn's UTF-8 decoding test file
diff --git a/test/data/cscodec/UTF-8-test.txt b/test/data/cscodec/UTF-8-test.txt
new file mode 100644
index 0000000..920e54e
Binary files /dev/null and b/test/data/cscodec/UTF-8-test.txt differ
diff --git a/test/data/cscodec/simple.dat b/test/data/cscodec/simple.dat
new file mode 100644
index 0000000..3e2c7ae
Binary files /dev/null and b/test/data/cscodec/simple.dat differ
diff --git a/test/data/input/INDEX b/test/data/input/INDEX
new file mode 100644
index 0000000..c2c97ea
--- /dev/null
+++ b/test/data/input/INDEX
@@ -0,0 +1,5 @@
+# Index file for inputstream tests
+#
+# Test			Description
+
+UTF-8-test.txt		Markus Kuhn's UTF-8 decoding test file
diff --git a/test/data/input/UTF-8-test.txt b/test/data/input/UTF-8-test.txt
new file mode 100644
index 0000000..abd16f7
Binary files /dev/null and b/test/data/input/UTF-8-test.txt differ
diff --git a/test/filter.c b/test/filter.c
new file mode 100644
index 0000000..ff4d1e7
--- /dev/null
+++ b/test/filter.c
@@ -0,0 +1,357 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <parserutils/parserutils.h>
+
+#include "utils/utils.h"
+
+#include "input/filter.h"
+
+#include "testutils.h"
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	/* Allocation hook handed to the library; pw carries no state here */
+	void *block = realloc(ptr, len);
+	UNUSED(pw);
+	return block;
+}
+
+int main(int argc, char **argv)
+{
+	/* Drives one UTF-8 filter through a fixed series of scenarios; argv[1]
+	 * is the aliases file handed to parserutils_initialise(). */
+	parserutils_filter_optparams params;
+	parserutils_filter *input;
+	uint8_t inbuf[64], outbuf[64];
+	size_t inlen, outlen;
+	const uint8_t *in = inbuf;
+	uint8_t *out = outbuf;
+
+	if (argc != 2) {
+		printf("Usage: %s <aliases_file>\n", argv[0]);
+		return 1;
+	}
+
+	/* Initialise library */
+	assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
+			PARSERUTILS_OK);
+
+	/* Create input filter */
+	input = parserutils_filter_create("UTF-8", myrealloc, NULL);
+	assert(input);
+
+	/* Convert filter to UTF-8 encoding */
+	params.encoding.name = "UTF-8";
+	assert(parserutils_filter_setopt(input, PARSERUTILS_FILTER_SET_ENCODING,
+			(parserutils_filter_optparams *) &params) ==
+			PARSERUTILS_OK);
+
+	/* Simple case - valid input & output buffer large enough */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0o!");
+	inlen = strlen((const char *) inbuf);
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	/* Trace in/inlen and outbuf/outlen as the call left them */
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	assert(parserutils_filter_reset(input) == PARSERUTILS_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0o!",
+			SLEN("hell\xc2\xa0o!")) == 0);
+
+	/* Too small an output buffer; no encoding edge cases */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hello!");
+	inlen = strlen((const char *) inbuf);
+	outbuf[0] = '\0';
+	outlen = 5;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_NOMEM);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	/* Remaining capacity, assuming outlen now holds the unused part of 5 */
+	outlen = 64 - 5 + outlen;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	assert(parserutils_filter_reset(input) == PARSERUTILS_OK);
+
+	assert(memcmp(outbuf, "hello!",
+			SLEN("hello!")) == 0);
+
+	/* Illegal input sequence; output buffer large enough */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\x96o!");
+	inlen = strlen((const char *) inbuf);
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	/* Input does loose decoding, converting to U+FFFD if illegal
+	 * input is encountered */
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	assert(parserutils_filter_reset(input) == PARSERUTILS_OK);
+
+	assert(memcmp(outbuf, "hell\xef\xbf\xbdo!",
+			SLEN("hell\xef\xbf\xbdo!")) == 0);
+
+	/* Input ends mid-sequence */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0o!");
+	/* Withhold the last 3 bytes: input stops after the 0xc2 lead byte */
+	inlen = strlen((const char *) inbuf) - 3;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	/* Release the withheld bytes (inlen is presumably 0 by now) */
+	inlen += 3;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	assert(parserutils_filter_reset(input) == PARSERUTILS_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0o!",
+			SLEN("hell\xc2\xa0o!")) == 0);
+
+	/* Input ends mid-sequence, but second attempt has too small a
+	 * buffer, but large enough to write out the incomplete character. */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0o!");
+	inlen = strlen((const char *) inbuf) - 3;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	inlen += 3;
+	outlen = 3;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_NOMEM);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	/* presumably 7 bytes are out by now, so 57 of the 64 remain */
+	outlen = 64 - 7;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	assert(parserutils_filter_reset(input) == PARSERUTILS_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0o!",
+			SLEN("hell\xc2\xa0o!")) == 0);
+
+	/* Input ends mid-sequence, but second attempt has too small a
+	 * buffer, not large enough to write out the incomplete character. */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0o!");
+	inlen = strlen((const char *) inbuf) - 3;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	inlen += 3;
+	outlen = 1;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_NOMEM);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	/* presumably only "hell" is out so far, so 60 of the 64 remain */
+	outlen = 60;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	assert(parserutils_filter_reset(input) == PARSERUTILS_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0o!",
+			SLEN("hell\xc2\xa0o!")) == 0);
+
+	/* Input ends mid-sequence, but second attempt contains
+	 * invalid character */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xc2o!");
+	inlen = strlen((const char *) inbuf) - 3;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	inlen += 3;
+
+	/* Input does loose decoding, converting to U+FFFD if illegal
+	 * input is encountered */
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	assert(parserutils_filter_reset(input) == PARSERUTILS_OK);
+
+	assert(memcmp(outbuf, "hell\xef\xbf\xbd\xef\xbf\xbdo!",
+			SLEN("hell\xef\xbf\xbd\xef\xbf\xbdo!")) == 0);
+
+	/* Input ends mid-sequence, but second attempt contains another
+	 * incomplete character */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0\xc2\xa1o!");
+	/* Fed as 5 + 2 + 3 bytes, assuming every call drains what it is given */
+	inlen = strlen((const char *) inbuf) - 5;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	inlen += 2;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	inlen += 3;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	assert(parserutils_filter_reset(input) == PARSERUTILS_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0\xc2\xa1o!",
+			SLEN("hell\xc2\xa0\xc2\xa1o!")) == 0);
+
+	/* Input ends mid-sequence, but second attempt contains insufficient
+	 * data to complete the incomplete character */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xe2\x80\xa2o!");
+	/* Fed as 5 + 1 + 3 bytes, assuming every call drains what it is given */
+	inlen = strlen((const char *) inbuf) - 4;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	inlen += 1;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	inlen += 3;
+
+	assert(parserutils_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == PARSERUTILS_OK);
+
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - ((uint8_t *) outbuf)),
+			outbuf, (int) outlen);
+
+	assert(parserutils_filter_reset(input) == PARSERUTILS_OK);
+
+	assert(memcmp(outbuf, "hell\xe2\x80\xa2o!",
+			SLEN("hell\xe2\x80\xa2o!")) == 0);
+
+	/* Clean up */
+	parserutils_filter_destroy(input);
+
+	assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
diff --git a/test/inputstream.c b/test/inputstream.c
new file mode 100644
index 0000000..bad3127
--- /dev/null
+++ b/test/inputstream.c
@@ -0,0 +1,97 @@
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <parserutils/parserutils.h>
+#include <parserutils/charset/utf8.h>
+#include <parserutils/input/inputstream.h>
+
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	/* Allocation hook handed to the library; pw carries no state here */
+	void *block = realloc(ptr, len);
+	UNUSED(pw);
+	return block;
+}
+
+int main(int argc, char **argv)
+{
+	/* Streams argv[2] through a UTF-8 inputstream in CHUNK_SIZE pieces */
+	parserutils_inputstream *stream;
+	FILE *fp;
+	long end;
+	size_t len;
+#define CHUNK_SIZE (4096)
+	uint8_t buf[CHUNK_SIZE];
+	size_t clen;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	/* Initialise library */
+	assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
+			PARSERUTILS_OK);
+
+	stream = parserutils_inputstream_create("UTF-8", 1, NULL,
+			myrealloc, NULL);
+	assert(stream != NULL);
+
+	fp = fopen(argv[2], "rb");
+	if (fp == NULL) {
+		printf("Failed opening %s\n", argv[2]);
+		return 1;
+	}
+
+	/* Measure the file; a failed seek/tell must not turn into a length */
+	assert(fseek(fp, 0, SEEK_END) == 0);
+	end = ftell(fp);
+	assert(end >= 0 && fseek(fp, 0, SEEK_SET) == 0);
+	len = (size_t) end;
+
+	/* Feed whole chunks, draining the stream after each one */
+	while (len >= CHUNK_SIZE) {
+		assert(fread(buf, 1, CHUNK_SIZE, fp) == CHUNK_SIZE);
+		assert(parserutils_inputstream_append(stream,
+				buf, CHUNK_SIZE) == PARSERUTILS_OK);
+		len -= CHUNK_SIZE;
+
+		while (parserutils_inputstream_peek(stream, 0, &clen) !=
+				PARSERUTILS_INPUTSTREAM_OOD) {
+			parserutils_inputstream_advance(stream, clen);
+		}
+	}
+
+	/* Feed the final partial chunk, if any */
+	if (len > 0) {
+		assert(fread(buf, 1, len, fp) == len);
+		assert(parserutils_inputstream_append(stream,
+				buf, len) == PARSERUTILS_OK);
+	}
+
+	fclose(fp);
+
+	assert(parserutils_inputstream_insert(stream,
+			(const uint8_t *) "hello!!!",
+			SLEN("hello!!!")) == PARSERUTILS_OK);
+
+	assert(parserutils_inputstream_append(stream, NULL, 0) ==
+			PARSERUTILS_OK);
+
+	while (parserutils_inputstream_peek(stream, 0, &clen) !=
+			PARSERUTILS_INPUTSTREAM_EOF) {
+		parserutils_inputstream_advance(stream, clen);
+	}
+
+	parserutils_inputstream_destroy(stream);
+
+	assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
diff --git a/test/parserutils.c b/test/parserutils.c
new file mode 100644
index 0000000..c6d671a
--- /dev/null
+++ b/test/parserutils.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <parserutils/parserutils.h>
+
+#include "testutils.h"
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	/* Allocation hook handed to the library; pw carries no state here */
+	void *block = realloc(ptr, len);
+	UNUSED(pw);
+	return block;
+}
+
+int main(int argc, char **argv)
+{
+	/* Smoke test: bring the library up with argv[1] and tear it down */
+	if (argc != 2) {
+		printf("Usage: %s <aliases_file>\n", argv[0]);
+		return 1;
+	}
+
+	assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
+			PARSERUTILS_OK);
+	assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
diff --git a/test/regression/cscodec-segv.c b/test/regression/cscodec-segv.c
new file mode 100644
index 0000000..5802fdf
--- /dev/null
+++ b/test/regression/cscodec-segv.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+
+#include "charset/charset.h"
+#include <parserutils/charset/codec.h>
+
+#include "testutils.h"
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	/* Allocation hook handed to the library; pw carries no state here */
+	void *block = realloc(ptr, len);
+	UNUSED(pw);
+	return block;
+}
+
+int main(int argc, char **argv)
+{
+	parserutils_charset_codec *codec;
+
+	if (argc != 2) {
+		printf("Usage: %s <aliases_file>\n", argv[0]);
+		return 1;
+	}
+	/* Bring up the charset layer using the aliases file from argv[1] */
+	assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) == 
+			PARSERUTILS_OK);
+	/* Create a UTF-8 codec, check it exists, then destroy it unused */
+	codec = parserutils_charset_codec_create("UTF-8", myrealloc, NULL);
+	assert(codec != NULL);
+
+	parserutils_charset_codec_destroy(codec);
+
+	assert(parserutils_charset_finalise(myrealloc, NULL) == 
+			PARSERUTILS_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
diff --git a/test/regression/filter-segv.c b/test/regression/filter-segv.c
new file mode 100644
index 0000000..761caab
--- /dev/null
+++ b/test/regression/filter-segv.c
@@ -0,0 +1,39 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <parserutils/parserutils.h>
+
+#include "input/filter.h"
+
+#include "testutils.h"
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	/* Allocation hook handed to the library; pw carries no state here */
+	void *block = realloc(ptr, len);
+	UNUSED(pw);
+	return block;
+}
+
+int main(int argc, char **argv)
+{
+	parserutils_filter *input;
+
+	if (argc != 2) {
+		printf("Usage: %s <aliases_file>\n", argv[0]);
+		return 1;
+	}
+
+	/* argv[1] (the aliases file) is passed straight to the library */
+	assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
+			PARSERUTILS_OK);
+
+	/* Create a UTF-8 filter and destroy it again without using it */
+	input = parserutils_filter_create("UTF-8", myrealloc, NULL);
+	assert(input);
+	parserutils_filter_destroy(input);
+
+	assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
+
+	printf("PASS\n");
+	return 0;
+}
diff --git a/test/regression/stream-nomem.c b/test/regression/stream-nomem.c
new file mode 100644
index 0000000..f62b392
--- /dev/null
+++ b/test/regression/stream-nomem.c
@@ -0,0 +1,94 @@
+#include <stdio.h>
+#include <string.h>
+
+#include <parserutils/parserutils.h>
+#include <parserutils/input/inputstream.h>
+
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	/* Allocation hook handed to the library; pw carries no state here */
+	void *block = realloc(ptr, len);
+	UNUSED(pw);
+	return block;
+}
+
+int main(int argc, char **argv)
+{
+	parserutils_inputstream *stream;
+	/* Appends one BUFFER_SIZE-byte block to a UTF-8 stream and drains it */
+	/* This is specially calculated so that the inputstream is forced to
+	 * reallocate (it assumes that the inputstream's buffer chunk size
+	 * is 4k) */
+#define BUFFER_SIZE (4096 + 4)
+	uint8_t input_buffer[BUFFER_SIZE];
+//	uint8_t *buffer;
+//	size_t buflen;
+	uintptr_t c;
+	size_t clen;
+
+	if (argc != 2) {
+		printf("Usage: %s <aliases_file>\n", argv[0]);
+		return 1;
+	}
+
+	/* Populate the buffer with something sane */
+	memset(input_buffer, 'a', BUFFER_SIZE);
+	/* Now, set up our test data */
+	input_buffer[BUFFER_SIZE - 1] = '5';
+	input_buffer[BUFFER_SIZE - 2] = '4';
+	input_buffer[BUFFER_SIZE - 3] = '\xbd';
+	input_buffer[BUFFER_SIZE - 4] = '\xbf';
+	/* This byte sits at offset 4095, the last byte of the first 4k, and
+	 * is meant to cause the entirety of U+FFFD to be buffered until
+	 * after the buffer has been enlarged */
+	input_buffer[BUFFER_SIZE - 5] = '\xef';
+	input_buffer[BUFFER_SIZE - 6] = '3';
+	input_buffer[BUFFER_SIZE - 7] = '2';
+	input_buffer[BUFFER_SIZE - 8] = '1';
+
+	assert(parserutils_initialise(argv[1], myrealloc, NULL) == 
+			PARSERUTILS_OK);
+
+	stream = parserutils_inputstream_create("UTF-8", 0, 
+			NULL, myrealloc, NULL);
+	assert(stream != NULL);
+
+	assert(parserutils_inputstream_append(stream, 
+			input_buffer, BUFFER_SIZE) == PARSERUTILS_OK);
+	/* NOTE(review): presumably an empty append marks end of input - confirm */
+	assert(parserutils_inputstream_append(stream, NULL, 0) == 
+			PARSERUTILS_OK);
+	/* Consume the stream: peek, then advance by the reported length */
+	while ((c = parserutils_inputstream_peek(stream, 0, &clen)) != 
+			PARSERUTILS_INPUTSTREAM_EOF)
+		parserutils_inputstream_advance(stream, clen);
+
+/* Disabled. NOTE(review): uses css_* names not seen elsewhere here; stale?
+	assert(css_inputstream_claim_buffer(stream, &buffer, &buflen) == 
+			CSS_OK);
+
+	assert(buflen == BUFFER_SIZE);
+
+	printf("Buffer: '%.*s'\n", 8, buffer + (BUFFER_SIZE - 8));
+
+	assert( buffer[BUFFER_SIZE - 6] == '3' && 
+		buffer[BUFFER_SIZE - 5] == (uint8_t) '\xef' && 
+		buffer[BUFFER_SIZE - 4] == (uint8_t) '\xbf' && 
+		buffer[BUFFER_SIZE - 3] == (uint8_t) '\xbd' && 
+		buffer[BUFFER_SIZE - 2] == '4');
+
+	free(buffer);
+*/
+
+	parserutils_inputstream_destroy(stream);
+
+	assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
diff --git a/test/testrunner.pl b/test/testrunner.pl
new file mode 100644
index 0000000..1c6c66d
--- /dev/null
+++ b/test/testrunner.pl
@@ -0,0 +1,167 @@
+#!/bin/perl
+#
+# Testcase runner
+#
+# Usage: testrunner <directory> [<executable extension>]
+#
+# Operates upon INDEX files described in the README.
+# Locates and executes testcases, feeding data files to programs
+# as appropriate.
+# Logs testcase output to file.
+# Aborts test sequence on detection of error: a testcase fails if its
+# last output line starts with FAIL or it exits with a non-zero status.
+
+use warnings;
+use strict;
+use File::Spec;
+use IPC::Open3;
+
+if (@ARGV < 1) {
+	print "Usage: testrunner.pl <directory> [<exeext>]\n";
+	exit(1);
+}
+
+# Get directory
+my $directory = shift @ARGV;
+
+# Get EXE extension (if any)
+my $exeext = "";
+$exeext = shift @ARGV if (@ARGV > 0);
+
+# Open log file and /dev/null
+open(LOG, ">$directory/log") or die "Failed opening test log";
+open(NULL, "+<", File::Spec->devnull) or die "Failed opening /dev/null";
+
+# Open testcase index
+open(TINDEX, "<$directory/INDEX") or die "Failed opening test INDEX";
+
+# Parse testcase index, looking for testcases
+while (my $line = <TINDEX>) {
+	next if ($line =~ /^(#.*)?$/);
+
+	# Found one; decompose
+	(my $test, my $desc, my $data) = split /\t+/, $line;
+
+	# Strip whitespace
+	$test =~ s/^\s+|\s+$//g;
+	$desc =~ s/^\s+|\s+$//g;
+	$data =~ s/^\s+|\s+$//g if ($data);
+
+	# Append EXE extension to binary name
+	$test = $test . $exeext;
+
+	print "Test: $desc\n";
+
+	my $pid;
+
+	if ($data) {
+		# Testcase has external data files
+
+		# Open datafile index
+		open(DINDEX, "<$directory/data/$data/INDEX") or
+			die "Failed opening $directory/data/$data/INDEX";
+
+		# Parse datafile index, looking for datafiles
+		while (my $dentry = <DINDEX>) {
+			next if ($dentry =~ /^(#.*)?$/);
+
+			# Found one; decompose
+			(my $dtest, my $ddesc) = split /\t+/, $dentry;
+
+			# Strip whitespace
+			$dtest =~ s/^\s+|\s+$//g;
+			$ddesc =~ s/^\s+|\s+$//g;
+
+			print LOG "Running $directory/$test " .
+					"$directory/data/Aliases " .
+					"$directory/data/$data/$dtest\n";
+
+			# Make message fit on an 80 column terminal
+			my $msg = "    ==> $test [$data/$dtest]";
+			$msg = $msg . "." x (80 - length($msg) - 8);
+
+			print $msg;
+
+			# Run testcase
+			$pid = open3("&<NULL", \*OUT, \*ERR,
+					"$directory/$test",
+					"$directory/data/Aliases",
+					"$directory/data/$data/$dtest");
+
+			my $last = "FAIL";
+
+			# Marshal testcase output to log file
+			while (my $output = <OUT>) {
+				print LOG "    $output";
+				$last = $output;
+			}
+
+			# Wait for child; a non-zero exit status is a failure too
+			waitpid($pid, 0);
+			$last = "FAIL" if ($? != 0);
+			print substr($last, 0, 4) . "\n";
+
+			# Bail, noisily, on failure
+			if (substr($last, 0, 4) eq "FAIL") {
+				# Write any stderr output to the log
+				while (my $errors = <ERR>) {
+					print LOG "    $errors";
+				}
+
+				print "\n\nFailure detected: " .
+						"consult log file\n\n\n";
+
+				exit(1);
+			}
+		}
+
+		close(DINDEX);
+	} else {
+		# Testcase has no external data files
+		print LOG "Running $directory/$test $directory/data/Aliases\n";
+
+		# Make message fit on an 80 column terminal
+		my $msg = "    ==> $test";
+		$msg = $msg . "." x (80 - length($msg) - 8);
+
+		print $msg;
+
+		# Run testcase
+		$pid = open3("&<NULL", \*OUT, \*ERR,
+				"$directory/$test", "$directory/data/Aliases");
+
+		my $last = "FAIL";
+
+		# Marshal testcase output to log file
+		while (my $output = <OUT>) {
+			print LOG "    $output";
+			$last = $output;
+		}
+
+		# Wait for child; a non-zero exit status is a failure too
+		waitpid($pid, 0);
+		$last = "FAIL" if ($? != 0);
+		print substr($last, 0, 4) . "\n";
+
+		# Bail, noisily, on failure
+		if (substr($last, 0, 4) eq "FAIL") {
+			# Write any stderr output to the log
+			while (my $errors = <ERR>) {
+				print LOG "    $errors";
+			}
+
+			print "\n\nFailure detected: " .
+					"consult log file\n\n\n";
+
+			exit(1);
+		}
+	}
+
+	print "\n";
+}
+
+# Clean up
+close(TINDEX);
+
+close(NULL);
+close(LOG);
diff --git a/test/testutils.h b/test/testutils.h
new file mode 100644
index 0000000..c91c5b8
--- /dev/null
+++ b/test/testutils.h
@@ -0,0 +1,123 @@
+#ifndef test_testutils_h_
+#define test_testutils_h_
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef UNUSED
+#define UNUSED(x) ((x) = (x))
+#endif
+
+/* Redefine assert, so we can simply use the standard assert mechanism
+ * within testcases and exit with the right output for the testrunner
+ * to do the right thing: a failed assertion prints a "FAIL" line. */
+void __assert2(const char *expr, const char *function,
+		const char *file, int line);
+
+void __assert2(const char *expr, const char *function,
+		const char *file, int line)
+{
+	(void) function;
+	(void) file;
+
+	printf("FAIL - %s at line %d\n", expr, line);
+	exit(EXIT_FAILURE);
+}
+
+#undef assert	/* drop any definition inherited from <assert.h> */
+#define assert(expr) \
+  ((void) ((expr) || (__assert2 (#expr, __func__, __FILE__, __LINE__), 0)))
+
+
+typedef bool (*line_func)(const char *data, size_t datalen, void *pw);
+
+static size_t parse_strlen(const char *str, size_t limit);
+bool parse_testfile(const char *filename, line_func callback, void *pw);
+size_t parse_filesize(const char *filename);
+
+/**
+ * Drive a callback over the lines of a testcase datafile
+ *
+ * \param filename  Path of the datafile to read
+ * \param callback  Handler invoked for each line that is not just "\n"
+ * \param pw        Opaque client pointer, passed through to the callback
+ * \return true if every callback succeeded, false on any failure.
+ */
+bool parse_testfile(const char *filename, line_func callback, void *pw)
+{
+	char line[300];
+	bool ok = true;
+	FILE *input;
+
+	input = fopen(filename, "rb");
+	if (input == NULL) {
+		printf("Failed opening %s\n", filename);
+		return false;
+	}
+
+	/* Stop at end of file, or as soon as the callback rejects a line */
+	while (ok && fgets(line, sizeof line, input) != NULL) {
+		/* Lines consisting solely of a newline are not reported */
+		if (line[0] == '\n')
+			continue;
+
+		ok = callback(line, parse_strlen(line, sizeof line), pw);
+	}
+
+	fclose(input);
+
+	return ok;
+}
+
+/**
+ * Measure a '\n'-terminated line; NUL bytes do not end the scan
+ *
+ * \param str    Buffer holding the line (may be NULL)
+ * \param limit  Size of the buffer in bytes (may be 0)
+ * \return Length including the '\n', capped at limit; 0 if no buffer
+ */
+size_t parse_strlen(const char *str, size_t limit)
+{
+	size_t len = 0;
+
+	/* A zero limit would make "limit - 1" wrap and unbound the scan */
+	if (str == NULL || limit == 0)
+		return 0;
+
+	/* Only '\n' or the limit stops the scan; NULs are not special */
+	while (len < limit - 1 && str[len] != '\n') {
+		len++;
+	}
+
+	/* Count the terminator too (or the last slot if none was found) */
+	return len + 1;
+}
+
+/**
+ * Read the size of a file
+ *
+ * \param filename  Name of file to read size of
+ * \return File size (in bytes), or 0 on error (open, seek or tell failed)
+ */
+size_t parse_filesize(const char *filename)
+{
+	FILE *fp;
+	long pos = -1;
+
+	fp = fopen(filename, "rb");
+	if (fp == NULL) {
+		printf("Failed opening %s\n", filename);
+		return 0;
+	}
+
+	/* ftell() yields -1 on failure; never let that wrap into a size_t */
+	if (fseek(fp, 0, SEEK_END) == 0)
+		pos = ftell(fp);
+
+	fclose(fp);
+	return pos < 0 ? 0 : (size_t) pos;
+}
+
+
+#endif
-- 
cgit v1.2.3