Add UTF-16 input stream.

This appears to work correctly, but could probably do with more testing.

svn path=/trunk/hubbub/; revision=3362
author: John Mark Bell <jmb@netsurf-browser.org> 2007-06-24 10:31:23 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2007-06-24 10:31:23 +0000
commit: ff835de85e45a77c810d0f8dc7916ff0be677545 (patch)
tree: ce3a645348b52a492ec9d171b8f6f623a08c2530 /test
parent: 86214cfb1e17624c3b0851a8a3cd6909c2623ce9 (diff)
download: libhubbub-ff835de85e45a77c810d0f8dc7916ff0be677545.tar.gz
libhubbub-ff835de85e45a77c810d0f8dc7916ff0be677545.tar.bz2
3 files changed, 177 insertions, 1 deletions
diff --git a/test/INDEX b/test/INDEX
index 100dd21..d43829b 100644
--- a/test/INDEX
+++ b/test/INDEX
@@ -11,5 +11,6 @@ filter		Input stream filtering
 hubbub		Library initialisation/finalisation
 inputstream	Buffered input stream			html
 parser		Public parser API			html
+parser-utf16	Public parser API (utf-16 internally)	html
 tokeniser	HTML tokeniser				html
 tokeniser2	HTML tokeniser (again)			tokeniser2
 \ No newline at end of file
diff --git a/test/Makefile b/test/Makefile
index ef50365..bf4670c 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -32,7 +32,7 @@ DEBUG =
 
 # Objects
 OBJS = aliases cscodec csdetect dict entities filter hubbub \
-	inputstream parser tokeniser tokeniser2
+	inputstream parser parser-utf16 tokeniser tokeniser2
 OBJS += regression/cscodec-segv regression/filter-segv
 
 .PHONY: clean debug export release setup test
diff --git a/test/parser-utf16.c b/test/parser-utf16.c
new file mode 100644
index 0000000..9056dd1
--- /dev/null
+++ b/test/parser-utf16.c
@@ -0,0 +1,175 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <hubbub/hubbub.h>
+
+#include <hubbub/parser.h>
+
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+static const uint8_t *pbuffer;
+
+static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
+static void token_handler(const hubbub_token *token, void *pw);
+
+/*
+ * Allocation callback passed to the hubbub entry points in main().
+ *
+ * ptr: block to resize (forwarded unchanged to realloc)
+ * len: requested size in bytes
+ * pw:  client data pointer; not used by this test
+ *
+ * Returns whatever realloc(ptr, len) returns.
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *block = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return block;
+}
+
+/*
+ * Test driver: feeds a file through a hubbub parser created with
+ * "UTF-8" and "UTF-16" as its two encoding arguments, printing each
+ * token via token_handler() and finally the charset reported by the
+ * parser.
+ *
+ * argv[1]: aliases file handed to hubbub_initialise()
+ * argv[2]: file to parse
+ *
+ * Returns 0 after printing "PASS", or 1 on a usage error or when the
+ * input file cannot be opened, sized or read.
+ */
+int main(int argc, char **argv)
+{
+	hubbub_parser *parser;
+	hubbub_parser_optparams params;
+	FILE *fp;
+	long fsize;
+	size_t len;
+#define CHUNK_SIZE (4096)
+	uint8_t buf[CHUNK_SIZE];
+	const char *charset;
+	hubbub_charset_source cssource;
+	uint8_t *buffer;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	/* Initialise library */
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	parser = hubbub_parser_create("UTF-8", "UTF-16", myrealloc, NULL);
+	assert(parser != NULL);
+
+	/* Register buffer_handler so pbuffer tracks the parser's buffer */
+	params.buffer_handler.handler = buffer_handler;
+	params.buffer_handler.pw = NULL;
+	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_BUFFER_HANDLER,
+			&params) == HUBBUB_OK);
+
+	/* Register token_handler to print every token */
+	params.token_handler.handler = token_handler;
+	params.token_handler.pw = NULL;
+	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER,
+			&params) == HUBBUB_OK);
+
+	fp = fopen(argv[2], "rb");
+	if (fp == NULL) {
+		printf("Failed opening %s\n", argv[2]);
+		return 1;
+	}
+
+	/* Determine the file size. ftell() reports failure as -1, which
+	 * must not be converted to size_t and used as a byte count. */
+	if (fseek(fp, 0, SEEK_END) != 0 || (fsize = ftell(fp)) < 0 ||
+			fseek(fp, 0, SEEK_SET) != 0) {
+		printf("Failed sizing %s\n", argv[2]);
+		fclose(fp);
+		return 1;
+	}
+	len = (size_t) fsize;
+
+	/* Feed the file in CHUNK_SIZE pieces; the last piece may be short */
+	while (len > 0) {
+		size_t chunk = (len < CHUNK_SIZE) ? len : CHUNK_SIZE;
+
+		if (fread(buf, 1, chunk, fp) != chunk) {
+			printf("Failed reading %s\n", argv[2]);
+			fclose(fp);
+			return 1;
+		}
+
+		assert(hubbub_parser_parse_chunk(parser,
+				buf, chunk) == HUBBUB_OK);
+
+		len -= chunk;
+	}
+
+	/* Always signal end of input, including when the file size is an
+	 * exact multiple of CHUNK_SIZE and there is no trailing chunk. */
+	assert(hubbub_parser_completed(parser) == HUBBUB_OK);
+
+	fclose(fp);
+
+	charset = hubbub_parser_read_charset(parser, &cssource);
+
+	printf("Charset: %s (from %d)\n", charset, cssource);
+
+	/* Claim the parser's buffer; this test then frees it itself */
+	assert(hubbub_parser_claim_buffer(parser, &buffer, &len) ==
+			HUBBUB_OK);
+
+	free(buffer);
+
+	hubbub_parser_destroy(parser);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+/*
+ * Buffer callback registered with the parser in main().
+ *
+ * Stores the supplied buffer pointer in the file-scope pbuffer, which
+ * token_handler() uses as the base for the data offsets in each token.
+ *
+ * buffer: buffer pointer to remember
+ * len:    not used
+ * pw:     client data pointer; not used
+ */
+void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
+{
+	pbuffer = buffer;
+
+	UNUSED(pw);
+	UNUSED(len);
+}
+
+/*
+ * Print the name and attribute list of a tag token.
+ *
+ * Shared by the start-tag and end-tag cases of token_handler(), which
+ * produce identical output. All strings are located at offsets from
+ * the file-scope pbuffer.
+ */
+static void print_tag(const hubbub_token *token)
+{
+	size_t i;
+
+	printf("'%.*s' %s\n",
+			(int) token->data.tag.name.len,
+			(const char *) (pbuffer +
+					token->data.tag.name.data_off),
+			(token->data.tag.n_attributes > 0) ?
+					"attributes:" : "");
+
+	for (i = 0; i < token->data.tag.n_attributes; i++) {
+		printf("\t'%.*s' = '%.*s'\n",
+				(int) token->data.tag.attributes[i].name.len,
+				(const char *) (pbuffer +
+					token->data.tag.attributes[i].name.data_off),
+				(int) token->data.tag.attributes[i].value.len,
+				(const char *) (pbuffer +
+					token->data.tag.attributes[i].value.data_off));
+	}
+}
+
+/*
+ * Token callback registered with the parser in main().
+ *
+ * Prints the token's type name followed by its payload. Token strings
+ * are given as an offset/length pair and are resolved against pbuffer,
+ * the pointer most recently recorded by buffer_handler().
+ *
+ * NOTE(review): main() creates the parser with "UTF-16" as its second
+ * encoding argument, so these bytes are presumably UTF-16 and are
+ * printed raw here -- confirm if readable output matters.
+ *
+ * token: token to print
+ * pw:    client data pointer; not used
+ */
+void token_handler(const hubbub_token *token, void *pw)
+{
+	static const char *token_names[] = {
+		"DOCTYPE", "START TAG", "END TAG",
+		"COMMENT", "CHARACTERS", "EOF"
+	};
+	const size_t n_names = sizeof(token_names) / sizeof(token_names[0]);
+
+	UNUSED(pw);
+
+	/* Guard the table lookup so an unexpected type value cannot read
+	 * past the end of token_names. */
+	if ((size_t) token->type < n_names)
+		printf("%s: ", token_names[token->type]);
+	else
+		printf("UNKNOWN: ");
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_DOCTYPE:
+		printf("'%.*s' (%svalid)\n",
+				(int) token->data.doctype.name.len,
+				(const char *) (pbuffer +
+					token->data.doctype.name.data_off),
+				token->data.doctype.correct ? "" : "in");
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	case HUBBUB_TOKEN_END_TAG:
+		print_tag(token);
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+		printf("'%.*s'\n", (int) token->data.comment.len,
+				(const char *) (pbuffer +
+					token->data.comment.data_off));
+		break;
+	case HUBBUB_TOKEN_CHARACTER:
+		printf("'%.*s'\n", (int) token->data.character.len,
+				(const char *) (pbuffer +
+					token->data.character.data_off));
+		break;
+	case HUBBUB_TOKEN_EOF:
+		printf("\n");
+		break;
+	default:
+		/* Unrecognised type: just terminate the output line */
+		printf("\n");
+		break;
+	}
+}
author	John Mark Bell <jmb@netsurf-browser.org>	2007-06-24 10:31:23 +0000
committer	John Mark Bell <jmb@netsurf-browser.org>	2007-06-24 10:31:23 +0000
commit	ff835de85e45a77c810d0f8dc7916ff0be677545 (patch)
tree	ce3a645348b52a492ec9d171b8f6f623a08c2530 /test
parent	86214cfb1e17624c3b0851a8a3cd6909c2623ce9 (diff)
download	libhubbub-ff835de85e45a77c810d0f8dc7916ff0be677545.tar.gz libhubbub-ff835de85e45a77c810d0f8dc7916ff0be677545.tar.bz2