From 4da6a038c15a5fa3d1c754b7278ae47627a44718 Mon Sep 17 00:00:00 2001
From: John Mark Bell <jmb@netsurf-browser.org>
Date: Fri, 21 Mar 2008 13:20:22 +0000
Subject: hubbub_strings may now be either an offset into the data buffer or a
 pointer to constant data. Fix up tokeniser and treebuilder to deal with this.
 Fix up testcases, too.

The tokeniser will only ever emit strings of type HUBBUB_STRING_OFF. Anything else is a bug which should be fixed.

The treebuilder may emit strings of either type.

svn path=/trunk/hubbub/; revision=4014
---
 test/parser-utf16.c | 18 +++++++++---------
 test/parser.c       | 18 +++++++++---------
 test/tokeniser.c    | 18 +++++++++---------
 test/tokeniser2.c   | 16 ++++++++--------
 test/tree.c         | 26 +++++++++++++++++++++-----
 5 files changed, 56 insertions(+), 40 deletions(-)

(limited to 'test')

diff --git a/test/parser-utf16.c b/test/parser-utf16.c
index 9056dd1..86024a6 100644
--- a/test/parser-utf16.c
+++ b/test/parser-utf16.c
@@ -129,44 +129,44 @@ void token_handler(const hubbub_token *token, void *pw)
 	case HUBBUB_TOKEN_DOCTYPE:
 		printf("'%.*s' (%svalid)\n",
 				(int) token->data.doctype.name.len,
-				pbuffer + token->data.doctype.name.data_off,
+				pbuffer + token->data.doctype.name.data.off,
 				token->data.doctype.correct ? "" : "in");
 		break;
 	case HUBBUB_TOKEN_START_TAG:
 		printf("'%.*s' %s\n",
 				(int) token->data.tag.name.len,
-				pbuffer + token->data.tag.name.data_off,
+				pbuffer + token->data.tag.name.data.off,
 				(token->data.tag.n_attributes > 0) ?
 						"attributes:" : "");
 		for (i = 0; i < token->data.tag.n_attributes; i++) {
 			printf("\t'%.*s' = '%.*s'\n",
 					(int) token->data.tag.attributes[i].name.len,
-					pbuffer + token->data.tag.attributes[i].name.data_off,
+					pbuffer + token->data.tag.attributes[i].name.data.off,
 					(int) token->data.tag.attributes[i].value.len,
-					pbuffer + token->data.tag.attributes[i].value.data_off);
+					pbuffer + token->data.tag.attributes[i].value.data.off);
 		}
 		break;
 	case HUBBUB_TOKEN_END_TAG:
 		printf("'%.*s' %s\n",
 				(int) token->data.tag.name.len,
-				pbuffer + token->data.tag.name.data_off,
+				pbuffer + token->data.tag.name.data.off,
 				(token->data.tag.n_attributes > 0) ?
 						"attributes:" : "");
 		for (i = 0; i < token->data.tag.n_attributes; i++) {
 			printf("\t'%.*s' = '%.*s'\n",
 					(int) token->data.tag.attributes[i].name.len,
-					pbuffer + token->data.tag.attributes[i].name.data_off,
+					pbuffer + token->data.tag.attributes[i].name.data.off,
 					(int) token->data.tag.attributes[i].value.len,
-					pbuffer + token->data.tag.attributes[i].value.data_off);
+					pbuffer + token->data.tag.attributes[i].value.data.off);
 		}
 		break;
 	case HUBBUB_TOKEN_COMMENT:
 		printf("'%.*s'\n", (int) token->data.comment.len,
-				pbuffer + token->data.comment.data_off);
+				pbuffer + token->data.comment.data.off);
 		break;
 	case HUBBUB_TOKEN_CHARACTER:
 		printf("'%.*s'\n", (int) token->data.character.len,
-				pbuffer + token->data.character.data_off);
+				pbuffer + token->data.character.data.off);
 		break;
 	case HUBBUB_TOKEN_EOF:
 		printf("\n");
diff --git a/test/parser.c b/test/parser.c
index fe2659d..fa2afb8 100644
--- a/test/parser.c
+++ b/test/parser.c
@@ -129,44 +129,44 @@ void token_handler(const hubbub_token *token, void *pw)
 	case HUBBUB_TOKEN_DOCTYPE:
 		printf("'%.*s' (%svalid)\n",
 				(int) token->data.doctype.name.len,
-				pbuffer + token->data.doctype.name.data_off,
+				pbuffer + token->data.doctype.name.data.off,
 				token->data.doctype.correct ? "" : "in");
 		break;
 	case HUBBUB_TOKEN_START_TAG:
 		printf("'%.*s' %s\n",
 				(int) token->data.tag.name.len,
-				pbuffer + token->data.tag.name.data_off,
+				pbuffer + token->data.tag.name.data.off,
 				(token->data.tag.n_attributes > 0) ?
 						"attributes:" : "");
 		for (i = 0; i < token->data.tag.n_attributes; i++) {
 			printf("\t'%.*s' = '%.*s'\n",
 					(int) token->data.tag.attributes[i].name.len,
-					pbuffer + token->data.tag.attributes[i].name.data_off,
+					pbuffer + token->data.tag.attributes[i].name.data.off,
 					(int) token->data.tag.attributes[i].value.len,
-					pbuffer + token->data.tag.attributes[i].value.data_off);
+					pbuffer + token->data.tag.attributes[i].value.data.off);
 		}
 		break;
 	case HUBBUB_TOKEN_END_TAG:
 		printf("'%.*s' %s\n",
 				(int) token->data.tag.name.len,
-				pbuffer + token->data.tag.name.data_off,
+				pbuffer + token->data.tag.name.data.off,
 				(token->data.tag.n_attributes > 0) ?
 						"attributes:" : "");
 		for (i = 0; i < token->data.tag.n_attributes; i++) {
 			printf("\t'%.*s' = '%.*s'\n",
 					(int) token->data.tag.attributes[i].name.len,
-					pbuffer + token->data.tag.attributes[i].name.data_off,
+					pbuffer + token->data.tag.attributes[i].name.data.off,
 					(int) token->data.tag.attributes[i].value.len,
-					pbuffer + token->data.tag.attributes[i].value.data_off);
+					pbuffer + token->data.tag.attributes[i].value.data.off);
 		}
 		break;
 	case HUBBUB_TOKEN_COMMENT:
 		printf("'%.*s'\n", (int) token->data.comment.len,
-				pbuffer + token->data.comment.data_off);
+				pbuffer + token->data.comment.data.off);
 		break;
 	case HUBBUB_TOKEN_CHARACTER:
 		printf("'%.*s'\n", (int) token->data.character.len,
-				pbuffer + token->data.character.data_off);
+				pbuffer + token->data.character.data.off);
 		break;
 	case HUBBUB_TOKEN_EOF:
 		printf("\n");
diff --git a/test/tokeniser.c b/test/tokeniser.c
index 271b986..32ecdbc 100644
--- a/test/tokeniser.c
+++ b/test/tokeniser.c
@@ -128,44 +128,44 @@ void token_handler(const hubbub_token *token, void *pw)
 	case HUBBUB_TOKEN_DOCTYPE:
 		printf("'%.*s' (%svalid)\n",
 				(int) token->data.doctype.name.len,
-				pbuffer + token->data.doctype.name.data_off,
+				pbuffer + token->data.doctype.name.data.off,
 				token->data.doctype.correct ? "" : "in");
 		break;
 	case HUBBUB_TOKEN_START_TAG:
 		printf("'%.*s' %s\n",
 				(int) token->data.tag.name.len,
-				pbuffer + token->data.tag.name.data_off,
+				pbuffer + token->data.tag.name.data.off,
 				(token->data.tag.n_attributes > 0) ?
 						"attributes:" : "");
 		for (i = 0; i < token->data.tag.n_attributes; i++) {
 			printf("\t'%.*s' = '%.*s'\n",
 					(int) token->data.tag.attributes[i].name.len,
-					pbuffer + token->data.tag.attributes[i].name.data_off,
+					pbuffer + token->data.tag.attributes[i].name.data.off,
 					(int) token->data.tag.attributes[i].value.len,
-					pbuffer + token->data.tag.attributes[i].value.data_off);
+					pbuffer + token->data.tag.attributes[i].value.data.off);
 		}
 		break;
 	case HUBBUB_TOKEN_END_TAG:
 		printf("'%.*s' %s\n",
 				(int) token->data.tag.name.len,
-				pbuffer + token->data.tag.name.data_off,
+				pbuffer + token->data.tag.name.data.off,
 				(token->data.tag.n_attributes > 0) ?
 						"attributes:" : "");
 		for (i = 0; i < token->data.tag.n_attributes; i++) {
 			printf("\t'%.*s' = '%.*s'\n",
 					(int) token->data.tag.attributes[i].name.len,
-					pbuffer + token->data.tag.attributes[i].name.data_off,
+					pbuffer + token->data.tag.attributes[i].name.data.off,
 					(int) token->data.tag.attributes[i].value.len,
-					pbuffer + token->data.tag.attributes[i].value.data_off);
+					pbuffer + token->data.tag.attributes[i].value.data.off);
 		}
 		break;
 	case HUBBUB_TOKEN_COMMENT:
 		printf("'%.*s'\n", (int) token->data.comment.len,
-				pbuffer + token->data.comment.data_off);
+				pbuffer + token->data.comment.data.off);
 		break;
 	case HUBBUB_TOKEN_CHARACTER:
 		printf("'%.*s'\n", (int) token->data.character.len,
-				pbuffer + token->data.character.data_off);
+				pbuffer + token->data.character.data.off);
 		break;
 	case HUBBUB_TOKEN_EOF:
 		printf("\n");
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index 103a3d5..f72e0d7 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -280,7 +280,7 @@ void token_handler(const hubbub_token *token, void *pw)
 		bool expvalid = json_object_get_boolean((struct json_object *)
 				array_list_get_idx(items, 2));
 		char *gotname = (char *) (ctx->pbuffer +
-				token->data.doctype.name.data_off);
+				token->data.doctype.name.data.off);
 
 		printf("'%.*s' (%svalid)\n",
 				(int) token->data.doctype.name.len,
@@ -302,7 +302,7 @@ void token_handler(const hubbub_token *token, void *pw)
 			(struct json_object *)
 					array_list_get_idx(items, 2))->head;
 		char *tagname = (char *) (ctx->pbuffer +
-				token->data.tag.name.data_off);
+				token->data.tag.name.data.off);
 
 		printf("'%.*s' %s\n",
 				(int) token->data.tag.name.len,
@@ -318,11 +318,11 @@ void token_handler(const hubbub_token *token, void *pw)
 			char *expval = json_object_get_string(
 					(struct json_object *) expattrs->v);
 			char *gotname = (char *) (ctx->pbuffer +
-				token->data.tag.attributes[i].name.data_off);
+				token->data.tag.attributes[i].name.data.off);
 			size_t namelen =
 				token->data.tag.attributes[i].name.len;
 			char *gotval = (char *) (ctx->pbuffer +
-				token->data.tag.attributes[i].value.data_off);
+				token->data.tag.attributes[i].value.data.off);
 			size_t vallen =
 				token->data.tag.attributes[i].value.len;
 
@@ -347,7 +347,7 @@ void token_handler(const hubbub_token *token, void *pw)
 		char *expname = json_object_get_string((struct json_object *)
 				array_list_get_idx(items, 1));
 		char *tagname = (char *) (ctx->pbuffer +
-				token->data.tag.name.data_off);
+				token->data.tag.name.data.off);
 
 		printf("'%.*s' %s\n",
 				(int) token->data.tag.name.len,
@@ -364,7 +364,7 @@ void token_handler(const hubbub_token *token, void *pw)
 		char *expstr = json_object_get_string((struct json_object *)
 				array_list_get_idx(items, 1));
 		char *gotstr = (char *) (ctx->pbuffer +
-				token->data.comment.data_off);
+				token->data.comment.data.off);
 
 		printf("'%.*s'\n", (int) token->data.comment.len, gotstr);
 
@@ -377,7 +377,7 @@ void token_handler(const hubbub_token *token, void *pw)
 		char *expstr = json_object_get_string((struct json_object *)
 				array_list_get_idx(items, 1));
 		char *gotstr = (char *) (ctx->pbuffer +
-				token->data.character.data_off);
+				token->data.character.data.off);
 		size_t len = min(token->data.character.len,
 				strlen(expstr + ctx->char_off));
 
@@ -392,7 +392,7 @@ void token_handler(const hubbub_token *token, void *pw)
 			hubbub_token t;
 
 			t.type = HUBBUB_TOKEN_CHARACTER;
-			t.data.character.data_off += len;
+			t.data.character.data.off += len;
 			t.data.character.len -= len;
 
 			ctx->char_off = 0;
diff --git a/test/tree.c b/test/tree.c
index 04ce026..f4e6c3c 100644
--- a/test/tree.c
+++ b/test/tree.c
@@ -11,7 +11,7 @@
 
 #include "testutils.h"
 
-#define NODE_REF_CHUNK 1024
+#define NODE_REF_CHUNK 8192
 static uint16_t *node_ref;
 static uintptr_t node_ref_alloc;
 static uintptr_t node_counter;
@@ -72,6 +72,22 @@ static void *myrealloc(void *ptr, size_t len, void *pw)
 	return realloc(ptr, len);
 }
 
+/* Resolve a hubbub_string to its first byte; NULL if its type is unknown. */
+static const uint8_t *ptr_from_hubbub_string(const hubbub_string *string)
+{
+	switch (string->type) {
+	case HUBBUB_STRING_OFF:
+		/* data.off is an offset relative to pbuffer */
+		return pbuffer + string->data.off;
+	case HUBBUB_STRING_PTR:
+		/* data.ptr already addresses the bytes directly */
+		return string->data.ptr;
+	default:
+		/* Unknown type: never hand back an indeterminate pointer */
+		return NULL;
+	}
+}
+
 int main(int argc, char **argv)
 {
 	hubbub_parser *parser;
@@ -188,7 +204,7 @@ void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
 int create_comment(void *ctx, const hubbub_string *data, void **result)
 {
 	printf("Creating (%u) [comment '%.*s']\n", ++node_counter,
-			data->len, pbuffer + data->data_off);
+			data->len, ptr_from_hubbub_string(data));
 
 	GROW_REF
 	node_ref[node_counter] = 0;
@@ -208,7 +224,7 @@ int create_doctype(void *ctx, const hubbub_string *qname,
 	UNUSED(system_id);
 
 	printf("Creating (%u) [doctype '%.*s']\n", ++node_counter,
-			qname->len, pbuffer + qname->data_off);
+			qname->len, ptr_from_hubbub_string(qname));
 
 	GROW_REF
 	node_ref[node_counter] = 0;
@@ -223,7 +239,7 @@ int create_doctype(void *ctx, const hubbub_string *qname,
 int create_element(void *ctx, const hubbub_tag *tag, void **result)
 {
 	printf("Creating (%u) [element '%.*s']\n", ++node_counter,
-			tag->name.len, pbuffer + tag->name.data_off);
+			tag->name.len, ptr_from_hubbub_string(&tag->name));
 
 	GROW_REF
 	node_ref[node_counter] = 0;
@@ -254,7 +270,7 @@ int create_element_verbatim(void *ctx, const uint8_t *name, size_t len,
 int create_text(void *ctx, const hubbub_string *data, void **result)
 {
 	printf("Creating (%u) [text '%.*s']\n", ++node_counter,
-			data->len, pbuffer + data->data_off);
+			data->len, ptr_from_hubbub_string(data));
 
 	GROW_REF
 	node_ref[node_counter] = 0;
-- 
cgit v1.2.3