From 4da6a038c15a5fa3d1c754b7278ae47627a44718 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Fri, 21 Mar 2008 13:20:22 +0000 Subject: hubbub_strings may now be either an offset into the data buffer or a pointer to constant data. Fix up tokeniser and treebuilder to deal with this. Fix up testcases, too. The tokeniser will only ever emit strings of type HUBBUB_STRING_OFF. Anything else is a bug which should be fixed. The treebuilder may emit strings of either type. svn path=/trunk/hubbub/; revision=4014 --- test/parser-utf16.c | 18 +++++++++--------- test/parser.c | 18 +++++++++--------- test/tokeniser.c | 18 +++++++++--------- test/tokeniser2.c | 16 ++++++++-------- test/tree.c | 26 +++++++++++++++++++++----- 5 files changed, 56 insertions(+), 40 deletions(-) (limited to 'test') diff --git a/test/parser-utf16.c b/test/parser-utf16.c index 9056dd1..86024a6 100644 --- a/test/parser-utf16.c +++ b/test/parser-utf16.c @@ -129,44 +129,44 @@ void token_handler(const hubbub_token *token, void *pw) case HUBBUB_TOKEN_DOCTYPE: printf("'%.*s' (%svalid)\n", (int) token->data.doctype.name.len, - pbuffer + token->data.doctype.name.data_off, + pbuffer + token->data.doctype.name.data.off, token->data.doctype.correct ? "" : "in"); break; case HUBBUB_TOKEN_START_TAG: printf("'%.*s' %s\n", (int) token->data.tag.name.len, - pbuffer + token->data.tag.name.data_off, + pbuffer + token->data.tag.name.data.off, (token->data.tag.n_attributes > 0) ? "attributes:" : ""); for (i = 0; i < token->data.tag.n_attributes; i++) { printf("\t'%.*s' = '%.*s'\n", (int) token->data.tag.attributes[i].name.len, - pbuffer + token->data.tag.attributes[i].name.data_off, + pbuffer + token->data.tag.attributes[i].name.data.off, (int) token->data.tag.attributes[i].value.len, - pbuffer + token->data.tag.attributes[i].value.data_off); + pbuffer + token->data.tag.attributes[i].value.data.off); } break; case HUBBUB_TOKEN_END_TAG: printf("'%.*s' %s\n", (int) token->data.tag.name.len, - pbuffer + token->data.tag.name.data_off, + pbuffer + token->data.tag.name.data.off, (token->data.tag.n_attributes > 0) ? "attributes:" : ""); for (i = 0; i < token->data.tag.n_attributes; i++) { printf("\t'%.*s' = '%.*s'\n", (int) token->data.tag.attributes[i].name.len, - pbuffer + token->data.tag.attributes[i].name.data_off, + pbuffer + token->data.tag.attributes[i].name.data.off, (int) token->data.tag.attributes[i].value.len, - pbuffer + token->data.tag.attributes[i].value.data_off); + pbuffer + token->data.tag.attributes[i].value.data.off); } break; case HUBBUB_TOKEN_COMMENT: printf("'%.*s'\n", (int) token->data.comment.len, - pbuffer + token->data.comment.data_off); + pbuffer + token->data.comment.data.off); break; case HUBBUB_TOKEN_CHARACTER: printf("'%.*s'\n", (int) token->data.character.len, - pbuffer + token->data.character.data_off); + pbuffer + token->data.character.data.off); break; case HUBBUB_TOKEN_EOF: printf("\n"); diff --git a/test/parser.c b/test/parser.c index fe2659d..fa2afb8 100644 --- a/test/parser.c +++ b/test/parser.c @@ -129,44 +129,44 @@ void token_handler(const hubbub_token *token, void *pw) case HUBBUB_TOKEN_DOCTYPE: printf("'%.*s' (%svalid)\n", (int) token->data.doctype.name.len, - pbuffer + token->data.doctype.name.data_off, + pbuffer + token->data.doctype.name.data.off, token->data.doctype.correct ? "" : "in"); break; case HUBBUB_TOKEN_START_TAG: printf("'%.*s' %s\n", (int) token->data.tag.name.len, - pbuffer + token->data.tag.name.data_off, + pbuffer + token->data.tag.name.data.off, (token->data.tag.n_attributes > 0) ? "attributes:" : ""); for (i = 0; i < token->data.tag.n_attributes; i++) { printf("\t'%.*s' = '%.*s'\n", (int) token->data.tag.attributes[i].name.len, - pbuffer + token->data.tag.attributes[i].name.data_off, + pbuffer + token->data.tag.attributes[i].name.data.off, (int) token->data.tag.attributes[i].value.len, - pbuffer + token->data.tag.attributes[i].value.data_off); + pbuffer + token->data.tag.attributes[i].value.data.off); } break; case HUBBUB_TOKEN_END_TAG: printf("'%.*s' %s\n", (int) token->data.tag.name.len, - pbuffer + token->data.tag.name.data_off, + pbuffer + token->data.tag.name.data.off, (token->data.tag.n_attributes > 0) ? "attributes:" : ""); for (i = 0; i < token->data.tag.n_attributes; i++) { printf("\t'%.*s' = '%.*s'\n", (int) token->data.tag.attributes[i].name.len, - pbuffer + token->data.tag.attributes[i].name.data_off, + pbuffer + token->data.tag.attributes[i].name.data.off, (int) token->data.tag.attributes[i].value.len, - pbuffer + token->data.tag.attributes[i].value.data_off); + pbuffer + token->data.tag.attributes[i].value.data.off); } break; case HUBBUB_TOKEN_COMMENT: printf("'%.*s'\n", (int) token->data.comment.len, - pbuffer + token->data.comment.data_off); + pbuffer + token->data.comment.data.off); break; case HUBBUB_TOKEN_CHARACTER: printf("'%.*s'\n", (int) token->data.character.len, - pbuffer + token->data.character.data_off); + pbuffer + token->data.character.data.off); break; case HUBBUB_TOKEN_EOF: printf("\n"); diff --git a/test/tokeniser.c b/test/tokeniser.c index 271b986..32ecdbc 100644 --- a/test/tokeniser.c +++ b/test/tokeniser.c @@ -128,44 +128,44 @@ void token_handler(const hubbub_token *token, void *pw) case HUBBUB_TOKEN_DOCTYPE: printf("'%.*s' (%svalid)\n", (int) token->data.doctype.name.len, - pbuffer + token->data.doctype.name.data_off, + pbuffer + token->data.doctype.name.data.off, token->data.doctype.correct ? "" : "in"); break; case HUBBUB_TOKEN_START_TAG: printf("'%.*s' %s\n", (int) token->data.tag.name.len, - pbuffer + token->data.tag.name.data_off, + pbuffer + token->data.tag.name.data.off, (token->data.tag.n_attributes > 0) ? "attributes:" : ""); for (i = 0; i < token->data.tag.n_attributes; i++) { printf("\t'%.*s' = '%.*s'\n", (int) token->data.tag.attributes[i].name.len, - pbuffer + token->data.tag.attributes[i].name.data_off, + pbuffer + token->data.tag.attributes[i].name.data.off, (int) token->data.tag.attributes[i].value.len, - pbuffer + token->data.tag.attributes[i].value.data_off); + pbuffer + token->data.tag.attributes[i].value.data.off); } break; case HUBBUB_TOKEN_END_TAG: printf("'%.*s' %s\n", (int) token->data.tag.name.len, - pbuffer + token->data.tag.name.data_off, + pbuffer + token->data.tag.name.data.off, (token->data.tag.n_attributes > 0) ? "attributes:" : ""); for (i = 0; i < token->data.tag.n_attributes; i++) { printf("\t'%.*s' = '%.*s'\n", (int) token->data.tag.attributes[i].name.len, - pbuffer + token->data.tag.attributes[i].name.data_off, + pbuffer + token->data.tag.attributes[i].name.data.off, (int) token->data.tag.attributes[i].value.len, - pbuffer + token->data.tag.attributes[i].value.data_off); + pbuffer + token->data.tag.attributes[i].value.data.off); } break; case HUBBUB_TOKEN_COMMENT: printf("'%.*s'\n", (int) token->data.comment.len, - pbuffer + token->data.comment.data_off); + pbuffer + token->data.comment.data.off); break; case HUBBUB_TOKEN_CHARACTER: printf("'%.*s'\n", (int) token->data.character.len, - pbuffer + token->data.character.data_off); + pbuffer + token->data.character.data.off); break; case HUBBUB_TOKEN_EOF: printf("\n"); diff --git a/test/tokeniser2.c b/test/tokeniser2.c index 103a3d5..f72e0d7 100644 --- a/test/tokeniser2.c +++ b/test/tokeniser2.c @@ -280,7 +280,7 @@ void token_handler(const hubbub_token *token, void *pw) bool expvalid = json_object_get_boolean((struct json_object *) array_list_get_idx(items, 2)); char *gotname = (char *) (ctx->pbuffer + - token->data.doctype.name.data_off); + token->data.doctype.name.data.off); printf("'%.*s' (%svalid)\n", (int) token->data.doctype.name.len, @@ -302,7 +302,7 @@ void token_handler(const hubbub_token *token, void *pw) (struct json_object *) array_list_get_idx(items, 2))->head; char *tagname = (char *) (ctx->pbuffer + - token->data.tag.name.data_off); + token->data.tag.name.data.off); printf("'%.*s' %s\n", (int) token->data.tag.name.len, @@ -318,11 +318,11 @@ void token_handler(const hubbub_token *token, void *pw) char *expval = json_object_get_string( (struct json_object *) expattrs->v); char *gotname = (char *) (ctx->pbuffer + - token->data.tag.attributes[i].name.data_off); + token->data.tag.attributes[i].name.data.off); size_t namelen = token->data.tag.attributes[i].name.len; char *gotval = (char *) (ctx->pbuffer + - token->data.tag.attributes[i].value.data_off); + token->data.tag.attributes[i].value.data.off); size_t vallen = token->data.tag.attributes[i].value.len; @@ -347,7 +347,7 @@ void token_handler(const hubbub_token *token, void *pw) char *expname = json_object_get_string((struct json_object *) array_list_get_idx(items, 1)); char *tagname = (char *) (ctx->pbuffer + - token->data.tag.name.data_off); + token->data.tag.name.data.off); printf("'%.*s' %s\n", (int) token->data.tag.name.len, @@ -364,7 +364,7 @@ void token_handler(const hubbub_token *token, void *pw) char *expstr = json_object_get_string((struct json_object *) array_list_get_idx(items, 1)); char *gotstr = (char *) (ctx->pbuffer + - token->data.comment.data_off); + token->data.comment.data.off); printf("'%.*s'\n", (int) token->data.comment.len, gotstr); @@ -377,7 +377,7 @@ void token_handler(const hubbub_token *token, void *pw) char *expstr = json_object_get_string((struct json_object *) array_list_get_idx(items, 1)); char *gotstr = (char *) (ctx->pbuffer + - token->data.character.data_off); + token->data.character.data.off); size_t len = min(token->data.character.len, strlen(expstr + ctx->char_off)); @@ -392,7 +392,7 @@ void token_handler(const hubbub_token *token, void *pw) hubbub_token t; t.type = HUBBUB_TOKEN_CHARACTER; - t.data.character.data_off += len; + t.data.character.data.off += len; t.data.character.len -= len; ctx->char_off = 0; diff --git a/test/tree.c b/test/tree.c index 04ce026..f4e6c3c 100644 --- a/test/tree.c +++ b/test/tree.c @@ -11,7 +11,7 @@ #include "testutils.h" -#define NODE_REF_CHUNK 1024 +#define NODE_REF_CHUNK 8192 static uint16_t *node_ref; static uintptr_t node_ref_alloc; static uintptr_t node_counter; @@ -72,6 +72,22 @@ static void *myrealloc(void *ptr, size_t len, void *pw) return realloc(ptr, len); } +static const uint8_t *ptr_from_hubbub_string(const hubbub_string *string) +{ + const uint8_t *data; + + switch (string->type) { + case HUBBUB_STRING_OFF: + data = pbuffer + string->data.off; + break; + case HUBBUB_STRING_PTR: + data = string->data.ptr; + break; + } + + return data; +} + int main(int argc, char **argv) { hubbub_parser *parser; @@ -188,7 +204,7 @@ void buffer_handler(const uint8_t *buffer, size_t len, void *pw) int create_comment(void *ctx, const hubbub_string *data, void **result) { printf("Creating (%u) [comment '%.*s']\n", ++node_counter, - data->len, pbuffer + data->data_off); + data->len, ptr_from_hubbub_string(data)); GROW_REF node_ref[node_counter] = 0; @@ -208,7 +224,7 @@ int create_doctype(void *ctx, const hubbub_string *qname, UNUSED(system_id); printf("Creating (%u) [doctype '%.*s']\n", ++node_counter, - qname->len, pbuffer + qname->data_off); + qname->len, ptr_from_hubbub_string(qname)); GROW_REF node_ref[node_counter] = 0; @@ -223,7 +239,7 @@ int create_doctype(void *ctx, const hubbub_string *qname, int create_element(void *ctx, const hubbub_tag *tag, void **result) { printf("Creating (%u) [element '%.*s']\n", ++node_counter, - tag->name.len, pbuffer + tag->name.data_off); + tag->name.len, ptr_from_hubbub_string(&tag->name)); GROW_REF node_ref[node_counter] = 0; @@ -254,7 +270,7 @@ int create_element_verbatim(void *ctx, const uint8_t *name, size_t len, int create_text(void *ctx, const hubbub_string *data, void **result) { printf("Creating (%u) [text '%.*s']\n", ++node_counter, - data->len, pbuffer + data->data_off); + data->len, ptr_from_hubbub_string(data)); GROW_REF node_ref[node_counter] = 0; -- cgit v1.2.3