-rw-r--r--  include/hubbub/types.h         |  11
-rw-r--r--  src/tokeniser/tokeniser.c      | 113
-rw-r--r--  src/treebuilder/treebuilder.c  |  22
-rw-r--r--  test/parser-utf16.c            |  18
-rw-r--r--  test/parser.c                  |  18
-rw-r--r--  test/tokeniser.c               |  18
-rw-r--r--  test/tokeniser2.c              |  16
-rw-r--r--  test/tree.c                    |  26
8 files changed, 142 insertions, 100 deletions
diff --git a/include/hubbub/types.h b/include/hubbub/types.h
index 922bdbb..e58a88b 100644
--- a/include/hubbub/types.h
+++ b/include/hubbub/types.h
@@ -57,7 +57,16 @@ typedef enum hubbub_token_type {
* Tokeniser string type
*/
typedef struct hubbub_string {
- uint32_t data_off; /**< Byte offset of string start */
+ enum {
+ HUBBUB_STRING_OFF,
+ HUBBUB_STRING_PTR
+ } type;
+
+ union {
+ const uint8_t *ptr; /**< Pointer to data */
+ uint32_t off; /**< Byte offset of string start */
+ } data;
+
size_t len; /**< Byte length of string */
} hubbub_string;
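
The change above turns hubbub_string into a tagged union: strings produced by the tokeniser remain byte offsets into the parser's input buffer (HUBBUB_STRING_OFF), while HUBBUB_STRING_PTR lets a string carry a direct data pointer instead. A minimal consumer-side sketch, assuming a `buffer` base pointer supplied by the caller (real clients receive it through the buffer handler); resolve_string is illustrative, not part of the patch:

    #include <stddef.h>
    #include <stdint.h>
    #include <hubbub/types.h>

    /* Resolve a tagged hubbub_string to a data pointer. `buffer` is the
     * caller's copy of the document buffer that offset-type strings index
     * into; pointer-type strings are returned as-is. */
    static const uint8_t *resolve_string(const hubbub_string *s,
    		const uint8_t *buffer)
    {
    	switch (s->type) {
    	case HUBBUB_STRING_OFF:
    		return buffer + s->data.off;
    	case HUBBUB_STRING_PTR:
    		return s->data.ptr;
    	}

    	return NULL; /* not reached for well-formed strings */
    }

test/tree.c below adds exactly this helper (ptr_from_hubbub_string) against its pbuffer copy.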
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index f8b6bb3..3d69797 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -236,6 +236,12 @@ hubbub_tokeniser *hubbub_tokeniser_create(hubbub_inputstream *input,
}
memset(&tok->context, 0, sizeof(hubbub_tokeniser_context));
+ tok->context.current_tag.name.type = HUBBUB_STRING_OFF;
+ tok->context.current_comment.type = HUBBUB_STRING_OFF;
+ tok->context.current_doctype.name.type = HUBBUB_STRING_OFF;
+ tok->context.current_chars.type = HUBBUB_STRING_OFF;
+ tok->context.close_tag_match.tag.type = HUBBUB_STRING_OFF;
+ tok->context.match_entity.str.type = HUBBUB_STRING_OFF;
return tok;
}
@@ -434,7 +440,7 @@ bool hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
uint32_t c;
/* Clear current characters */
- tokeniser->context.current_chars.data_off = 0;
+ tokeniser->context.current_chars.data.off = 0;
tokeniser->context.current_chars.len = 0;
while ((c = hubbub_inputstream_peek(tokeniser->input)) !=
@@ -462,7 +468,7 @@ bool hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
}
/* Buffer '<' */
- tokeniser->context.current_chars.data_off =
+ tokeniser->context.current_chars.data.off =
hubbub_inputstream_cur_pos(tokeniser->input,
&tokeniser->context.current_chars.len);
@@ -478,7 +484,7 @@ bool hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
&len);
if (tokeniser->context.current_chars.len == 0) {
- tokeniser->context.current_chars.data_off =
+ tokeniser->context.current_chars.data.off =
pos;
}
tokeniser->context.current_chars.len++;
@@ -495,7 +501,7 @@ bool hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
hubbub_tokeniser_emit_token(tokeniser, &token);
- tokeniser->context.current_chars.data_off = 0;
+ tokeniser->context.current_chars.data.off = 0;
tokeniser->context.current_chars.len = 0;
}
@@ -524,7 +530,8 @@ bool hubbub_tokeniser_handle_entity_data(hubbub_tokeniser *tokeniser)
/* Emit character */
token.type = HUBBUB_TOKEN_CHARACTER;
- token.data.character.data_off =
+ token.data.character.type = HUBBUB_STRING_OFF;
+ token.data.character.data.off =
hubbub_inputstream_cur_pos(tokeniser->input,
&token.data.character.len);
@@ -601,7 +608,7 @@ bool hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
tokeniser->context.current_tag_type =
HUBBUB_TOKEN_START_TAG;
- ctag->name.data_off =
+ ctag->name.data.off =
hubbub_inputstream_cur_pos(tokeniser->input,
&ctag->name.len);
ctag->n_attributes = 0;
@@ -613,7 +620,7 @@ bool hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
tokeniser->context.current_tag_type =
HUBBUB_TOKEN_START_TAG;
- ctag->name.data_off =
+ ctag->name.data.off =
hubbub_inputstream_cur_pos(tokeniser->input,
&ctag->name.len);
ctag->n_attributes = 0;
@@ -644,7 +651,7 @@ bool hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
&len);
tokeniser->context.current_chars.len += len;
- tokeniser->context.current_comment.data_off = pos;
+ tokeniser->context.current_comment.data.off = pos;
tokeniser->context.current_comment.len = len;
tokeniser->state =
HUBBUB_TOKENISER_STATE_BOGUS_COMMENT;
@@ -688,7 +695,7 @@ bool hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
tokeniser->context.current_tag_type =
HUBBUB_TOKEN_END_TAG;
- ctag->name.data_off = pos;
+ ctag->name.data.off = pos;
ctag->name.len = len;
ctag->n_attributes = 0;
@@ -700,7 +707,7 @@ bool hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
tokeniser->context.current_tag_type =
HUBBUB_TOKEN_END_TAG;
- ctag->name.data_off = pos;
+ ctag->name.data.off = pos;
ctag->name.len = len;
ctag->n_attributes = 0;
@@ -724,7 +731,7 @@ bool hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
pos = hubbub_inputstream_cur_pos(tokeniser->input,
&len);
- tokeniser->context.current_comment.data_off = pos;
+ tokeniser->context.current_comment.data.off = pos;
tokeniser->context.current_comment.len = len;
tokeniser->state =
@@ -756,7 +763,7 @@ bool hubbub_tokeniser_handle_close_tag_match(hubbub_tokeniser *tokeniser)
off = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (ctx->close_tag_match.tag.len == 0) {
- ctx->close_tag_match.tag.data_off = off;
+ ctx->close_tag_match.tag.data.off = off;
ctx->close_tag_match.tag.len = len;
} else {
ctx->close_tag_match.tag.len += len;
@@ -768,8 +775,8 @@ bool hubbub_tokeniser_handle_close_tag_match(hubbub_tokeniser *tokeniser)
(ctx->close_tag_match.tag.len == ctag->name.len &&
hubbub_inputstream_compare_range_ci(
tokeniser->input,
- ctag->name.data_off,
- ctx->close_tag_match.tag.data_off,
+ ctag->name.data.off,
+ ctx->close_tag_match.tag.data.off,
ctag->name.len) != 0)) {
hubbub_token token;
@@ -792,8 +799,8 @@ bool hubbub_tokeniser_handle_close_tag_match(hubbub_tokeniser *tokeniser)
} else if (ctx->close_tag_match.tag.len == ctag->name.len &&
hubbub_inputstream_compare_range_ci(
tokeniser->input,
- ctag->name.data_off,
- ctx->close_tag_match.tag.data_off,
+ ctag->name.data.off,
+ ctx->close_tag_match.tag.data.off,
ctag->name.len) == 0) {
/* Matched => stop searching */
break;
@@ -968,9 +975,11 @@ bool hubbub_tokeniser_handle_before_attribute_name(
ctag->attributes = attr;
- attr[ctag->n_attributes].name.data_off = pos;
+ attr[ctag->n_attributes].name.type = HUBBUB_STRING_OFF;
+ attr[ctag->n_attributes].name.data.off = pos;
attr[ctag->n_attributes].name.len = len;
- attr[ctag->n_attributes].value.data_off = 0;
+ attr[ctag->n_attributes].value.type = HUBBUB_STRING_OFF;
+ attr[ctag->n_attributes].value.data.off = 0;
attr[ctag->n_attributes].value.len = 0;
ctag->n_attributes++;
@@ -1008,9 +1017,11 @@ bool hubbub_tokeniser_handle_before_attribute_name(
ctag->attributes = attr;
- attr[ctag->n_attributes].name.data_off = pos;
+ attr[ctag->n_attributes].name.type = HUBBUB_STRING_OFF;
+ attr[ctag->n_attributes].name.data.off = pos;
attr[ctag->n_attributes].name.len = len;
- attr[ctag->n_attributes].value.data_off = 0;
+ attr[ctag->n_attributes].value.type = HUBBUB_STRING_OFF;
+ attr[ctag->n_attributes].value.data.off = 0;
attr[ctag->n_attributes].value.len = 0;
ctag->n_attributes++;
@@ -1135,9 +1146,11 @@ bool hubbub_tokeniser_handle_after_attribute_name(
ctag->attributes = attr;
- attr[ctag->n_attributes].name.data_off = pos;
+ attr[ctag->n_attributes].name.type = HUBBUB_STRING_OFF;
+ attr[ctag->n_attributes].name.data.off = pos;
attr[ctag->n_attributes].name.len = len;
- attr[ctag->n_attributes].value.data_off = 0;
+ attr[ctag->n_attributes].value.type = HUBBUB_STRING_OFF;
+ attr[ctag->n_attributes].value.data.off = 0;
attr[ctag->n_attributes].value.len = 0;
ctag->n_attributes++;
@@ -1179,9 +1192,11 @@ bool hubbub_tokeniser_handle_after_attribute_name(
ctag->attributes = attr;
- attr[ctag->n_attributes].name.data_off = pos;
+ attr[ctag->n_attributes].name.type = HUBBUB_STRING_OFF;
+ attr[ctag->n_attributes].name.data.off = pos;
attr[ctag->n_attributes].name.len = len;
- attr[ctag->n_attributes].value.data_off = 0;
+ attr[ctag->n_attributes].value.type = HUBBUB_STRING_OFF;
+ attr[ctag->n_attributes].value.data.off = 0;
attr[ctag->n_attributes].value.len = 0;
ctag->n_attributes++;
@@ -1240,7 +1255,7 @@ bool hubbub_tokeniser_handle_before_attribute_value(
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
- ctag->attributes[ctag->n_attributes - 1].value.data_off = pos;
+ ctag->attributes[ctag->n_attributes - 1].value.data.off = pos;
ctag->attributes[ctag->n_attributes - 1].value.len = len;
tokeniser->state = HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_UQ;
@@ -1285,7 +1300,7 @@ bool hubbub_tokeniser_handle_attribute_value_dq(hubbub_tokeniser *tokeniser)
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (ctag->attributes[ctag->n_attributes - 1].value.len == 0) {
- ctag->attributes[ctag->n_attributes - 1].value.data_off =
+ ctag->attributes[ctag->n_attributes - 1].value.data.off =
pos;
}
@@ -1331,7 +1346,7 @@ bool hubbub_tokeniser_handle_attribute_value_sq(hubbub_tokeniser *tokeniser)
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (ctag->attributes[ctag->n_attributes - 1].value.len == 0) {
- ctag->attributes[ctag->n_attributes - 1].value.data_off =
+ ctag->attributes[ctag->n_attributes - 1].value.data.off =
pos;
}
@@ -1388,7 +1403,7 @@ bool hubbub_tokeniser_handle_attribute_value_uq(hubbub_tokeniser *tokeniser)
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (ctag->attributes[ctag->n_attributes - 1].value.len == 0) {
- ctag->attributes[ctag->n_attributes - 1].value.data_off =
+ ctag->attributes[ctag->n_attributes - 1].value.data.off =
pos;
}
@@ -1421,7 +1436,7 @@ bool hubbub_tokeniser_handle_entity_in_attribute_value(
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (ctag->attributes[ctag->n_attributes - 1].value.len == 0) {
- ctag->attributes[ctag->n_attributes - 1].value.data_off =
+ ctag->attributes[ctag->n_attributes - 1].value.data.off =
pos;
}
@@ -1458,7 +1473,7 @@ bool hubbub_tokeniser_handle_bogus_comment(hubbub_tokeniser *tokeniser)
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (tokeniser->context.current_comment.len == 0)
- tokeniser->context.current_comment.data_off = pos;
+ tokeniser->context.current_comment.data.off = pos;
tokeniser->context.current_comment.len += len;
hubbub_inputstream_advance(tokeniser->input);
@@ -1495,7 +1510,7 @@ bool hubbub_tokeniser_handle_markup_declaration_open(
tokeniser->state = HUBBUB_TOKENISER_STATE_MATCH_DOCTYPE;
hubbub_inputstream_advance(tokeniser->input);
} else {
- tokeniser->context.current_comment.data_off = 0;
+ tokeniser->context.current_comment.data.off = 0;
tokeniser->context.current_comment.len = 0;
tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_COMMENT;
@@ -1511,7 +1526,7 @@ bool hubbub_tokeniser_handle_comment_start(hubbub_tokeniser *tokeniser)
if (c == HUBBUB_INPUTSTREAM_OOD)
return false;
- tokeniser->context.current_comment.data_off = 0;
+ tokeniser->context.current_comment.data.off = 0;
tokeniser->context.current_comment.len = 0;
@@ -1553,7 +1568,7 @@ bool hubbub_tokeniser_handle_comment(hubbub_tokeniser *tokeniser)
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (tokeniser->context.current_comment.len == 0)
- tokeniser->context.current_comment.data_off = pos;
+ tokeniser->context.current_comment.data.off = pos;
tokeniser->context.current_comment.len += len;
hubbub_inputstream_advance(tokeniser->input);
@@ -1589,11 +1604,11 @@ bool hubbub_tokeniser_handle_comment_dash(hubbub_tokeniser *tokeniser)
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (tokeniser->context.current_comment.len == 0) {
- tokeniser->context.current_comment.data_off = pos;
+ tokeniser->context.current_comment.data.off = pos;
} else {
/* Need to do this to get length of '-' */
len += pos -
- tokeniser->context.current_comment.data_off;
+ tokeniser->context.current_comment.data.off;
}
tokeniser->context.current_comment.len = len;
@@ -1631,12 +1646,12 @@ bool hubbub_tokeniser_handle_comment_end(hubbub_tokeniser *tokeniser)
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (tokeniser->context.current_comment.len == 0) {
- tokeniser->context.current_comment.data_off = pos;
+ tokeniser->context.current_comment.data.off = pos;
tokeniser->context.current_comment.len = len;
} else {
/* Need to do this to get length of '-' */
len = pos -
- tokeniser->context.current_comment.data_off;
+ tokeniser->context.current_comment.data.off;
}
tokeniser->context.current_comment.len = len;
@@ -1660,11 +1675,11 @@ bool hubbub_tokeniser_handle_comment_end(hubbub_tokeniser *tokeniser)
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
if (tokeniser->context.current_comment.len == 0) {
- tokeniser->context.current_comment.data_off = pos;
+ tokeniser->context.current_comment.data.off = pos;
} else {
/* Need to do this to get length of '--' */
len += pos -
- tokeniser->context.current_comment.data_off;
+ tokeniser->context.current_comment.data.off;
}
tokeniser->context.current_comment.len = len;
@@ -1724,7 +1739,7 @@ bool hubbub_tokeniser_handle_match_doctype(hubbub_tokeniser *tokeniser)
case 1: hubbub_inputstream_push_back(tokeniser->input, 'D');
}
- tokeniser->context.current_comment.data_off = 0;
+ tokeniser->context.current_comment.data.off = 0;
tokeniser->context.current_comment.len = 0;
tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_COMMENT;
@@ -1768,7 +1783,7 @@ bool hubbub_tokeniser_handle_before_doctype_name(
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
- cdoc->name.data_off = pos;
+ cdoc->name.data.off = pos;
cdoc->name.len = len;
cdoc->correct = false;
@@ -1802,7 +1817,7 @@ bool hubbub_tokeniser_handle_before_doctype_name(
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
- cdoc->name.data_off = pos;
+ cdoc->name.data.off = pos;
cdoc->name.len = len;
cdoc->correct = false;
@@ -1834,7 +1849,7 @@ bool hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser)
token.data.doctype.correct =
(hubbub_inputstream_compare_range_ascii(
tokeniser->input,
- token.data.doctype.name.data_off,
+ token.data.doctype.name.data.off,
token.data.doctype.name.len,
"HTML", SLEN("HTML")) == 0);
@@ -1896,7 +1911,7 @@ bool hubbub_tokeniser_handle_after_doctype_name(hubbub_tokeniser *tokeniser)
token.data.doctype.correct =
(hubbub_inputstream_compare_range_ascii(
tokeniser->input,
- token.data.doctype.name.data_off,
+ token.data.doctype.name.data.off,
token.data.doctype.name.len,
"HTML", SLEN("HTML")) == 0);
@@ -1969,7 +1984,7 @@ bool hubbub_tokeniser_consume_entity(hubbub_tokeniser *tokeniser)
if (tokeniser->context.match_entity.done_setup == false) {
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
- tokeniser->context.match_entity.str.data_off = pos;
+ tokeniser->context.match_entity.str.data.off = pos;
tokeniser->context.match_entity.str.len = len;
tokeniser->context.match_entity.base = 0;
tokeniser->context.match_entity.codepoint = 0;
@@ -2095,7 +2110,7 @@ bool hubbub_tokeniser_handle_numbered_entity(hubbub_tokeniser *tokeniser)
/* And replace the matched range with it */
error = hubbub_inputstream_replace_range(tokeniser->input,
- ctx->match_entity.str.data_off,
+ ctx->match_entity.str.data.off,
ctx->match_entity.str.len,
ctx->match_entity.codepoint);
if (error != HUBBUB_OK) {
@@ -2177,7 +2192,7 @@ bool hubbub_tokeniser_handle_named_entity(hubbub_tokeniser *tokeniser)
/* Now, replace range, if we found a named entity */
if (ctx->match_entity.codepoint != 0) {
error = hubbub_inputstream_replace_range(tokeniser->input,
- ctx->match_entity.str.data_off,
+ ctx->match_entity.str.data.off,
ctx->match_entity.prev_len,
ctx->match_entity.codepoint);
if (error != HUBBUB_OK) {
@@ -2249,8 +2264,8 @@ void hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser,
attrs[j].name.len ||
hubbub_inputstream_compare_range_cs(
tokeniser->input,
- attrs[i].name.data_off,
- attrs[j].name.data_off,
+ attrs[i].name.data.off,
+ attrs[j].name.data.off,
attrs[i].name.len) != 0) {
/* Attributes don't match */
continue;
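
All of the tokeniser changes follow the same pattern: each data_off access becomes data.off, and every context string is tagged HUBBUB_STRING_OFF when it is (re)initialised. The explicit tags added in hubbub_tokeniser_create are strictly redundant today, since HUBBUB_STRING_OFF is the first enumerator and therefore zero, so the preceding memset already produces it; they keep the code correct should the enum ever be reordered. A hypothetical helper (not part of this patch) that would collapse the repeated three-line reset sequences:

    /* Hypothetical convenience reset, mirroring the repeated
     * "type = HUBBUB_STRING_OFF; data.off = 0; len = 0" sequences
     * scattered through the tokeniser above. */
    static void hubbub_string_reset_off(hubbub_string *str)
    {
    	str->type = HUBBUB_STRING_OFF;
    	str->data.off = 0;
    	str->len = 0;
    }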
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index 3d2b295..01e31e4 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -241,6 +241,8 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
assert(HTML != 0);
tb->context.element_stack[0].type = 0;
+ tb->context.collect.string.type = HUBBUB_STRING_OFF;
+
tb->buffer_handler = NULL;
tb->buffer_pw = NULL;
@@ -1070,8 +1072,8 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
if (treebuilder->context.collect.string.len == 0) {
- treebuilder->context.collect.string.data_off =
- token->data.character.data_off;
+ treebuilder->context.collect.string.data.off =
+ token->data.character.data.off;
}
treebuilder->context.collect.string.len +=
token->data.character.len;
@@ -1158,8 +1160,8 @@ bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
if (treebuilder->context.collect.string.len == 0) {
- treebuilder->context.collect.string.data_off =
- token->data.character.data_off;
+ treebuilder->context.collect.string.data.off =
+ token->data.character.data.off;
}
treebuilder->context.collect.string.len +=
token->data.character.len;
@@ -1265,7 +1267,7 @@ bool process_characters_expect_whitespace(hubbub_treebuilder *treebuilder,
const hubbub_token *token, bool insert_into_current_node)
{
const uint8_t *data = treebuilder->input_buffer +
- token->data.character.data_off;
+ token->data.character.data.off;
size_t len = token->data.character.len;
size_t c;
@@ -1284,7 +1286,7 @@ bool process_characters_expect_whitespace(hubbub_treebuilder *treebuilder,
int success;
void *text, *appended;
- temp.data_off = token->data.character.data_off;
+ temp.data.off = token->data.character.data.off;
temp.len = len - c;
/** \todo Append to pre-existing text child, iff
@@ -1318,7 +1320,7 @@ bool process_characters_expect_whitespace(hubbub_treebuilder *treebuilder,
}
/* Update token data to strip leading whitespace */
- ((hubbub_token *) token)->data.character.data_off +=
+ ((hubbub_token *) token)->data.character.data.off +=
len - c;
((hubbub_token *) token)->data.character.len -= c;
@@ -1409,7 +1411,7 @@ void parse_generic_rcdata(hubbub_treebuilder *treebuilder,
treebuilder->context.collect.mode = treebuilder->context.mode;
treebuilder->context.collect.type = type;
treebuilder->context.collect.node = node;
- treebuilder->context.collect.string.data_off = 0;
+ treebuilder->context.collect.string.data.off = 0;
treebuilder->context.collect.string.len = 0;
treebuilder->tree_handler->unref_node(
@@ -1472,7 +1474,7 @@ void process_script_in_head(hubbub_treebuilder *treebuilder,
treebuilder->context.collect.mode = treebuilder->context.mode;
treebuilder->context.collect.node = script;
treebuilder->context.collect.type = SCRIPT;
- treebuilder->context.collect.string.data_off = 0;
+ treebuilder->context.collect.string.data.off = 0;
treebuilder->context.collect.string.len = 0;
treebuilder->context.mode = SCRIPT_COLLECT_CHARACTERS;
@@ -1846,7 +1848,7 @@ void reset_insertion_mode(hubbub_treebuilder *treebuilder)
element_type element_type_from_name(hubbub_treebuilder *treebuilder,
const hubbub_string *tag_name)
{
- const uint8_t *name = treebuilder->input_buffer + tag_name->data_off;
+ const uint8_t *name = treebuilder->input_buffer + tag_name->data.off;
return element_type_from_verbatim_name(name, tag_name->len);
}
diff --git a/test/parser-utf16.c b/test/parser-utf16.c
index 9056dd1..86024a6 100644
--- a/test/parser-utf16.c
+++ b/test/parser-utf16.c
@@ -129,44 +129,44 @@ void token_handler(const hubbub_token *token, void *pw)
case HUBBUB_TOKEN_DOCTYPE:
printf("'%.*s' (%svalid)\n",
(int) token->data.doctype.name.len,
- pbuffer + token->data.doctype.name.data_off,
+ pbuffer + token->data.doctype.name.data.off,
token->data.doctype.correct ? "" : "in");
break;
case HUBBUB_TOKEN_START_TAG:
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data_off,
+ pbuffer + token->data.tag.name.data.off,
(token->data.tag.n_attributes > 0) ?
"attributes:" : "");
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data_off,
+ pbuffer + token->data.tag.attributes[i].name.data.off,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data_off);
+ pbuffer + token->data.tag.attributes[i].value.data.off);
}
break;
case HUBBUB_TOKEN_END_TAG:
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data_off,
+ pbuffer + token->data.tag.name.data.off,
(token->data.tag.n_attributes > 0) ?
"attributes:" : "");
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data_off,
+ pbuffer + token->data.tag.attributes[i].name.data.off,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data_off);
+ pbuffer + token->data.tag.attributes[i].value.data.off);
}
break;
case HUBBUB_TOKEN_COMMENT:
printf("'%.*s'\n", (int) token->data.comment.len,
- pbuffer + token->data.comment.data_off);
+ pbuffer + token->data.comment.data.off);
break;
case HUBBUB_TOKEN_CHARACTER:
printf("'%.*s'\n", (int) token->data.character.len,
- pbuffer + token->data.character.data_off);
+ pbuffer + token->data.character.data.off);
break;
case HUBBUB_TOKEN_EOF:
printf("\n");
diff --git a/test/parser.c b/test/parser.c
index fe2659d..fa2afb8 100644
--- a/test/parser.c
+++ b/test/parser.c
@@ -129,44 +129,44 @@ void token_handler(const hubbub_token *token, void *pw)
case HUBBUB_TOKEN_DOCTYPE:
printf("'%.*s' (%svalid)\n",
(int) token->data.doctype.name.len,
- pbuffer + token->data.doctype.name.data_off,
+ pbuffer + token->data.doctype.name.data.off,
token->data.doctype.correct ? "" : "in");
break;
case HUBBUB_TOKEN_START_TAG:
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data_off,
+ pbuffer + token->data.tag.name.data.off,
(token->data.tag.n_attributes > 0) ?
"attributes:" : "");
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data_off,
+ pbuffer + token->data.tag.attributes[i].name.data.off,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data_off);
+ pbuffer + token->data.tag.attributes[i].value.data.off);
}
break;
case HUBBUB_TOKEN_END_TAG:
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data_off,
+ pbuffer + token->data.tag.name.data.off,
(token->data.tag.n_attributes > 0) ?
"attributes:" : "");
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data_off,
+ pbuffer + token->data.tag.attributes[i].name.data.off,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data_off);
+ pbuffer + token->data.tag.attributes[i].value.data.off);
}
break;
case HUBBUB_TOKEN_COMMENT:
printf("'%.*s'\n", (int) token->data.comment.len,
- pbuffer + token->data.comment.data_off);
+ pbuffer + token->data.comment.data.off);
break;
case HUBBUB_TOKEN_CHARACTER:
printf("'%.*s'\n", (int) token->data.character.len,
- pbuffer + token->data.character.data_off);
+ pbuffer + token->data.character.data.off);
break;
case HUBBUB_TOKEN_EOF:
printf("\n");
diff --git a/test/tokeniser.c b/test/tokeniser.c
index 271b986..32ecdbc 100644
--- a/test/tokeniser.c
+++ b/test/tokeniser.c
@@ -128,44 +128,44 @@ void token_handler(const hubbub_token *token, void *pw)
case HUBBUB_TOKEN_DOCTYPE:
printf("'%.*s' (%svalid)\n",
(int) token->data.doctype.name.len,
- pbuffer + token->data.doctype.name.data_off,
+ pbuffer + token->data.doctype.name.data.off,
token->data.doctype.correct ? "" : "in");
break;
case HUBBUB_TOKEN_START_TAG:
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data_off,
+ pbuffer + token->data.tag.name.data.off,
(token->data.tag.n_attributes > 0) ?
"attributes:" : "");
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data_off,
+ pbuffer + token->data.tag.attributes[i].name.data.off,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data_off);
+ pbuffer + token->data.tag.attributes[i].value.data.off);
}
break;
case HUBBUB_TOKEN_END_TAG:
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data_off,
+ pbuffer + token->data.tag.name.data.off,
(token->data.tag.n_attributes > 0) ?
"attributes:" : "");
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data_off,
+ pbuffer + token->data.tag.attributes[i].name.data.off,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data_off);
+ pbuffer + token->data.tag.attributes[i].value.data.off);
}
break;
case HUBBUB_TOKEN_COMMENT:
printf("'%.*s'\n", (int) token->data.comment.len,
- pbuffer + token->data.comment.data_off);
+ pbuffer + token->data.comment.data.off);
break;
case HUBBUB_TOKEN_CHARACTER:
printf("'%.*s'\n", (int) token->data.character.len,
- pbuffer + token->data.character.data_off);
+ pbuffer + token->data.character.data.off);
break;
case HUBBUB_TOKEN_EOF:
printf("\n");
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index 103a3d5..f72e0d7 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -280,7 +280,7 @@ void token_handler(const hubbub_token *token, void *pw)
bool expvalid = json_object_get_boolean((struct json_object *)
array_list_get_idx(items, 2));
char *gotname = (char *) (ctx->pbuffer +
- token->data.doctype.name.data_off);
+ token->data.doctype.name.data.off);
printf("'%.*s' (%svalid)\n",
(int) token->data.doctype.name.len,
@@ -302,7 +302,7 @@ void token_handler(const hubbub_token *token, void *pw)
(struct json_object *)
array_list_get_idx(items, 2))->head;
char *tagname = (char *) (ctx->pbuffer +
- token->data.tag.name.data_off);
+ token->data.tag.name.data.off);
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
@@ -318,11 +318,11 @@ void token_handler(const hubbub_token *token, void *pw)
char *expval = json_object_get_string(
(struct json_object *) expattrs->v);
char *gotname = (char *) (ctx->pbuffer +
- token->data.tag.attributes[i].name.data_off);
+ token->data.tag.attributes[i].name.data.off);
size_t namelen =
token->data.tag.attributes[i].name.len;
char *gotval = (char *) (ctx->pbuffer +
- token->data.tag.attributes[i].value.data_off);
+ token->data.tag.attributes[i].value.data.off);
size_t vallen =
token->data.tag.attributes[i].value.len;
@@ -347,7 +347,7 @@ void token_handler(const hubbub_token *token, void *pw)
char *expname = json_object_get_string((struct json_object *)
array_list_get_idx(items, 1));
char *tagname = (char *) (ctx->pbuffer +
- token->data.tag.name.data_off);
+ token->data.tag.name.data.off);
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
@@ -364,7 +364,7 @@ void token_handler(const hubbub_token *token, void *pw)
char *expstr = json_object_get_string((struct json_object *)
array_list_get_idx(items, 1));
char *gotstr = (char *) (ctx->pbuffer +
- token->data.comment.data_off);
+ token->data.comment.data.off);
printf("'%.*s'\n", (int) token->data.comment.len, gotstr);
@@ -377,7 +377,7 @@ void token_handler(const hubbub_token *token, void *pw)
char *expstr = json_object_get_string((struct json_object *)
array_list_get_idx(items, 1));
char *gotstr = (char *) (ctx->pbuffer +
- token->data.character.data_off);
+ token->data.character.data.off);
size_t len = min(token->data.character.len,
strlen(expstr + ctx->char_off));
@@ -392,7 +392,7 @@ void token_handler(const hubbub_token *token, void *pw)
hubbub_token t;
t.type = HUBBUB_TOKEN_CHARACTER;
- t.data.character.data_off += len;
+ t.data.character.data.off += len;
t.data.character.len -= len;
ctx->char_off = 0;
diff --git a/test/tree.c b/test/tree.c
index 04ce026..f4e6c3c 100644
--- a/test/tree.c
+++ b/test/tree.c
@@ -11,7 +11,7 @@
#include "testutils.h"
-#define NODE_REF_CHUNK 1024
+#define NODE_REF_CHUNK 8192
static uint16_t *node_ref;
static uintptr_t node_ref_alloc;
static uintptr_t node_counter;
@@ -72,6 +72,22 @@ static void *myrealloc(void *ptr, size_t len, void *pw)
return realloc(ptr, len);
}
+static const uint8_t *ptr_from_hubbub_string(const hubbub_string *string)
+{
+ const uint8_t *data;
+
+ switch (string->type) {
+ case HUBBUB_STRING_OFF:
+ data = pbuffer + string->data.off;
+ break;
+ case HUBBUB_STRING_PTR:
+ data = string->data.ptr;
+ break;
+ }
+
+ return data;
+}
+
int main(int argc, char **argv)
{
hubbub_parser *parser;
@@ -188,7 +204,7 @@ void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
int create_comment(void *ctx, const hubbub_string *data, void **result)
{
printf("Creating (%u) [comment '%.*s']\n", ++node_counter,
- data->len, pbuffer + data->data_off);
+ data->len, ptr_from_hubbub_string(data));
GROW_REF
node_ref[node_counter] = 0;
@@ -208,7 +224,7 @@ int create_doctype(void *ctx, const hubbub_string *qname,
UNUSED(system_id);
printf("Creating (%u) [doctype '%.*s']\n", ++node_counter,
- qname->len, pbuffer + qname->data_off);
+ qname->len, ptr_from_hubbub_string(qname));
GROW_REF
node_ref[node_counter] = 0;
@@ -223,7 +239,7 @@ int create_doctype(void *ctx, const hubbub_string *qname,
int create_element(void *ctx, const hubbub_tag *tag, void **result)
{
printf("Creating (%u) [element '%.*s']\n", ++node_counter,
- tag->name.len, pbuffer + tag->name.data_off);
+ tag->name.len, ptr_from_hubbub_string(&tag->name));
GROW_REF
node_ref[node_counter] = 0;
@@ -254,7 +270,7 @@ int create_element_verbatim(void *ctx, const uint8_t *name, size_t len,
int create_text(void *ctx, const hubbub_string *data, void **result)
{
printf("Creating (%u) [text '%.*s']\n", ++node_counter,
- data->len, pbuffer + data->data_off);
+ data->len, ptr_from_hubbub_string(data));
GROW_REF
node_ref[node_counter] = 0;
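
With ptr_from_hubbub_string in place, the test callbacks no longer need to know how a string is represented. A hypothetical extension in the same style (dump_attributes is not part of the patch), printing a tag's attributes the way create_element prints the tag name:

    /* Sketch: dump a tag's attributes via the new helper; assumes the
     * same file scope as the callbacks above (pbuffer, <stdio.h>). */
    static void dump_attributes(const hubbub_tag *tag)
    {
    	uint32_t i;

    	for (i = 0; i < tag->n_attributes; i++) {
    		printf("\t'%.*s' = '%.*s'\n",
    			(int) tag->attributes[i].name.len,
    			ptr_from_hubbub_string(&tag->attributes[i].name),
    			(int) tag->attributes[i].value.len,
    			ptr_from_hubbub_string(&tag->attributes[i].value));
    	}
    }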