author      Andrew Sidwell <andy@entai.co.uk>    2008-08-13 15:27:35 +0000
committer   Andrew Sidwell <andy@entai.co.uk>    2008-08-13 15:27:35 +0000
commit      523dc4f249106cfcd8547ad0743b76bb1a4d68cf (patch)
tree        ee8ea239f850325811786a405f689a69fad52216 /src/tokeniser
parent      7bd2aa99c18825ed092f2f191d0d7363c416a26b (diff)
Fix tokeniser so make test passes, with possible perf hit.
svn path=/trunk/hubbub/; revision=5093
Diffstat (limited to 'src/tokeniser')
-rw-r--r--    src/tokeniser/tokeniser.c    61
1 file changed, 43 insertions(+), 18 deletions(-)
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index db8fbd6..b01ada9 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -579,15 +579,11 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
* \todo document them properly here
*/
-#define START_BUF(str, cptr, lengt) \
+#define START_BUF(str, cptr, length) \
do { \
- uint8_t *data = tokeniser->buffer->data + \
- tokeniser->buffer->length; \
- parserutils_buffer_append( \
- tokeniser->buffer, \
- cptr, (lengt)); \
- (str).ptr = data; \
- (str).len = (lengt); \
+ parserutils_buffer_append(tokeniser->buffer, \
+ cptr, (length)); \
+ (str).len = (length); \
} while (0)
#define COLLECT(str, cptr, length) \
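
Editor's note: the rewritten START_BUF no longer caches a pointer into tokeniser->buffer at append time. The likely motivation is that parserutils_buffer_append() can grow, and therefore move, the buffer's backing storage, so a pointer saved during an earlier append may dangle by the time the token is emitted. Only lengths are recorded during tokenisation; the pointers are reconstructed once the buffer has stopped growing (see the emit_current_tag and emit_current_doctype hunks below). A minimal standalone sketch of the hazard, using a made-up growbuf type rather than the parserutils API:

    /* Sketch only (not hubbub code): caching a pointer into a growable
     * buffer is unsafe because realloc() may move the storage.  Offsets
     * and lengths survive reallocation; raw pointers do not.  Error
     * handling is omitted for brevity. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct {
        uint8_t *data;
        size_t len;
        size_t cap;
    } growbuf;

    static void growbuf_append(growbuf *b, const void *src, size_t n)
    {
        if (b->len + n > b->cap) {
            b->cap = (b->len + n) * 2;
            b->data = realloc(b->data, b->cap);  /* may move the block */
        }
        memcpy(b->data + b->len, src, n);
        b->len += n;
    }

    int main(void)
    {
        growbuf b = { malloc(4), 0, 4 };

        growbuf_append(&b, "name", 4);
        size_t name_off = 0;           /* offset: stable across growth   */
        uint8_t *name_ptr = b.data;    /* pointer: invalid after realloc */

        growbuf_append(&b, "enough-extra-bytes-to-force-growth", 34);

        /* name_ptr may now dangle; recompute from the offset instead. */
        (void) name_ptr;
        printf("%.4s\n", (const char *) (b.data + name_off));

        free(b.data);
        return 0;
    }
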
@@ -1055,7 +1051,7 @@ hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
assert(tokeniser->context.pending > 0);
/* assert(tokeniser->context.chars.ptr[0] == '<'); */
assert(ctag->name.len > 0);
- assert(ctag->name.ptr);
+/* assert(ctag->name.ptr); */
if (cptr == PARSERUTILS_INPUTSTREAM_OOD) {
return HUBBUB_OOD;
@@ -1445,6 +1441,9 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_uq(hubbub_tokeniser *tokeni
c = CHAR(cptr);
+ assert(c == '&' ||
+ ctag->attributes[ctag->n_attributes - 1].value.len >= 1);
+
if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
tokeniser->context.pending += len;
tokeniser->state = STATE_BEFORE_ATTRIBUTE_NAME;
@@ -1498,12 +1497,7 @@ hubbub_error hubbub_tokeniser_handle_character_reference_in_attribute_value(
tokeniser->context.match_entity.length
+ 1;
- if (attr->value.len == 0) {
- START_BUF(attr->value,
- utf8, sizeof(utf8) - len);
- } else {
- COLLECT(attr->value, utf8, sizeof(utf8) - len);
- }
+ COLLECT_MS(attr->value, utf8, sizeof(utf8) - len);
} else {
size_t len;
uintptr_t cptr = parserutils_inputstream_peek(
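
Editor's note: with START_BUF reduced to append-and-record-length, starting a string and extending one become the same operation, so the old two-way branch collapses into a single COLLECT_MS call. COLLECT_MS itself is not shown in this hunk; a plausible definition, consistent with the new START_BUF above (an assumption, not copied from the source):

    /* Hypothetical COLLECT_MS ("maybe start"): append the bytes and
     * extend the recorded length; the pointer is reconstructed later,
     * at emit time.  Works for both fresh (len == 0) and partially
     * collected strings. */
    #define COLLECT_MS(str, cptr, length) \
        do { \
            parserutils_buffer_append(tokeniser->buffer, \
                    (uint8_t *) (cptr), (length)); \
            (str).len += (length); \
        } while (0)
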
@@ -2850,13 +2844,27 @@ hubbub_error emit_current_tag(hubbub_tokeniser *tokeniser)
/* Emit current tag */
token.type = tokeniser->context.current_tag_type;
token.data.tag = tokeniser->context.current_tag;
- token.data.tag.ns = HUBBUB_NS_HTML;
+ token.data.tag.ns = HUBBUB_NS_HTML;
+
- /* Discard duplicate attributes */
- uint32_t i, j;
uint32_t n_attributes = token.data.tag.n_attributes;
hubbub_attribute *attrs = token.data.tag.attributes;
+ /* Set pointers correctly... */
+ uint8_t *ptr = tokeniser->buffer->data;
+ token.data.tag.name.ptr = tokeniser->buffer->data;
+ ptr += token.data.tag.name.len;
+
+ for (uint32_t i = 0; i < n_attributes; i++) {
+ attrs[i].name.ptr = ptr;
+ ptr += attrs[i].name.len;
+ attrs[i].value.ptr = ptr;
+ ptr += attrs[i].value.len;
+ }
+
+
+ uint32_t i, j;
+
/* Discard duplicate attributes */
for (i = 0; i < n_attributes; i++) {
for (j = 0; j < n_attributes; j++) {
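
Editor's note: the fix-up walk relies on the strings having been appended to tokeniser->buffer in a fixed order: the tag name first, then each attribute's name immediately followed by its value. Because only lengths were stored during tokenisation, every pointer can be recomputed with a single pass over the buffer. A self-contained illustration of the same pointer arithmetic (the span type and the literal data are invented for the example):

    /* Illustrative only: recomputing string pointers from lengths over
     * one backing buffer, mirroring the fix-up loop above. */
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { const uint8_t *ptr; size_t len; } span;

    int main(void)
    {
        /* Tag name "p", then attribute name "class", then value "x",
         * appended to the buffer in exactly that order. */
        uint8_t buf[] = "pclassx";
        span name = { NULL, 1 }, aname = { NULL, 5 }, aval = { NULL, 1 };

        const uint8_t *p = buf;
        name.ptr = p;   p += name.len;    /* tag name first       */
        aname.ptr = p;  p += aname.len;   /* then attribute name  */
        aval.ptr = p;   p += aval.len;    /* then attribute value */

        printf("<%.*s %.*s=\"%.*s\">\n",
                (int) name.len, (const char *) name.ptr,
                (int) aname.len, (const char *) aname.ptr,
                (int) aval.len, (const char *) aval.ptr);
        return 0;
    }
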
@@ -2930,6 +2938,7 @@ hubbub_error emit_current_comment(hubbub_tokeniser *tokeniser)
token.type = HUBBUB_TOKEN_COMMENT;
token.data.comment = tokeniser->context.current_comment;
+ token.data.comment.ptr = tokeniser->buffer->data;
return hubbub_tokeniser_emit_token(tokeniser, &token);
}
@@ -2952,6 +2961,22 @@ hubbub_error emit_current_doctype(hubbub_tokeniser *tokeniser,
if (force_quirks == true)
token.data.doctype.force_quirks = true;
+ /* Set pointers correctly */
+ uint8_t *ptr = tokeniser->buffer->data;
+
+ token.data.doctype.name.ptr = ptr;
+ ptr += token.data.doctype.name.len;
+
+ if (token.data.doctype.public_missing == false) {
+ token.data.doctype.public_id.ptr = ptr;
+ ptr += token.data.doctype.public_id.len;
+ }
+
+ if (token.data.doctype.system_missing == false) {
+ token.data.doctype.system_id.ptr = ptr;
+ ptr += token.data.doctype.system_id.len;
+ }
+
return hubbub_tokeniser_emit_token(tokeniser, &token);
}
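
Editor's note: the doctype emitter performs the same reconstruction, with one wrinkle: the public_missing and system_missing guards. If an identifier was never collected, no bytes for it were appended, so the walk must not assign it a pointer or step over it. A compact illustration (hypothetical types and data, not hubbub API):

    /* Illustrative only: doctype pointer fix-up with a missing public id. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { const uint8_t *ptr; size_t len; } span;

    int main(void)
    {
        /* Buffer holds the name "html" directly followed by the system
         * id "about:legacy-compat"; no public id bytes were appended. */
        uint8_t buf[] = "htmlabout:legacy-compat";
        bool public_missing = true;
        span name = { NULL, 4 }, public_id = { NULL, 0 },
                system_id = { NULL, 19 };

        const uint8_t *p = buf;
        name.ptr = p; p += name.len;
        if (!public_missing) {          /* skipped: nothing to point at */
            public_id.ptr = p; p += public_id.len;
        }
        system_id.ptr = p;              /* lands on the right bytes */

        printf("<!DOCTYPE %.*s SYSTEM \"%.*s\">\n",
                (int) name.len, (const char *) name.ptr,
                (int) system_id.len, (const char *) system_id.ptr);
        return 0;
    }
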