summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org> 2009-03-10 12:49:49 +0000
committer John Mark Bell <jmb@netsurf-browser.org> 2009-03-10 12:49:49 +0000
commit a6a2f7a824491d82af40f1ce8699ef4eb7d192e3 (patch)
tree adf73cd5fbdd41250b13c2367c001ff50cf661f2 /src
parent fb097fc6e5e031c38cf9d81275559928064cd42e (diff)
download libhubbub-a6a2f7a824491d82af40f1ce8699ef4eb7d192e3.tar.gz
libhubbub-a6a2f7a824491d82af40f1ce8699ef4eb7d192e3.tar.bz2
Sync tokeniser tests with html5lib.

Sync tokeniser implementation with the spec. Fix handling of \0 in the tag open state. The unicodeCharacters test is disabled, as json-c doesn't like it.

svn path=/trunk/hubbub/; revision=6755
Diffstat (limited to 'src')
-rw-r--r-- src/tokeniser/tokeniser.c 31
1 file changed, 19 insertions, 12 deletions
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 7f68676..3b2fa9e 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -870,15 +870,6 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
ctag->n_attributes = 0;
tokeniser->state = STATE_TAG_NAME;
- } else if (c == '\0') {
- tokeniser->context.pending += len;
- tokeniser->context.current_tag_type =
- HUBBUB_TOKEN_START_TAG;
-
- START_BUF(ctag->name, u_fffd, sizeof(u_fffd));
- ctag->n_attributes = 0;
-
- tokeniser->state = STATE_TAG_NAME;
} else if (c == '>') {
/** \todo parse error */
@@ -1274,7 +1265,7 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_name(
} else {
hubbub_attribute *attr;
- if (c == '"' || c == '\'' || c == '=') {
+ if (c == '"' || c == '\'') {
/** \todo parse error */
}
@@ -1327,6 +1318,7 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
} else {
@@ -1347,6 +1339,7 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value(
tokeniser->context.pending += len;
tokeniser->state = STATE_ATTRIBUTE_VALUE_SQ;
} else if (c == '>') {
+ /** \todo parse error */
tokeniser->context.pending += len;
tokeniser->state = STATE_DATA;
@@ -1357,6 +1350,10 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value(
u_fffd, sizeof(u_fffd));
tokeniser->state = STATE_ATTRIBUTE_VALUE_UQ;
} else {
+ if (c == '=') {
+ /** \todo parse error */
+ }
+
tokeniser->context.pending += len;
START_BUF(ctag->attributes[ctag->n_attributes - 1].value,
cptr, len);
@@ -1991,6 +1988,7 @@ hubbub_error hubbub_tokeniser_handle_before_doctype_name(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
/* Emit current doctype, force-quirks on */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
@@ -2005,11 +2003,16 @@ hubbub_error hubbub_tokeniser_handle_before_doctype_name(
if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
/* pass over in silence */
} else if (c == '>') {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
if (c == '\0') {
START_BUF(cdoc->name, u_fffd, sizeof(u_fffd));
+ } else if ('A' <= c && c <= 'Z') {
+ uint8_t lc = c + 0x20;
+
+ START_BUF(cdoc->name, &lc, len);
} else {
START_BUF(cdoc->name, cptr, len);
}
@@ -2050,6 +2053,9 @@ hubbub_error hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser)
return emit_current_doctype(tokeniser, false);
} else if (c == '\0') {
COLLECT(cdoc->name, u_fffd, sizeof(u_fffd));
+ } else if ('A' <= c && c <= 'Z') {
+ uint8_t lc = c + 0x20;
+ COLLECT(cdoc->name, &lc, len);
} else {
COLLECT(cdoc->name, cptr, len);
}
@@ -2886,11 +2892,12 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
cp = cp1252Table[cp - 0x80];
} else if (cp == 0x0D) {
cp = 0x000A;
- } else if (ctx->match_entity.overflow || cp <= 0x0008 ||
+ } else if (ctx->match_entity.overflow ||
+ cp <= 0x0008 || cp == 0x000B ||
(0x000E <= cp && cp <= 0x001F) ||
(0x007F <= cp && cp <= 0x009F) ||
(0xD800 <= cp && cp <= 0xDFFF) ||
- (0xFDD0 <= cp && cp <= 0xFDDF) ||
+ (0xFDD0 <= cp && cp <= 0xFDEF) ||
(cp & 0xFFFE) == 0xFFFE) {
/* the check for cp > 0x10FFFF per spec is performed
* in the loop above to avoid overflow */