summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andrew Sidwell <andy@entai.co.uk> 2008-06-23 03:06:24 +0000
committer: Andrew Sidwell <andy@entai.co.uk> 2008-06-23 03:06:24 +0000
commit: 32b6a91bddaade84dd9287c7c9f93b6f5b712e9e (patch)
tree: 40eca7e29ca8d1cb6c39beb5342a4b1d1614b5b0
parent: 8fe466a70d9e96d7e5eb0a619a3ea4f23c9b32f9 (diff)
download: libhubbub-32b6a91bddaade84dd9287c7c9f93b6f5b712e9e.tar.gz
          libhubbub-32b6a91bddaade84dd9287c7c9f93b6f5b712e9e.tar.bz2
Update the treebuilder so it's closer to current spec (I'm sure I've missed stuff).
svn path=/trunk/hubbub/; revision=4426
-rw-r--r-- src/treebuilder/in_body.c 57
-rw-r--r-- src/treebuilder/internal.h 16
-rw-r--r-- src/treebuilder/treebuilder.c 87
3 files changed, 108 insertions, 52 deletions
diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c
index 7fefdfd..c19b55e 100644
--- a/src/treebuilder/in_body.c
+++ b/src/treebuilder/in_body.c
@@ -247,24 +247,35 @@ bool process_start_tag(hubbub_treebuilder *treebuilder,
if (type == HTML) {
process_html_in_body(treebuilder, token);
- } else if (type == BASE || type == LINK || type == META) {
- process_base_link_meta_in_head(treebuilder,
- token, type);
+ } else if (type == BASE || type == COMMAND ||
+ type == EVENT_SOURCE || type == LINK) {
+ process_base_link_meta_in_head(treebuilder, token, type);
+
+ /** \todo ack sc flag */
+ } else if (type == META) {
+ process_base_link_meta_in_head(treebuilder, token, type);
+
+ /** \todo ack sc flag */
+ /** \todo detect charset */
} else if (type == SCRIPT) {
process_script_in_head(treebuilder, token);
- } else if (type == STYLE) {
+ } else if (type == NOFRAMES || type == STYLE) {
parse_generic_rcdata(treebuilder, token, false);
} else if (type == TITLE) {
parse_generic_rcdata(treebuilder, token, true);
} else if (type == BODY) {
process_body_in_body(treebuilder, token);
- } else if (type == ADDRESS || type == BLOCKQUOTE ||
- type == CENTER || type == DIR ||
- type == DIV || type == DL ||
- type == FIELDSET || type == H1 || type == H2 ||
- type == H3 || type == H4 || type == H5 ||
- type == H6 || type == MENU || type == OL ||
- type == P || type == UL) {
+ } else if (type == ADDRESS || type == ARTICLE || type == ASIDE ||
+ type == BLOCKQUOTE || type == CENTER ||
+ type == DATAGRID || type == DETAILS ||
+ type == DIALOG || type == DIR ||
+ type == DIV || type == DL || type == FIELDSET ||
+ type == FIGURE || type == FOOTER ||
+ type == H1 || type == H2 || type == H3 ||
+ type == H4 || type == H5 || type == H6 ||
+ type == HEADER || type == MENU || type == NAV ||
+ type == OL || type == P || type == SECTION ||
+ type == UL) {
process_container_in_body(treebuilder, token);
} else if (type == PRE || type == LISTING) {
process_container_in_body(treebuilder, token);
@@ -334,19 +345,25 @@ bool process_start_tag(hubbub_treebuilder *treebuilder,
treebuilder->context.mode == IN_CELL) {
treebuilder->context.mode = IN_SELECT_IN_TABLE;
}
+ } else if (type == RP || type == RT) {
+ /** \todo ruby */
+ } else if (type == MATH) {
+ reconstruct_active_formatting_list(treebuilder);
+ /** \todo adjust foreign attributes */
+ /** \todo insert foreign element */
+ if (token->data.tag.self_closing) {
+ /** \todo pop off the stack of open elements */
+ /** \todo ack sc flag */
+ } else {
+ /** \todo set to "in foreign content" */
+ }
} else if (type == CAPTION || type == COL || type == COLGROUP ||
- type == FRAME || type == FRAMESET ||
- type == HEAD || type == OPTION ||
- type == OPTGROUP || type == TBODY ||
+ type == FRAME || type == FRAMESET ||
+ type == HEAD || type == TBODY ||
type == TD || type == TFOOT || type == TH ||
type == THEAD || type == TR) {
/** \todo parse error */
-/* } else if (type == EVENT_SOURCE || type == SECTION ||
- type == NAV || type == ARTICLE ||
- type == ASIDE || type == HEADER ||
- type == FOOTER || type == DATAGRID ||
- type == COMMAND) {
-*/ } else {
+ } else {
process_phrasing_in_body(treebuilder, token);
}
diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h
index e5410f5..3d966e5 100644
--- a/src/treebuilder/internal.h
+++ b/src/treebuilder/internal.h
@@ -39,19 +39,21 @@ typedef enum
typedef enum
{
/* Special */
- ADDRESS, AREA, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, BODY, BR, CENTER,
- COL, COLGROUP, DD, DIR, DIV, DL, DT, EMBED, FIELDSET, FORM, FRAME,
- FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HR, IFRAME, IMAGE, IMG, INPUT,
- ISINDEX, LI, LINK, LISTING, MENU, META, NOEMBED, NOFRAMES, NOSCRIPT,
- OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, PRE, SCRIPT, SELECT, SPACER,
- STYLE, TBODY, TEXTAREA, TFOOT, THEAD, TITLE, TR, UL, WBR,
+ ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE,
+ BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS,
+ DIALOG, DIR, DIV, DL, DT, EMBED, EVENT_SOURCE, FIELDSET, FIGURE,
+ FOOTER, FORM, FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER,
+ HR, IFRAME, IMAGE, IMG, INPUT, ISINDEX, LI, LINK, LISTING, MENU, META,
+ NAV, NOEMBED, NOFRAMES, NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM,
+ PLAINTEXT, PRE, SCRIPT, SECTION, SELECT, SPACER, STYLE, TBODY,
+ TEXTAREA, TFOOT, THEAD, TITLE, TR, UL, WBR,
/* Scoping */
APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH,
/* Formatting */
A, B, BIG, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U,
/* Phrasing */
/**< \todo Enumerate phrasing elements */
- XMP, LABEL,
+ LABEL, MATH, RP, RT, XMP,
UNKNOWN,
} element_type;
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index 90cca11..a16c298 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -390,12 +390,12 @@ bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
treebuilder->tree_handler->set_quirks_mode(
treebuilder->tree_handler->ctx,
HUBBUB_QUIRKS_MODE_FULL);
-
+ treebuilder->context.mode = BEFORE_HTML;
reprocess = true;
}
break;
case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
+ process_comment_append(treebuilder, token,
treebuilder->context.document);
break;
case HUBBUB_TOKEN_DOCTYPE:
@@ -403,11 +403,14 @@ bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
int success;
void *doctype, *appended;
+ /** \todo parse error */
+
/** \todo need public and system ids from tokeniser */
success = treebuilder->tree_handler->create_doctype(
treebuilder->tree_handler->ctx,
&token->data.doctype.name,
- NULL, NULL, &doctype);
+ &token->data.doctype.public_id,
+ &token->data.doctype.system_id, &doctype);
if (success != 0) {
/** \todo errors */
}
@@ -424,7 +427,7 @@ bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
doctype);
}
- /** \todo doctype processing */
+ /* \todo look up the doctype in a catalog */
treebuilder->tree_handler->unref_node(
treebuilder->tree_handler->ctx, appended);
@@ -474,7 +477,7 @@ bool handle_before_html(hubbub_treebuilder *treebuilder,
treebuilder->context.document);
break;
case HUBBUB_TOKEN_CHARACTER:
- reprocess = process_characters_expect_whitespace(treebuilder,
+ reprocess = process_characters_expect_whitespace(treebuilder,
token, false);
break;
case HUBBUB_TOKEN_START_TAG:
@@ -608,8 +611,7 @@ bool handle_before_head(hubbub_treebuilder *treebuilder,
element_type type = element_type_from_name(treebuilder,
&token->data.tag.name);
- if (type == HEAD || type == BODY || type == HTML ||
- type == P || type == BR) {
+ if (type == HEAD || type == BR) {
reprocess = true;
} else {
/** \todo parse error */
@@ -687,11 +689,23 @@ bool handle_in_head(hubbub_treebuilder *treebuilder,
if (type == HTML) {
/* Process as if "in body" */
process_tag_in_body(treebuilder, token);
- } else if (type == BASE || type == LINK || type == META) {
- process_base_link_meta_in_head(treebuilder,
+ } else if (type == BASE || type == COMMAND ||
+ type == EVENT_SOURCE || type == LINK) {
+ process_base_link_meta_in_head(treebuilder,
+ token, type);
+
+ /** \todo ack sc flag */
+ } else if (type == META) {
+ process_base_link_meta_in_head(treebuilder,
token, type);
+
+ /** \todo ack sc flag */
+
+ /** \todo detect charset */
} else if (type == TITLE) {
parse_generic_rcdata(treebuilder, token, true);
+ } else if (type == NOFRAMES || type == STYLE) {
+ parse_generic_rcdata(treebuilder, token, false);
} else if (type == NOSCRIPT) {
/** \todo determine if scripting is enabled */
if (false /*scripting_is_enabled*/) {
@@ -700,8 +714,6 @@ bool handle_in_head(hubbub_treebuilder *treebuilder,
insert_element(treebuilder, &token->data.tag);
treebuilder->context.mode = IN_HEAD_NOSCRIPT;
}
- } else if (type == STYLE) {
- parse_generic_rcdata(treebuilder, token, false);
} else if (type == SCRIPT) {
process_script_in_head(treebuilder, token);
} else if (type == HEAD) {
@@ -718,10 +730,9 @@ bool handle_in_head(hubbub_treebuilder *treebuilder,
if (type == HEAD) {
handled = true;
- } else if (type == BODY || type == HTML ||
- type == P || type == BR) {
+ } else if (type == BR) {
reprocess = true;
- }
+ } /** \todo parse error */
}
break;
case HUBBUB_TOKEN_EOF:
@@ -762,10 +773,12 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
+ /* This should be equivalent to "in head" processing */
reprocess = process_characters_expect_whitespace(treebuilder,
token, true);
break;
case HUBBUB_TOKEN_COMMENT:
+ /* This should be equivalent to "in head" processing */
process_comment_append(treebuilder, token,
treebuilder->context.element_stack[
treebuilder->context.current_node].node);
@@ -781,10 +794,27 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
if (type == HTML) {
/* Process as "in body" */
process_tag_in_body(treebuilder, token);
- } else if (type == LINK || type == META) {
- process_base_link_meta_in_head(treebuilder,
+ } else if (type == NOSCRIPT) {
+ handled = true;
+ } else if (type == LINK) {
+ /* This should be equivalent to "in head" processing */
+ process_base_link_meta_in_head(treebuilder,
token, type);
+
+ /** \todo ack sc flag */
+ } else if (type == META) {
+ /* This should be equivalent to "in head" processing */
+ process_base_link_meta_in_head(treebuilder,
+ token, type);
+
+ /** \todo ack sc flag */
+
+ /** \todo detect charset */
+ } else if (type == NOFRAMES) {
+ /* This should be equivalent to "in head" processing */
+ parse_generic_rcdata(treebuilder, token, true);
} else if (type == STYLE) {
+ /* This should be equivalent to "in head" processing */
parse_generic_rcdata(treebuilder, token, false);
} else if (type == HEAD || type == NOSCRIPT) {
/** \todo parse error */
@@ -801,7 +831,7 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
if (type == NOSCRIPT) {
handled = true;
- } else if (type == P || type == BR) {
+ } else if (type == BR) {
/** \todo parse error */
reprocess = true;
} else {
@@ -848,8 +878,7 @@ bool handle_after_head(hubbub_treebuilder *treebuilder,
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
- reprocess = process_characters_expect_whitespace(treebuilder,
- token, true);
+ append_text(treebuilder, &token->data.character);
break;
case HUBBUB_TOKEN_COMMENT:
process_comment_append(treebuilder, token,
@@ -873,8 +902,8 @@ bool handle_after_head(hubbub_treebuilder *treebuilder,
insert_element(treebuilder, &token->data.tag);
treebuilder->context.mode = IN_FRAMESET;
} else if (type == BASE || type == LINK || type == META ||
- type == SCRIPT || type == STYLE ||
- type == TITLE) {
+ type == NOFRAMES || type == SCRIPT ||
+ type == STYLE || type == TITLE) {
element_type otype;
void *node;
@@ -886,12 +915,16 @@ bool handle_after_head(hubbub_treebuilder *treebuilder,
/** \todo errors */
}
+
+ /* This should be identical to handling "in head" */
if (type == BASE || type == LINK || type == META) {
+ /** \todo ack sc flag */
+
process_base_link_meta_in_head(treebuilder,
token, type);
} else if (type == SCRIPT) {
process_script_in_head(treebuilder, token);
- } else if (type == STYLE) {
+ } else if (type == STYLE || type == NOFRAMES) {
parse_generic_rcdata(treebuilder, token, false);
} else if (type == TITLE) {
parse_generic_rcdata(treebuilder, token, true);
@@ -903,12 +936,16 @@ bool handle_after_head(hubbub_treebuilder *treebuilder,
/* No need to unref node as we never increased
* its reference count when pushing it on the stack */
+ } else if (type == HEAD) {
+ /** \todo parse error */
} else {
reprocess = true;
}
}
break;
case HUBBUB_TOKEN_END_TAG:
+ /** \parse error */
+ break;
case HUBBUB_TOKEN_EOF:
reprocess = true;
break;
@@ -1093,8 +1130,6 @@ bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
int success;
void *text, *appended;
- /** \todo fragment case -- skip this lot entirely */
-
success = treebuilder->tree_handler->create_text(
treebuilder->tree_handler->ctx,
&treebuilder->context.collect.string,
@@ -1103,6 +1138,8 @@ bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
/** \todo errors */
}
+ /** \todo fragment case -- skip this lot entirely */
+
success = treebuilder->tree_handler->append_child(
treebuilder->tree_handler->ctx,
treebuilder->context.collect.node,
@@ -2093,7 +2130,7 @@ void formatting_list_dump(hubbub_treebuilder *treebuilder, FILE *fp)
*/
const char *element_type_to_name(element_type type)
{
- for (uint32_t i = 0;
+ for (size_t i = 0;
i < sizeof(name_type_map) / sizeof(name_type_map[0]);
i++) {
if (name_type_map[i].type == type)