summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Sidwell <andy@entai.co.uk>2008-06-30 10:45:26 +0000
committerAndrew Sidwell <andy@entai.co.uk>2008-06-30 10:45:26 +0000
commit833ee4b1f01b5da2327ab79777219b88528162c6 (patch)
tree6e740940753b73513528c6cc932361008af3860b
parent17447f074c102bdac9fdf3e1e0308c1b2e9718b6 (diff)
downloadlibhubbub-833ee4b1f01b5da2327ab79777219b88528162c6.tar.gz
libhubbub-833ee4b1f01b5da2327ab79777219b88528162c6.tar.bz2
Add "in foreign content" handling. Not convinced this is the best way.
svn path=/trunk/hubbub/; revision=4475
-rw-r--r--src/treebuilder/Makefile1
-rw-r--r--src/treebuilder/in_foreign_content.c139
-rw-r--r--src/treebuilder/internal.h7
-rw-r--r--src/treebuilder/modes.h4
-rw-r--r--src/treebuilder/treebuilder.c6
5 files changed, 152 insertions, 5 deletions
diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile
index f2e8e3a..3b736b2 100644
--- a/src/treebuilder/Makefile
+++ b/src/treebuilder/Makefile
@@ -37,6 +37,7 @@ SRCS_$(d) := treebuilder.c \
in_head_noscript.c after_head.c in_body.c \
in_caption.c in_column_group.c in_table_body.c in_row.c \
in_cell.c in_select.c in_select_in_table.c \
+ in_foreign_content.c \
generic_rcdata.c script_collect.c
# Append to sources for component
diff --git a/src/treebuilder/in_foreign_content.c b/src/treebuilder/in_foreign_content.c
new file mode 100644
index 0000000..d1338e7
--- /dev/null
+++ b/src/treebuilder/in_foreign_content.c
@@ -0,0 +1,139 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 Andrew Sidwell
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+
+/**
+ * Determine whether any entry on the stack of open elements, other than
+ * the entry at index 0, is in a namespace other than HTML.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \return True if such an element is found, false otherwise (including
+ *         when no element stack has been allocated)
+ */
+static bool element_in_scope_in_non_html_ns(hubbub_treebuilder *treebuilder)
+{
+	uint32_t node;
+
+	if (treebuilder->context.element_stack == NULL)
+		return false;
+
+	/* Walk from the current node downwards; index 0 is never examined
+	 * because the loop stops before reaching it. */
+	for (node = treebuilder->context.current_node; node > 0; node--) {
+		/* The namespace must be compared against the namespace
+		 * constant HUBBUB_NS_HTML (as done for current_node_ns()
+		 * elsewhere in this file), not against the HTML element
+		 * type, which belongs to a different enumeration. */
+		hubbub_ns node_ns =
+				treebuilder->context.element_stack[node].ns;
+
+		if (node_ns != HUBBUB_NS_HTML)
+			return true;
+	}
+
+	return false;
+}
+
+
+
+/**
+ * Determine whether a start tag is one of the HTML-only elements that
+ * force the parser to break out of foreign content.
+ *
+ * \param type  Element type of the start tag
+ * \return True if the tag breaks out of foreign content, false otherwise
+ */
+static bool is_foreign_breakout_element(element_type type)
+{
+	switch (type) {
+	case B: case BIG: case BLOCKQUOTE: case BODY: case BR:
+	case CENTER: case CODE: case DD: case DIV: case DL: case DT:
+	case EM: case EMBED: case FONT: case H1: case H2: case H3:
+	case H4: case H5: case H6: case HEAD: case HR: case I: case IMG:
+	case LI: case LISTING: case MENU: case META: case NOBR: case OL:
+	case P: case PRE: case RUBY: case S: case SMALL: case SPAN:
+	case STRONG: case STRIKE: case SUB: case SUP: case TABLE:
+	case TT: case U: case UL: case VAR:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * Process a token using the rules of the secondary insertion mode
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+static void process_as_in_secondary(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	/* The secondary mode's handler is not called directly; instead the
+	 * mode is switched temporarily and the token is fed back through
+	 * the main token handler. */
+	treebuilder->context.mode = treebuilder->context.second_mode;
+
+	hubbub_treebuilder_token_handler(token, treebuilder);
+
+	/* If the secondary mode did not itself change the insertion mode,
+	 * undo the temporary switch: we are still in foreign content. */
+	if (treebuilder->context.mode == treebuilder->context.second_mode)
+		treebuilder->context.mode = IN_FOREIGN_CONTENT;
+
+	/* Leave foreign content once no non-HTML element remains open. */
+	if (treebuilder->context.mode == IN_FOREIGN_CONTENT &&
+			!element_in_scope_in_non_html_ns(treebuilder)) {
+		treebuilder->context.mode =
+				treebuilder->context.second_mode;
+	}
+}
+
+/**
+ * Pop elements until the current node is in the HTML namespace, then
+ * switch to the secondary insertion mode.
+ *
+ * \param treebuilder  The treebuilder instance
+ */
+static void foreign_break_out(hubbub_treebuilder *treebuilder)
+{
+	hubbub_ns cur_node_ns = current_node_ns(treebuilder);
+
+	while (cur_node_ns != HUBBUB_NS_HTML) {
+		hubbub_ns popped_ns;
+		element_type popped_type;
+		void *popped_node;
+
+		/* NOTE(review): the popped node is discarded, exactly as
+		 * before this change; confirm whether a reference on it
+		 * needs to be released here. */
+		element_stack_pop(treebuilder, &popped_ns,
+				&popped_type, &popped_node);
+
+		cur_node_ns = current_node_ns(treebuilder);
+	}
+
+	treebuilder->context.mode = treebuilder->context.second_mode;
+}
+
+/**
+ * Handle tokens in "in foreign content" insertion mode
+ *
+ * Character and comment tokens are appended to the current node and
+ * DOCTYPE tokens are ignored. Start tags are either delegated to the
+ * secondary insertion mode, cause a break-out from foreign content, or
+ * are inserted in the current node's namespace. End tags are delegated
+ * to the secondary insertion mode. EOF breaks out of foreign content.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \return True to reprocess the token, false otherwise
+ */
+bool handle_in_foreign_content(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	bool reprocess = false;
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_CHARACTER:
+		append_text(treebuilder, &token->data.character);
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+		process_comment_append(treebuilder, token,
+				treebuilder->context.element_stack[
+				treebuilder->context.current_node].node);
+		break;
+	case HUBBUB_TOKEN_DOCTYPE:
+		/** \todo parse error */
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	{
+		element_type type = element_type_from_name(treebuilder,
+				&token->data.tag.name);
+
+		element_type cur_node = current_node(treebuilder);
+		hubbub_ns cur_node_ns = current_node_ns(treebuilder);
+
+		if (cur_node_ns == HUBBUB_NS_HTML ||
+				(cur_node_ns == HUBBUB_NS_MATHML &&
+				(type != MGLYPH && type != MALIGNMARK) &&
+				(cur_node == MI || cur_node == MO ||
+				cur_node == MN || cur_node == MS ||
+				cur_node == MTEXT))) {
+			/* HTML current node, or a MathML text integration
+			 * point: the secondary mode handles the tag. */
+			process_as_in_secondary(treebuilder, token);
+		} else if (is_foreign_breakout_element(type)) {
+			/** \todo parse error */
+
+			/* Close the foreign elements, then hand the same
+			 * token to the secondary mode; without the
+			 * reprocess request the tag would be lost. */
+			foreign_break_out(treebuilder);
+			reprocess = true;
+		} else {
+			hubbub_tag tag = token->data.tag;
+
+			adjust_foreign_attributes(treebuilder, &tag);
+
+			/* Set to the right namespace and insert */
+			tag.ns = cur_node_ns;
+
+			if (token->data.tag.self_closing) {
+				insert_element_no_push(treebuilder, &tag);
+				/** \todo ack sc flag */
+			} else {
+				insert_element(treebuilder, &tag);
+			}
+		}
+	}
+		break;
+	case HUBBUB_TOKEN_END_TAG:
+		/* End tags follow the rules of the secondary mode, which
+		 * is what allows open foreign elements to be closed. */
+		process_as_in_secondary(treebuilder, token);
+		break;
+	case HUBBUB_TOKEN_EOF:
+		/** \todo parse error */
+
+		/* Switch modes before asking for reprocessing, so that the
+		 * token is not delivered to this handler again. */
+		foreign_break_out(treebuilder);
+		reprocess = true;
+		break;
+	}
+
+	return reprocess;
+}
+
diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h
index 6f7278c..f9fd09e 100644
--- a/src/treebuilder/internal.h
+++ b/src/treebuilder/internal.h
@@ -27,7 +27,9 @@ typedef enum
A, B, BIG, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U,
/* Phrasing */
/**< \todo Enumerate phrasing elements */
- LABEL, MATH, RP, RT, XMP,
+ CODE, LABEL, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP,
+/* MathML */
+ MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT,
UNKNOWN,
} element_type;
@@ -104,6 +106,8 @@ struct hubbub_treebuilder
void *alloc_pw; /**< Client private data */
};
+void hubbub_treebuilder_token_handler(const hubbub_token *token, void *pw);
+
bool process_characters_expect_whitespace(
hubbub_treebuilder *treebuilder, const hubbub_token *token,
bool insert_into_current_node);
@@ -142,6 +146,7 @@ bool element_stack_pop(hubbub_treebuilder *treebuilder,
bool element_stack_pop_until(hubbub_treebuilder *treebuilder,
element_type type);
element_type current_node(hubbub_treebuilder *treebuilder);
+hubbub_ns current_node_ns(hubbub_treebuilder *treebuilder);
element_type prev_node(hubbub_treebuilder *treebuilder);
bool formatting_list_append(hubbub_treebuilder *treebuilder,
diff --git a/src/treebuilder/modes.h b/src/treebuilder/modes.h
index c97e9de..8172ddd 100644
--- a/src/treebuilder/modes.h
+++ b/src/treebuilder/modes.h
@@ -59,6 +59,8 @@ bool handle_in_caption(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
bool handle_in_column_group(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
+bool handle_in_table_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
bool handle_in_row(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
bool handle_in_cell(hubbub_treebuilder *treebuilder,
@@ -71,6 +73,8 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
+bool handle_in_foreign_content(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
bool process_in_head(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index cef73d3..3c25540 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -66,12 +66,10 @@ static const struct {
static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
size_t len, void *pw);
-static void hubbub_treebuilder_token_handler(const hubbub_token *token,
- void *pw);
/**
- * Create a hubbub treebuilder
+ * Create a hubbub treebuilder
*
* \param tokeniser Underlying tokeniser instance
* \param alloc Memory (de)allocation function
@@ -109,7 +107,7 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
return NULL;
}
tb->context.stack_alloc = ELEMENT_STACK_CHUNK;
- /* We rely on HTML not being equal to zero to determine
+ /* We rely on HTML not being equal to zero to determine
* if the first item in the stack is in use. Assert this here. */
assert(HTML != 0);
tb->context.element_stack[0].type = 0;