summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/tokeniser/tokeniser.c334
1 files changed, 172 insertions, 162 deletions
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 8a2eec6..ff10c4b 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -251,6 +251,13 @@ static bool hubbub_tokeniser_handle_numbered_entity(
static bool hubbub_tokeniser_handle_named_entity(
hubbub_tokeniser *tokeniser);
+static inline bool emit_character_token(hubbub_tokeniser *tokeniser,
+ const hubbub_string *chars);
+static inline bool emit_current_chars(hubbub_tokeniser *tokeniser);
+static inline bool emit_current_tag(hubbub_tokeniser *tokeniser);
+static inline bool emit_current_comment(hubbub_tokeniser *tokeniser);
+static inline bool emit_current_doctype(hubbub_tokeniser *tokeniser,
+ bool force_quirks);
static void hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser,
hubbub_token *token);
@@ -595,168 +602,6 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
/* no-op */
-/**
- * Emit a character token.
- *
- * \param tokeniser Tokeniser instance
- * \param chars Pointer to hubbub_string to emit
- * \return true
- */
-static inline bool emit_character_token(hubbub_tokeniser *tokeniser,
- const hubbub_string *chars)
-{
- hubbub_token token;
-
- token.type = HUBBUB_TOKEN_CHARACTER;
- token.data.character = *chars;
-
- hubbub_tokeniser_emit_token(tokeniser, &token);
-
- return true;
-}
-
-/**
- * Emit the current pending characters being stored in the tokeniser context.
- *
- * \param tokeniser Tokeniser instance
- * \return true
- */
-static inline bool emit_current_chars(hubbub_tokeniser *tokeniser)
-{
- hubbub_token token;
-
- size_t len;
- uintptr_t cptr = parserutils_inputstream_peek(
- tokeniser->input, 0, &len);
-
- token.type = HUBBUB_TOKEN_CHARACTER;
- token.data.character.ptr = (uint8_t *) cptr;
- token.data.character.len = tokeniser->context.pending;
-
- hubbub_tokeniser_emit_token(tokeniser, &token);
-
- return true;
-}
-
-/**
- * Emit the current tag token being stored in the tokeniser context.
- *
- * \param tokeniser Tokeniser instance
- * \return true
- */
-static inline bool emit_current_tag(hubbub_tokeniser *tokeniser)
-{
- hubbub_token token;
-
- /* Emit current tag */
- token.type = tokeniser->context.current_tag_type;
- token.data.tag = tokeniser->context.current_tag;
- token.data.tag.ns = HUBBUB_NS_HTML;
-
- /* Discard duplicate attributes */
- uint32_t i, j;
- uint32_t n_attributes = token.data.tag.n_attributes;
- hubbub_attribute *attrs = token.data.tag.attributes;
-
- /* Discard duplicate attributes */
- for (i = 0; i < n_attributes; i++) {
- for (j = 0; j < n_attributes; j++) {
- uint32_t move;
-
- if (j == i ||
- attrs[i].name.len !=
- attrs[j].name.len ||
- strncmp((char *)attrs[i].name.ptr,
- (char *)attrs[j].name.ptr,
- attrs[i].name.len) != 0) {
- /* Attributes don't match */
- continue;
- }
-
- /* Calculate amount to move */
- move = (n_attributes - 1 -
- ((i < j) ? j : i)) *
- sizeof(hubbub_attribute);
-
- if (move > 0) {
- memmove((i < j) ? &attrs[j]
- : &attrs[i],
- (i < j) ? &attrs[j+1]
- : &attrs[i+1],
- move);
- }
-
- /* And reduce the number of attributes */
- n_attributes--;
- }
- }
-
- token.data.tag.n_attributes = n_attributes;
-
- hubbub_tokeniser_emit_token(tokeniser, &token);
-
- if (token.type == HUBBUB_TOKEN_START_TAG) {
- /* Save start tag name for R?CDATA */
- if (token.data.tag.name.len <
- sizeof(tokeniser->context.last_start_tag_name)) {
- strncpy((char *)tokeniser->context.last_start_tag_name,
- (const char *)token.data.tag.name.ptr,
- token.data.tag.name.len);
- tokeniser->context.last_start_tag_len =
- token.data.tag.name.len;
- } else {
- tokeniser->context.last_start_tag_name[0] = '\0';
- tokeniser->context.last_start_tag_len = 0;
- }
- } else /* if (token->type == HUBBUB_TOKEN_END_TAG) */ {
- /* Reset content model after R?CDATA elements */
- tokeniser->content_model = HUBBUB_CONTENT_MODEL_PCDATA;
- }
-
- return true;
-}
-
-/**
- * Emit the current comment token being stored in the tokeniser context.
- *
- * \param tokeniser Tokeniser instance
- * \return true
- */
-static inline bool emit_current_comment(hubbub_tokeniser *tokeniser)
-{
- hubbub_token token;
-
- token.type = HUBBUB_TOKEN_COMMENT;
- token.data.comment = tokeniser->context.current_comment;
-
- hubbub_tokeniser_emit_token(tokeniser, &token);
-
- return true;
-}
-
-/**
- * Emit the current doctype token being stored in the tokeniser context.
- *
- * \param tokeniser Tokeniser instance
- * \param force_qurirks Force quirks mode on this document
- * \return true
- */
-static inline bool emit_current_doctype(hubbub_tokeniser *tokeniser,
- bool force_quirks)
-{
- hubbub_token token;
-
- /* Emit doctype */
- token.type = HUBBUB_TOKEN_DOCTYPE;
- token.data.doctype = tokeniser->context.current_doctype;
- if (force_quirks == true)
- token.data.doctype.force_quirks = true;
-
- hubbub_tokeniser_emit_token(tokeniser, &token);
-
- return true;
-}
-
@@ -2990,6 +2835,171 @@ bool hubbub_tokeniser_handle_named_entity(hubbub_tokeniser *tokeniser)
}
+
+/*** Token emitting bits ***/
+
+/**
+ * Emit a character token.
+ *
+ * \param tokeniser Tokeniser instance
+ * \param chars Pointer to hubbub_string to emit
+ * \return true
+ */
+static inline bool emit_character_token(hubbub_tokeniser *tokeniser,
+ const hubbub_string *chars)
+{
+ hubbub_token token;
+
+ token.type = HUBBUB_TOKEN_CHARACTER;
+ token.data.character = *chars;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ return true;
+}
+
+/**
+ * Emit the current pending characters being stored in the tokeniser context.
+ *
+ * \param tokeniser Tokeniser instance
+ * \return true
+ */
+static inline bool emit_current_chars(hubbub_tokeniser *tokeniser)
+{
+ hubbub_token token;
+
+ size_t len;
+ uintptr_t cptr = parserutils_inputstream_peek(
+ tokeniser->input, 0, &len);
+
+ token.type = HUBBUB_TOKEN_CHARACTER;
+ token.data.character.ptr = (uint8_t *) cptr;
+ token.data.character.len = tokeniser->context.pending;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ return true;
+}
+
+/**
+ * Emit the current tag token being stored in the tokeniser context.
+ *
+ * \param tokeniser Tokeniser instance
+ * \return true
+ */
+static inline bool emit_current_tag(hubbub_tokeniser *tokeniser)
+{
+ hubbub_token token;
+
+ /* Emit current tag */
+ token.type = tokeniser->context.current_tag_type;
+ token.data.tag = tokeniser->context.current_tag;
+ token.data.tag.ns = HUBBUB_NS_HTML;
+
+ /* Discard duplicate attributes */
+ uint32_t i, j;
+ uint32_t n_attributes = token.data.tag.n_attributes;
+ hubbub_attribute *attrs = token.data.tag.attributes;
+
+ /* Discard duplicate attributes */
+ for (i = 0; i < n_attributes; i++) {
+ for (j = 0; j < n_attributes; j++) {
+ uint32_t move;
+
+ if (j == i ||
+ attrs[i].name.len !=
+ attrs[j].name.len ||
+ strncmp((char *)attrs[i].name.ptr,
+ (char *)attrs[j].name.ptr,
+ attrs[i].name.len) != 0) {
+ /* Attributes don't match */
+ continue;
+ }
+
+ /* Calculate amount to move */
+ move = (n_attributes - 1 -
+ ((i < j) ? j : i)) *
+ sizeof(hubbub_attribute);
+
+ if (move > 0) {
+ memmove((i < j) ? &attrs[j]
+ : &attrs[i],
+ (i < j) ? &attrs[j+1]
+ : &attrs[i+1],
+ move);
+ }
+
+ /* And reduce the number of attributes */
+ n_attributes--;
+ }
+ }
+
+ token.data.tag.n_attributes = n_attributes;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ if (token.type == HUBBUB_TOKEN_START_TAG) {
+ /* Save start tag name for R?CDATA */
+ if (token.data.tag.name.len <
+ sizeof(tokeniser->context.last_start_tag_name)) {
+ strncpy((char *)tokeniser->context.last_start_tag_name,
+ (const char *)token.data.tag.name.ptr,
+ token.data.tag.name.len);
+ tokeniser->context.last_start_tag_len =
+ token.data.tag.name.len;
+ } else {
+ tokeniser->context.last_start_tag_name[0] = '\0';
+ tokeniser->context.last_start_tag_len = 0;
+ }
+ } else /* if (token->type == HUBBUB_TOKEN_END_TAG) */ {
+ /* Reset content model after R?CDATA elements */
+ tokeniser->content_model = HUBBUB_CONTENT_MODEL_PCDATA;
+ }
+
+ return true;
+}
+
+/**
+ * Emit the current comment token being stored in the tokeniser context.
+ *
+ * \param tokeniser Tokeniser instance
+ * \return true
+ */
+static inline bool emit_current_comment(hubbub_tokeniser *tokeniser)
+{
+ hubbub_token token;
+
+ token.type = HUBBUB_TOKEN_COMMENT;
+ token.data.comment = tokeniser->context.current_comment;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ return true;
+}
+
+/**
+ * Emit the current doctype token being stored in the tokeniser context.
+ *
+ * \param tokeniser Tokeniser instance
+ * \param force_qurirks Force quirks mode on this document
+ * \return true
+ */
+static inline bool emit_current_doctype(hubbub_tokeniser *tokeniser,
+ bool force_quirks)
+{
+ hubbub_token token;
+
+ /* Emit doctype */
+ token.type = HUBBUB_TOKEN_DOCTYPE;
+ token.data.doctype = tokeniser->context.current_doctype;
+ if (force_quirks == true)
+ token.data.doctype.force_quirks = true;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ return true;
+}
+
/**
* Emit a token, performing sanity checks if necessary
*