From 717836563d59f1d07c407c98d04696a768735d2b Mon Sep 17 00:00:00 2001 From: Andrew Sidwell Date: Mon, 4 Aug 2008 00:03:38 +0000 Subject: Rearrange emitting functions so they're all clumped together at the bottom of the file. svn path=/trunk/hubbub/; revision=4889 --- src/tokeniser/tokeniser.c | 334 ++++++++++++++++++++++++---------------------- 1 file changed, 172 insertions(+), 162 deletions(-) (limited to 'src') diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index 8a2eec6..ff10c4b 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -251,6 +251,13 @@ static bool hubbub_tokeniser_handle_numbered_entity( static bool hubbub_tokeniser_handle_named_entity( hubbub_tokeniser *tokeniser); +static inline bool emit_character_token(hubbub_tokeniser *tokeniser, + const hubbub_string *chars); +static inline bool emit_current_chars(hubbub_tokeniser *tokeniser); +static inline bool emit_current_tag(hubbub_tokeniser *tokeniser); +static inline bool emit_current_comment(hubbub_tokeniser *tokeniser); +static inline bool emit_current_doctype(hubbub_tokeniser *tokeniser, + bool force_quirks); static void hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser, hubbub_token *token); @@ -595,168 +602,6 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser) /* no-op */ -/** - * Emit a character token. - * - * \param tokeniser Tokeniser instance - * \param chars Pointer to hubbub_string to emit - * \return true - */ -static inline bool emit_character_token(hubbub_tokeniser *tokeniser, - const hubbub_string *chars) -{ - hubbub_token token; - - token.type = HUBBUB_TOKEN_CHARACTER; - token.data.character = *chars; - - hubbub_tokeniser_emit_token(tokeniser, &token); - - return true; -} - -/** - * Emit the current pending characters being stored in the tokeniser context. - * - * \param tokeniser Tokeniser instance - * \return true - */ -static inline bool emit_current_chars(hubbub_tokeniser *tokeniser) -{ - hubbub_token token; - - size_t len; - uintptr_t cptr = parserutils_inputstream_peek( - tokeniser->input, 0, &len); - - token.type = HUBBUB_TOKEN_CHARACTER; - token.data.character.ptr = (uint8_t *) cptr; - token.data.character.len = tokeniser->context.pending; - - hubbub_tokeniser_emit_token(tokeniser, &token); - - return true; -} - -/** - * Emit the current tag token being stored in the tokeniser context. - * - * \param tokeniser Tokeniser instance - * \return true - */ -static inline bool emit_current_tag(hubbub_tokeniser *tokeniser) -{ - hubbub_token token; - - /* Emit current tag */ - token.type = tokeniser->context.current_tag_type; - token.data.tag = tokeniser->context.current_tag; - token.data.tag.ns = HUBBUB_NS_HTML; - - /* Discard duplicate attributes */ - uint32_t i, j; - uint32_t n_attributes = token.data.tag.n_attributes; - hubbub_attribute *attrs = token.data.tag.attributes; - - /* Discard duplicate attributes */ - for (i = 0; i < n_attributes; i++) { - for (j = 0; j < n_attributes; j++) { - uint32_t move; - - if (j == i || - attrs[i].name.len != - attrs[j].name.len || - strncmp((char *)attrs[i].name.ptr, - (char *)attrs[j].name.ptr, - attrs[i].name.len) != 0) { - /* Attributes don't match */ - continue; - } - - /* Calculate amount to move */ - move = (n_attributes - 1 - - ((i < j) ? j : i)) * - sizeof(hubbub_attribute); - - if (move > 0) { - memmove((i < j) ? &attrs[j] - : &attrs[i], - (i < j) ? &attrs[j+1] - : &attrs[i+1], - move); - } - - /* And reduce the number of attributes */ - n_attributes--; - } - } - - token.data.tag.n_attributes = n_attributes; - - hubbub_tokeniser_emit_token(tokeniser, &token); - - if (token.type == HUBBUB_TOKEN_START_TAG) { - /* Save start tag name for R?CDATA */ - if (token.data.tag.name.len < - sizeof(tokeniser->context.last_start_tag_name)) { - strncpy((char *)tokeniser->context.last_start_tag_name, - (const char *)token.data.tag.name.ptr, - token.data.tag.name.len); - tokeniser->context.last_start_tag_len = - token.data.tag.name.len; - } else { - tokeniser->context.last_start_tag_name[0] = '\0'; - tokeniser->context.last_start_tag_len = 0; - } - } else /* if (token->type == HUBBUB_TOKEN_END_TAG) */ { - /* Reset content model after R?CDATA elements */ - tokeniser->content_model = HUBBUB_CONTENT_MODEL_PCDATA; - } - - return true; -} - -/** - * Emit the current comment token being stored in the tokeniser context. - * - * \param tokeniser Tokeniser instance - * \return true - */ -static inline bool emit_current_comment(hubbub_tokeniser *tokeniser) -{ - hubbub_token token; - - token.type = HUBBUB_TOKEN_COMMENT; - token.data.comment = tokeniser->context.current_comment; - - hubbub_tokeniser_emit_token(tokeniser, &token); - - return true; -} - -/** - * Emit the current doctype token being stored in the tokeniser context. - * - * \param tokeniser Tokeniser instance - * \param force_qurirks Force quirks mode on this document - * \return true - */ -static inline bool emit_current_doctype(hubbub_tokeniser *tokeniser, - bool force_quirks) -{ - hubbub_token token; - - /* Emit doctype */ - token.type = HUBBUB_TOKEN_DOCTYPE; - token.data.doctype = tokeniser->context.current_doctype; - if (force_quirks == true) - token.data.doctype.force_quirks = true; - - hubbub_tokeniser_emit_token(tokeniser, &token); - - return true; -} - @@ -2990,6 +2835,171 @@ bool hubbub_tokeniser_handle_named_entity(hubbub_tokeniser *tokeniser) } + +/*** Token emitting bits ***/ + +/** + * Emit a character token. + * + * \param tokeniser Tokeniser instance + * \param chars Pointer to hubbub_string to emit + * \return true + */ +static inline bool emit_character_token(hubbub_tokeniser *tokeniser, + const hubbub_string *chars) +{ + hubbub_token token; + + token.type = HUBBUB_TOKEN_CHARACTER; + token.data.character = *chars; + + hubbub_tokeniser_emit_token(tokeniser, &token); + + return true; +} + +/** + * Emit the current pending characters being stored in the tokeniser context. + * + * \param tokeniser Tokeniser instance + * \return true + */ +static inline bool emit_current_chars(hubbub_tokeniser *tokeniser) +{ + hubbub_token token; + + size_t len; + uintptr_t cptr = parserutils_inputstream_peek( + tokeniser->input, 0, &len); + + token.type = HUBBUB_TOKEN_CHARACTER; + token.data.character.ptr = (uint8_t *) cptr; + token.data.character.len = tokeniser->context.pending; + + hubbub_tokeniser_emit_token(tokeniser, &token); + + return true; +} + +/** + * Emit the current tag token being stored in the tokeniser context. + * + * \param tokeniser Tokeniser instance + * \return true + */ +static inline bool emit_current_tag(hubbub_tokeniser *tokeniser) +{ + hubbub_token token; + + /* Emit current tag */ + token.type = tokeniser->context.current_tag_type; + token.data.tag = tokeniser->context.current_tag; + token.data.tag.ns = HUBBUB_NS_HTML; + + /* Discard duplicate attributes */ + uint32_t i, j; + uint32_t n_attributes = token.data.tag.n_attributes; + hubbub_attribute *attrs = token.data.tag.attributes; + + /* Discard duplicate attributes */ + for (i = 0; i < n_attributes; i++) { + for (j = 0; j < n_attributes; j++) { + uint32_t move; + + if (j == i || + attrs[i].name.len != + attrs[j].name.len || + strncmp((char *)attrs[i].name.ptr, + (char *)attrs[j].name.ptr, + attrs[i].name.len) != 0) { + /* Attributes don't match */ + continue; + } + + /* Calculate amount to move */ + move = (n_attributes - 1 - + ((i < j) ? j : i)) * + sizeof(hubbub_attribute); + + if (move > 0) { + memmove((i < j) ? &attrs[j] + : &attrs[i], + (i < j) ? &attrs[j+1] + : &attrs[i+1], + move); + } + + /* And reduce the number of attributes */ + n_attributes--; + } + } + + token.data.tag.n_attributes = n_attributes; + + hubbub_tokeniser_emit_token(tokeniser, &token); + + if (token.type == HUBBUB_TOKEN_START_TAG) { + /* Save start tag name for R?CDATA */ + if (token.data.tag.name.len < + sizeof(tokeniser->context.last_start_tag_name)) { + strncpy((char *)tokeniser->context.last_start_tag_name, + (const char *)token.data.tag.name.ptr, + token.data.tag.name.len); + tokeniser->context.last_start_tag_len = + token.data.tag.name.len; + } else { + tokeniser->context.last_start_tag_name[0] = '\0'; + tokeniser->context.last_start_tag_len = 0; + } + } else /* if (token->type == HUBBUB_TOKEN_END_TAG) */ { + /* Reset content model after R?CDATA elements */ + tokeniser->content_model = HUBBUB_CONTENT_MODEL_PCDATA; + } + + return true; +} + +/** + * Emit the current comment token being stored in the tokeniser context. + * + * \param tokeniser Tokeniser instance + * \return true + */ +static inline bool emit_current_comment(hubbub_tokeniser *tokeniser) +{ + hubbub_token token; + + token.type = HUBBUB_TOKEN_COMMENT; + token.data.comment = tokeniser->context.current_comment; + + hubbub_tokeniser_emit_token(tokeniser, &token); + + return true; +} + +/** + * Emit the current doctype token being stored in the tokeniser context. + * + * \param tokeniser Tokeniser instance + * \param force_qurirks Force quirks mode on this document + * \return true + */ +static inline bool emit_current_doctype(hubbub_tokeniser *tokeniser, + bool force_quirks) +{ + hubbub_token token; + + /* Emit doctype */ + token.type = HUBBUB_TOKEN_DOCTYPE; + token.data.doctype = tokeniser->context.current_doctype; + if (force_quirks == true) + token.data.doctype.force_quirks = true; + + hubbub_tokeniser_emit_token(tokeniser, &token); + + return true; +} + /** * Emit a token, performing sanity checks if necessary * -- cgit v1.2.3