From 91bee91db8eb8d9a62afb31b3be5a834e8f2135d Mon Sep 17 00:00:00 2001
From: Michael Drake
Date: Thu, 23 Jan 2014 23:41:58 +0000
Subject: Strip and collapse whitespace when gathering html option values.

---
 src/core/document.c            |  17 +++++
 src/core/document.h            |   2 +
 src/core/string.c              |  93 +++++++++++++++++++++++++++-
 src/core/string.h              |  26 ++++++++
 src/html/html_option_element.c | 140 +++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 275 insertions(+), 3 deletions(-)

diff --git a/src/core/document.c b/src/core/document.c
index 22f08f1..a78cde9 100644
--- a/src/core/document.c
+++ b/src/core/document.c
@@ -147,6 +147,14 @@ dom_exception _dom_document_initialise(dom_document *doc,
 		return err;
 	}
 
+	err = dom_string_create_interned((const uint8_t *) "script",
+			SLEN("script"), &doc->script_string);
+	if (err != DOM_NO_ERR) {
+		dom_string_unref(doc->id_name);
+		dom_string_unref(doc->class_string);
+		return err;
+	}
+
 	/* Intern the empty string. The use of a space in the constant
 	 * is to prevent the compiler warning about an empty string.
 	 */
@@ -155,6 +163,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
 	if (err != DOM_NO_ERR) {
 		dom_string_unref(doc->id_name);
 		dom_string_unref(doc->class_string);
+		dom_string_unref(doc->script_string);
 		return err;
 	}
 
@@ -165,6 +174,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
 		dom_string_unref(doc->_memo_empty);
 		dom_string_unref(doc->id_name);
 		dom_string_unref(doc->class_string);
+		dom_string_unref(doc->script_string);
 		return err;
 	}
 
@@ -176,6 +186,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
 		dom_string_unref(doc->_memo_empty);
 		dom_string_unref(doc->id_name);
 		dom_string_unref(doc->class_string);
+		dom_string_unref(doc->script_string);
 		return err;
 	}
 
@@ -188,6 +199,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
 		dom_string_unref(doc->_memo_empty);
 		dom_string_unref(doc->id_name);
 		dom_string_unref(doc->class_string);
+		dom_string_unref(doc->script_string);
 		return err;
 	}
 
@@ -201,6 +213,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
 		dom_string_unref(doc->_memo_empty);
 		dom_string_unref(doc->id_name);
 		dom_string_unref(doc->class_string);
+		dom_string_unref(doc->script_string);
 		return err;
 	}
 
@@ -215,6 +228,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
 		dom_string_unref(doc->_memo_empty);
 		dom_string_unref(doc->id_name);
 		dom_string_unref(doc->class_string);
+		dom_string_unref(doc->script_string);
 		return err;
 	}
 
@@ -230,6 +244,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
 		dom_string_unref(doc->_memo_empty);
 		dom_string_unref(doc->id_name);
 		dom_string_unref(doc->class_string);
+		dom_string_unref(doc->script_string);
 		return err;
 	}
 
@@ -246,6 +261,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
 		dom_string_unref(doc->_memo_empty);
 		dom_string_unref(doc->id_name);
 		dom_string_unref(doc->class_string);
+		dom_string_unref(doc->script_string);
 		return err;
 	}
 
@@ -283,6 +299,7 @@ bool _dom_document_finalise(dom_document *doc)
 
 	dom_string_unref(doc->id_name);
 	dom_string_unref(doc->class_string);
+	dom_string_unref(doc->script_string);
 	dom_string_unref(doc->_memo_empty);
 	dom_string_unref(doc->_memo_domnodeinserted);
 	dom_string_unref(doc->_memo_domnoderemoved);
diff --git a/src/core/document.h b/src/core/document.h
index de49cf2..2837893 100644
--- a/src/core/document.h
+++ b/src/core/document.h
@@ -54,6 +54,8 @@ struct dom_document {
 
 	dom_string *class_string; /**< The string "class". */
 
+	dom_string *script_string; /**< The string "script". */
+
 	dom_document_event_internal dei;
 			/**< The DocumentEvent interface */
 	dom_document_quirks_mode quirks;
diff --git a/src/core/string.c b/src/core/string.c
index 9ba3576..9df2cd3 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -271,8 +271,8 @@ bool dom_string_caseless_isequal(const dom_string *s1, const dom_string *s2)
 	    is2->type == DOM_STRING_INTERNED) {
 		bool match;
 
-		if (lwc_string_caseless_isequal(is1->data.intern, is2->data.intern,
-					&match) != lwc_error_ok)
+		if (lwc_string_caseless_isequal(is1->data.intern,
+				is2->data.intern, &match) != lwc_error_ok)
 			return false;
 
 		return match;
@@ -1018,3 +1018,92 @@ dom_string_tolower(dom_string *source, bool ascii_only, dom_string **lower)
 	return exc;
 }
 
+/**
+ * Test whether a byte is whitespace for dom_string_whitespace_op
+ *
+ * \param c  Byte to test
+ * \return true if \a c is space, tab, line feed, carriage return or
+ *         form feed, false otherwise
+ */
+static bool is_whitespace_byte(uint8_t c)
+{
+	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
+}
+
+/* exported function documented in string.h */
+dom_exception dom_string_whitespace_op(dom_string *s,
+		enum dom_whitespace_op op, dom_string **ret)
+{
+	const uint8_t *src_text = (const uint8_t *) dom_string_data(s);
+	size_t len = dom_string_byte_length(s);
+	const uint8_t *src_pos;
+	const uint8_t *src_end;
+	dom_exception exc;
+	uint8_t *temp_pos;
+	uint8_t *temp;
+
+	if (len == 0) {
+		/* Nothing to strip or collapse; hand back the input string.
+		 * Returning here also avoids a zero-sized malloc, whose
+		 * NULL result would be misreported as DOM_NO_MEM_ERR. */
+		*ret = dom_string_ref(s);
+		return DOM_NO_ERR;
+	}
+
+	/* No operation ever lengthens the text, so the source length is
+	 * a sufficient bound for the working buffer. */
+	temp = malloc(len);
+	if (temp == NULL) {
+		return DOM_NO_MEM_ERR;
+	}
+
+	src_pos = src_text;
+	src_end = src_text + len;
+	temp_pos = temp;
+
+	if (op & DOM_WHITESPACE_STRIP_LEADING) {
+		while (src_pos < src_end && is_whitespace_byte(*src_pos)) {
+			src_pos++;
+		}
+	}
+
+	while (src_pos < src_end) {
+		if ((op & DOM_WHITESPACE_COLLAPSE) &&
+				is_whitespace_byte(*src_pos)) {
+			/* Skip the whole run of adjacent whitespace */
+			do {
+				src_pos++;
+			} while (src_pos < src_end &&
+					is_whitespace_byte(*src_pos));
+			/* The run is replaced by a single space in output */
+			*temp_pos++ = ' ';
+		} else {
+			/* Otherwise, copy to output */
+			*temp_pos++ = *src_pos++;
+		}
+	}
+
+	if (op & DOM_WHITESPACE_STRIP_TRAILING) {
+		/* Without DOM_WHITESPACE_COLLAPSE the output may end in any
+		 * number of whitespace bytes, so drop all of them rather
+		 * than just a single trailing space. */
+		while (temp_pos > temp && is_whitespace_byte(temp_pos[-1])) {
+			temp_pos--;
+		}
+	}
+
+	/* New length */
+	len = temp_pos - temp;
+
+	/* Make new string; only DOM_STRING_CDATA input is not interned */
+	if (((dom_string_internal *) s)->type == DOM_STRING_CDATA) {
+		exc = dom_string_create(temp, len, ret);
+	} else {
+		exc = dom_string_create_interned(temp, len, ret);
+	}
+
+	free(temp);
+
+	return exc;
+}
+
diff --git a/src/core/string.h b/src/core/string.h
index cbf7d36..9fca1fa 100644
--- a/src/core/string.h
+++ b/src/core/string.h
@@ -14,5 +14,31 @@
 /* Map the lwc_error to dom_exception */
 dom_exception _dom_exception_from_lwc_error(lwc_error err);
 
+/** Whitespace operations; values may be combined with bitwise OR */
+enum dom_whitespace_op {
+	DOM_WHITESPACE_STRIP_LEADING	= (1 << 0),
+	DOM_WHITESPACE_STRIP_TRAILING	= (1 << 1),
+	DOM_WHITESPACE_STRIP		= DOM_WHITESPACE_STRIP_LEADING |
+					  DOM_WHITESPACE_STRIP_TRAILING,
+	DOM_WHITESPACE_COLLAPSE		= (1 << 2),
+	DOM_WHITESPACE_STRIP_COLLAPSE	= DOM_WHITESPACE_STRIP |
+					  DOM_WHITESPACE_COLLAPSE
+};
+
+/** Perform whitespace operations on given string
+ *
+ * Whitespace is any of: space, tab, line feed, carriage return, form feed.
+ *
+ * \param s	Given string
+ * \param op	Whitespace operation(s) to perform
+ * \param ret	New string with whitespace ops performed.  Caller owns ref
+ *
+ * \return DOM_NO_ERR on success, appropriate error otherwise.
+ *
+ * \note If \a s is empty, \a ret is a new reference to \a s itself.
+ */
+dom_exception dom_string_whitespace_op(dom_string *s,
+		enum dom_whitespace_op op, dom_string **ret);
+
 #endif
 
diff --git a/src/html/html_option_element.c b/src/html/html_option_element.c
index 1584bac..b133deb 100644
--- a/src/html/html_option_element.c
+++ b/src/html/html_option_element.c
@@ -8,6 +8,7 @@
 #include
 #include
 
+#include
 #include
 #include
 
@@ -152,6 +153,142 @@ dom_exception dom_html_option_element_set_default_selected(
 	return DOM_NO_ERR;
 }
 
+/**
+ * Gather the concatenated text of a node's descendants
+ *
+ * Helper for dom_html_option_element_get_text_node.  Comment and
+ * processing instruction children are ignored, as are script elements
+ * in the HTML or SVG namespace.  Other elements are descended into, and
+ * every remaining child contributes its text content.
+ *
+ * \param n     Node whose children to gather text from
+ * \param text  Updated to the gathered text, or NULL if there was none.
+ *              Caller owns ref
+ * \return DOM_NO_ERR on success, appropriate error otherwise.
+ */
+static dom_exception dom_html_option_element_gather_text(
+		dom_node_internal *n, dom_string **text)
+{
+	dom_document *owner = NULL;
+	dom_string *str = NULL;
+	dom_exception exc = DOM_NO_ERR;
+
+	*text = NULL;
+
+	assert(n->owner != NULL);
+	owner = n->owner;
+
+	for (n = n->first_child; n != NULL; n = n->next) {
+		/* Text contributed by this child; NULL if none */
+		dom_string *part = NULL;
+
+		/* Skip irrelevant node types */
+		if (n->type == DOM_COMMENT_NODE ||
+		    n->type == DOM_PROCESSING_INSTRUCTION_NODE)
+			continue;
+
+		if (n->type == DOM_ELEMENT_NODE) {
+			dom_string *node_name = NULL;
+			bool is_script = false;
+
+			/* Skip script elements with html or svg namespace */
+			exc = dom_node_get_local_name(n, &node_name);
+			if (exc != DOM_NO_ERR)
+				break;
+			if (dom_string_caseless_isequal(node_name,
+					owner->script_string)) {
+				dom_string *node_ns = NULL;
+
+				exc = dom_node_get_namespace(n, &node_ns);
+				if (exc != DOM_NO_ERR) {
+					dom_string_unref(node_name);
+					break;
+				}
+				if (dom_string_caseless_isequal(node_ns,
+						dom_namespaces[
+							DOM_NAMESPACE_HTML]) ||
+				    dom_string_caseless_isequal(node_ns,
+						dom_namespaces[
+							DOM_NAMESPACE_SVG])) {
+					is_script = true;
+				}
+				dom_string_unref(node_ns);
+			}
+			dom_string_unref(node_name);
+			if (is_script)
+				continue;
+
+			/* Gather the text inside child element 'n' */
+			exc = dom_html_option_element_gather_text(n, &part);
+		} else {
+			/* Handle other nodes with their get_text_content
+			 * specialisation */
+			exc = dom_node_get_text_content(n, &part);
+		}
+		if (exc != DOM_NO_ERR)
+			break;
+
+		if (part == NULL)
+			continue;
+
+		if (str == NULL) {
+			/* First text found; take over its reference */
+			str = part;
+		} else {
+			/* Append to the text gathered so far */
+			dom_string *joined = NULL;
+
+			exc = dom_string_concat(str, part, &joined);
+			dom_string_unref(part);
+			if (exc != DOM_NO_ERR)
+				break;
+			dom_string_unref(str);
+			str = joined;
+		}
+	}
+
+	if (exc != DOM_NO_ERR) {
+		if (str != NULL)
+			dom_string_unref(str);
+		return exc;
+	}
+
+	*text = str;
+
+	return DOM_NO_ERR;
+}
+
+/**
+ * Helper for dom_html_option_element_get_text
+ *
+ * \param n     Node to get the text of
+ * \param text  Updated to the node's text with leading and trailing
+ *              whitespace stripped and inner whitespace collapsed, or
+ *              NULL if the node has no text.  Caller owns ref
+ * \return DOM_NO_ERR on success, appropriate error otherwise.
+ */
+static dom_exception dom_html_option_element_get_text_node(
+		dom_node_internal *n, dom_string **text)
+{
+	dom_string *raw = NULL;
+	dom_exception exc;
+
+	*text = NULL;
+
+	exc = dom_html_option_element_gather_text(n, &raw);
+	if (exc != DOM_NO_ERR || raw == NULL)
+		return exc;
+
+	/* Strip and collapse whitespace once, over the complete text, so
+	 * that whitespace at the edges of nested elements still separates
+	 * the words either side of it */
+	exc = dom_string_whitespace_op(raw,
+			DOM_WHITESPACE_STRIP_COLLAPSE, text);
+	dom_string_unref(raw);
+
+	return exc;
+}
+
 /**
  * Get the text contained in the option
  *
@@ -162,7 +299,8 @@ dom_exception dom_html_option_element_set_default_selected(
 dom_exception dom_html_option_element_get_text(
 		dom_html_option_element *option, dom_string **text)
 {
-	return dom_node_get_text_content(option, text);
+	return dom_html_option_element_get_text_node(
+			(dom_node_internal *) option, text);
 }
 
 /**
-- 
cgit v1.2.3