diff options
-rw-r--r-- | include/dom/html/html_elements.h | 9 | ||||
-rw-r--r-- | src/core/node.c | 6 | ||||
-rw-r--r-- | src/html/html_document.c | 27 | ||||
-rw-r--r-- | src/html/html_tablerow_element.c | 13 | ||||
-rw-r--r-- | test/Makefile | 9 | ||||
-rw-r--r-- | test/data/normalize/INDEX | 4 | ||||
-rw-r--r-- | test/data/normalize/regression.html | 13 | ||||
-rw-r--r-- | test/normalize.c | 190 |
8 files changed, 267 insertions, 4 deletions
diff --git a/include/dom/html/html_elements.h b/include/dom/html/html_elements.h index 5b54bbe..6e954c5 100644 --- a/include/dom/html/html_elements.h +++ b/include/dom/html/html_elements.h @@ -12,6 +12,7 @@ DOM_HTML_ELEMENT_STRINGS_ENTRY(_UNKNOWN) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(A) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(ABBR) \ + DOM_HTML_ELEMENT_STRINGS_ENTRY(ACRONYM) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(ADDRESS) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(APPLET) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(AREA) \ @@ -23,6 +24,8 @@ DOM_HTML_ELEMENT_STRINGS_ENTRY(BASEFONT) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(BDI) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(BDO) \ + DOM_HTML_ELEMENT_STRINGS_ENTRY(BGSOUND) \ + DOM_HTML_ELEMENT_STRINGS_ENTRY(BIG) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(BLOCKQUOTE) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(BODY) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(BR) \ @@ -81,11 +84,14 @@ DOM_HTML_ELEMENT_STRINGS_ENTRY(MAIN) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(MAP) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(MARK) \ + DOM_HTML_ELEMENT_STRINGS_ENTRY(MARQUEE) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(MENU) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(MENUITEM) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(META) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(METER) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(NAV) \ + DOM_HTML_ELEMENT_STRINGS_ENTRY(NOBR) \ + DOM_HTML_ELEMENT_STRINGS_ENTRY(NOFRAMES) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(NOSCRIPT) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(OBJECT) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(OL) \ @@ -108,7 +114,9 @@ DOM_HTML_ELEMENT_STRINGS_ENTRY(SELECT) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(SMALL) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(SOURCE) \ + DOM_HTML_ELEMENT_STRINGS_ENTRY(SPACER) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(SPAN) \ + DOM_HTML_ELEMENT_STRINGS_ENTRY(STRIKE) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(STRONG) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(STYLE) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(SUB) \ @@ -126,6 +134,7 @@ DOM_HTML_ELEMENT_STRINGS_ENTRY(TITLE) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(TR) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(TRACK) \ + DOM_HTML_ELEMENT_STRINGS_ENTRY(TT) \ 
DOM_HTML_ELEMENT_STRINGS_ENTRY(U) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(UL) \ DOM_HTML_ELEMENT_STRINGS_ENTRY(VAR) \ diff --git a/src/core/node.c b/src/core/node.c index 1218742..da179c3 100644 --- a/src/core/node.c +++ b/src/core/node.c @@ -2241,16 +2241,16 @@ dom_exception _dom_merge_adjacent_text(dom_node_internal *p, assert(p->type == DOM_TEXT_NODE); assert(n->type == DOM_TEXT_NODE); - err = dom_text_get_whole_text(n, &str); + err = dom_characterdata_get_data(n, &str); if (err != DOM_NO_ERR) return err; err = dom_characterdata_append_data(p, str); + dom_string_unref(str); + if (err != DOM_NO_ERR) return err; - dom_string_unref(str); - return DOM_NO_ERR; } diff --git a/src/html/html_document.c b/src/html/html_document.c index 5471f4f..cf3c25d 100644 --- a/src/html/html_document.c +++ b/src/html/html_document.c @@ -319,6 +319,8 @@ static inline dom_html_element_type _dom_html_document_get_element_type( RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_RP) else RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_RT) + else + RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_TT) break; case 3: RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_DIV) @@ -354,6 +356,8 @@ static inline dom_html_element_type _dom_html_document_get_element_type( RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BDO) else RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_WBR) + else + RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BIG) break; case 4: RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_META) @@ -395,6 +399,8 @@ static inline dom_html_element_type _dom_html_document_get_element_type( RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BASE) else RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_TIME) + else + RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_NOBR) break; case 5: RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_INPUT) @@ -469,6 +475,10 @@ static inline dom_html_element_type _dom_html_document_get_element_type( RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_SOURCE) else RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_STRONG) + else 
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_SPACER) + else + RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_STRIKE) break; case 7: RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_CAPTION) @@ -486,6 +496,12 @@ static inline dom_html_element_type _dom_html_document_get_element_type( RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_SECTION) else RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_SUMMARY) + else + RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_ACRONYM) + else + RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BGSOUND) + else + RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_MARQUEE) break; case 8: RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_TEXTAREA) @@ -509,6 +525,8 @@ static inline dom_html_element_type _dom_html_document_get_element_type( RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_PROGRESS) else RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_TEMPLATE) + else + RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_NOFRAMES) break; case 10: RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BLOCKQUOTE) @@ -809,8 +827,10 @@ _dom_html_document_create_element_internal( case DOM_HTML_ELEMENT_TYPE_EM: case DOM_HTML_ELEMENT_TYPE_RP: case DOM_HTML_ELEMENT_TYPE_RT: + case DOM_HTML_ELEMENT_TYPE_TT: case DOM_HTML_ELEMENT_TYPE_BDI: case DOM_HTML_ELEMENT_TYPE_BDO: + case DOM_HTML_ELEMENT_TYPE_BIG: case DOM_HTML_ELEMENT_TYPE_DFN: case DOM_HTML_ELEMENT_TYPE_KBD: case DOM_HTML_ELEMENT_TYPE_NAV: @@ -823,6 +843,7 @@ _dom_html_document_create_element_internal( case DOM_HTML_ELEMENT_TYPE_CODE: case DOM_HTML_ELEMENT_TYPE_MAIN: case DOM_HTML_ELEMENT_TYPE_MARK: + case DOM_HTML_ELEMENT_TYPE_NOBR: case DOM_HTML_ELEMENT_TYPE_RUBY: case DOM_HTML_ELEMENT_TYPE_SAMP: case DOM_HTML_ELEMENT_TYPE_ASIDE: @@ -833,10 +854,16 @@ _dom_html_document_create_element_internal( case DOM_HTML_ELEMENT_TYPE_HEADER: case DOM_HTML_ELEMENT_TYPE_HGROUP: case DOM_HTML_ELEMENT_TYPE_STRONG: + case DOM_HTML_ELEMENT_TYPE_SPACER: + case DOM_HTML_ELEMENT_TYPE_STRIKE: + case DOM_HTML_ELEMENT_TYPE_ACRONYM: case DOM_HTML_ELEMENT_TYPE_ADDRESS: 
case DOM_HTML_ELEMENT_TYPE_ARTICLE: + case DOM_HTML_ELEMENT_TYPE_BGSOUND: + case DOM_HTML_ELEMENT_TYPE_MARQUEE: case DOM_HTML_ELEMENT_TYPE_SECTION: case DOM_HTML_ELEMENT_TYPE_SUMMARY: + case DOM_HTML_ELEMENT_TYPE_NOFRAMES: case DOM_HTML_ELEMENT_TYPE_NOSCRIPT: case DOM_HTML_ELEMENT_TYPE_FIGCAPTION: /* These have no specialisation: use HTMLElement */ diff --git a/src/html/html_tablerow_element.c b/src/html/html_tablerow_element.c index 1a6069b..39fce23 100644 --- a/src/html/html_tablerow_element.c +++ b/src/html/html_tablerow_element.c @@ -212,6 +212,12 @@ dom_exception dom_html_table_row_element_get_row_index( uint32_t count = 0; + if (n == NULL) { + /* Firefox returns -1 for an orphaned table row */ + *row_index = -1; + return DOM_NO_ERR; + } + for (n = n->first_child; n != (dom_node_internal *)table_row; n = n->next) { if(n->type == DOM_ELEMENT_NODE && @@ -297,6 +303,13 @@ dom_exception dom_html_table_row_element_get_section_row_index( dom_node_internal *n = ((dom_node_internal *)table_row)->parent; dom_html_document *doc = (dom_html_document *) ((dom_node_internal *) table_row)->owner; int32_t count = 0; + + if (n == NULL) { + /* Firefox returns -1 for orphaned rows */ + *section_row_index = -1; + return DOM_NO_ERR; + } + for (n = n->first_child; n != (dom_node_internal *)table_row; n = n->next) { if (n->type == DOM_ELEMENT_NODE && diff --git a/test/Makefile b/test/Makefile index 951d1bd..cafa2e6 100644 --- a/test/Makefile +++ b/test/Makefile @@ -5,6 +5,9 @@ TESTCFLAGS := $(TESTCFLAGS) -I$(DIR) -I$(DIR)testutils -Ibindings/xml -Ibindings ALL_XML_TESTS := +WANT_XML_TEST := $(WANT_TEST) +# WANT_XML_TEST := no + # 1: Path to XML file # 2: Fragment C file name # 3: DTD file @@ -12,7 +15,7 @@ ALL_XML_TESTS := define do_xml_test -ifeq ($$(WANT_TEST),yes) +ifeq ($$(WANT_XML_TEST),yes) $(DIR)$2: $(DIR)testcases/tests/$1 $(DIR)transform.pl $(DIR)DOMTSHandler.pm $(VQ)$(ECHO) " XFORM: $1" @@ -48,10 +51,14 @@ endef $(DIR)INDEX: test/Makefile $(VQ)$(ECHO) " INDEX: Making test 
index" $(Q)$(ECHO) "#test desc dir" > $@ +ifeq ($(WANT_XML_TEST),yes) $(foreach XMLTEST,$(sort $(ALL_XML_TESTS)),$(call write_index,$(XMLTEST))) +endif + $(Q)$(ECHO) "normalize Normalize nodes normalize" > $@ TEST_PREREQS := $(TEST_PREREQS) $(DIR)INDEX +DIR_TEST_ITEMS := $(DIR_TEST_ITEMS) normalize:normalize.c;$(testutils_files) # Include the level 1 core tests $(eval $(call do_xml_suite,level1/core,dom1-interfaces.xml)) # Include level 1 html tests diff --git a/test/data/normalize/INDEX b/test/data/normalize/INDEX new file mode 100644 index 0000000..ea212ae --- /dev/null +++ b/test/data/normalize/INDEX @@ -0,0 +1,4 @@ +# Index file for generic CSS content +# +# Test Description +regression.html OOMing regression check diff --git a/test/data/normalize/regression.html b/test/data/normalize/regression.html new file mode 100644 index 0000000..084ca67 --- /dev/null +++ b/test/data/normalize/regression.html @@ -0,0 +1,13 @@ +<html> + <head> + <style> + ><<>><><<>><>><<<<>><><><<>>>><<<<>><<><<>>><<><>>><>><<><<>><<>><<<<<<>>><>><><><><<>><><<<<>><>>><><><<><>>><><><><>><<<<>>>><>><><<<<<><><<<>>>>>>>>>><>>><<<<<<<<<<>>>>><><<<>>><>>>>>><>><<<<><<><>><<>><><<<>>>>>>>><<<>>>><<><<<<<><><><>><>><<<>><<<><><<<>>>>><><<>>>><>>><<<<<<<<>>><<<>>>><>>>>>><><>><><>><><>>>>><<>><<>>>><<>><><<>><><<><<>><><<<>>><<><><<<><>><<><<>><><<<<<<<<<<>><>><><<>>><> + </style> + </head> +<body> + <script> + document.head.normalize() + </script> +</body> +</html> + diff --git a/test/normalize.c b/test/normalize.c new file mode 100644 index 0000000..1c8a30d --- /dev/null +++ b/test/normalize.c @@ -0,0 +1,190 @@ +/* + * This file is part of libdom test suite. 
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2024 Daniel Silverstone <dsilvers@netsurf-browser.org>
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+
+#include "testutils/domts.h"
+
+#ifdef __linux__
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#define SOFT_LIMIT (128ul * 1024ul * 1024ul)
+#define HARD_LIMIT_MUL (4ul)
+
+/**
+ * Restrict the process data segment via RLIMIT_DATA.
+ *
+ * The soft limit is set to SOFT_LIMIT and the hard limit to
+ * SOFT_LIMIT * HARD_LIMIT_MUL.  Failure to read or set the limit is
+ * reported with perror() and otherwise ignored, so the test still runs
+ * (unprotected) when the limit cannot be applied.
+ */
+static void limit_ram(void)
+{
+	struct rlimit lim;
+
+	if (getrlimit(RLIMIT_DATA, &lim) == -1) {
+		perror("Unable to get RLIMIT_DATA");
+	} else {
+		/* rlim_t is not guaranteed to be unsigned long: cast for %lu */
+		printf("Initial limits were: soft=%lu hard=%lu\n",
+		       (unsigned long)lim.rlim_cur,
+		       (unsigned long)lim.rlim_max);
+	}
+
+	/* Both fields are overwritten, so a failed getrlimit() is harmless */
+	lim.rlim_cur = SOFT_LIMIT;
+	lim.rlim_max = lim.rlim_cur * HARD_LIMIT_MUL;
+
+	if (setrlimit(RLIMIT_DATA, &lim) == -1) {
+		perror("Unable to set RLIMIT_DATA");
+	} else {
+		printf("Set limits to: soft=%lu hard=%lu\n",
+		       (unsigned long)lim.rlim_cur,
+		       (unsigned long)lim.rlim_max);
+	}
+}
+
+#else
+
+/**
+ * Fallback for platforms where no memory limit is applied: only warns.
+ */
+static void limit_ram(void)
+{
+	printf("Cannot limit RAM, risky execution proceeds...\n");
+}
+#endif
+
+/**
+ * Fetch the first element with a given tag name below the document element.
+ *
+ * \param doc  Document to search
+ * \param tag  Tag name to look for
+ * \param out  Updated to the matching node on success only; the caller
+ *             is expected to unref it when done
+ * \return DOM_NO_ERR on success, DOM_NOT_FOUND_ERR if the document has no
+ *         document element or no element matches, otherwise the error
+ *         reported by the failing DOM call
+ */
+static dom_exception
+get_node(dom_document *doc, dom_string *tag, dom_node **out)
+{
+	dom_nodelist *nodes = NULL;
+	dom_exception err;
+	dom_node *docele = NULL;
+	dom_node *ret = NULL;
+
+	err = dom_document_get_document_element(doc, &docele);
+	if (err != DOM_NO_ERR) {
+		printf("Could not get document node\n");
+		return err;
+	}
+	if (docele == NULL) {
+		printf("Document has no document element\n");
+		return DOM_NOT_FOUND_ERR;
+	}
+
+	err = dom_element_get_elements_by_tag_name(docele, tag, &nodes);
+	dom_node_unref(docele);
+	if (err != DOM_NO_ERR) {
+		/* %.*s: print exactly the string's bytes (precision is int) */
+		printf("Could not enumerate elements for %.*s\n",
+		       (int)dom_string_byte_length(tag),
+		       dom_string_data(tag));
+		return err;
+	}
+
+	err = dom_nodelist_item(nodes, 0, &ret);
+	dom_nodelist_unref(nodes);
+	if (err != DOM_NO_ERR) {
+		printf("Could not retrieve element[0] for %.*s\n",
+		       (int)dom_string_byte_length(tag),
+		       dom_string_data(tag));
+		return err;
+	}
+	if (ret == NULL) {
+		/* An empty list can succeed yet yield no node; never hand
+		 * a NULL node back to a caller that will dereference it */
+		printf("No element found for %.*s\n",
+		       (int)dom_string_byte_length(tag),
+		       dom_string_data(tag));
+		return DOM_NOT_FOUND_ERR;
+	}
+
+	*out = ret;
+
+	return DOM_NO_ERR;
+}
+
+/**
+ * Load an HTML file and normalise its HEAD and BODY elements.
+ *
+ * \param fname  Path of the HTML file to load
+ * \return true if the document loaded and both elements normalised
+ *         without a DOM exception, false otherwise
+ */
+static bool test_normalize(const char *fname)
+{
+	bool outcome = true;
+	dom_document *doc = NULL;
+	dom_string *domHEAD = NULL, *domBODY = NULL;
+	dom_node *head = NULL, *body = NULL;
+	dom_exception err = DOM_NO_ERR;
+
+	limit_ram();
+
+	printf("Loading: %s\n", fname);
+	doc = load_html(fname, false);
+	if (doc == NULL) {
+		printf("Failed to load file\n");
+		outcome = false;
+		goto cleanup;
+	}
+
+	/* We have an HTML document so we normalise the head and the body */
+
+	err = dom_string_create((uint8_t *)"HEAD", 4, &domHEAD);
+	if (err != DOM_NO_ERR) {
+		printf("Failed to create HEAD string\n");
+		outcome = false;
+		goto cleanup;
+	}
+
+	err = dom_string_create((uint8_t *)"BODY", 4, &domBODY);
+	if (err != DOM_NO_ERR) {
+		printf("Failed to create BODY string\n");
+		outcome = false;
+		goto cleanup;
+	}
+
+	err = get_node(doc, domHEAD, &head);
+	if (err != DOM_NO_ERR) {
+		outcome = false;
+		goto cleanup;
+	}
+
+	err = get_node(doc, domBODY, &body);
+	if (err != DOM_NO_ERR) {
+		outcome = false;
+		goto cleanup;
+	}
+
+	printf("Normalizing head, please wait...\n");
+	err = dom_node_normalize(head);
+	if (err != DOM_NO_ERR) {
+		printf("Failed to normalize head\n");
+		outcome = false;
+		goto cleanup;
+	}
+	printf("Normalizing body, please wait...\n");
+	err = dom_node_normalize(body);
+	if (err != DOM_NO_ERR) {
+		printf("Failed to normalize body\n");
+		outcome = false;
+		goto cleanup;
+	}
+
+	printf("All done\n");
+
+cleanup:
+	/* Every reference taken above is dropped here, on all paths */
+	if (err != DOM_NO_ERR) {
+		printf("DOM Exception: %d\n", err);
+	}
+	if (head != NULL) {
+		dom_node_unref(head);
+	}
+	if (body != NULL) {
+		dom_node_unref(body);
+	}
+	if (domBODY != NULL) {
+		dom_string_unref(domBODY);
+	}
+	if (domHEAD != NULL) {
+		dom_string_unref(domHEAD);
+	}
+	if (doc != NULL) {
+		dom_node_unref(doc);
+		doc = NULL;
+	}
+	return outcome;
+}
+
+/**
+ * Test entry point: expects exactly one argument, the HTML input file.
+ *
+ * \return 0 after printing PASS, 1 on a usage error or test failure
+ */
+int main(int argc, char **argv)
+{
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s inputfile\n", argv[0]);
+		return 1;
+	}
+
+	/* This test simply loads the HTML and normalises the top node */
+	if (!test_normalize(argv[1])) {
+		printf("\nFAILED\n");
+		return 1;
+	}
+
+	printf("\nPASS\n");
+	return 0;
+}
\ No newline at end of file |