summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/dom/html/html_elements.h9
-rw-r--r--src/core/node.c6
-rw-r--r--src/html/html_document.c27
-rw-r--r--src/html/html_tablerow_element.c13
-rw-r--r--test/Makefile9
-rw-r--r--test/data/normalize/INDEX4
-rw-r--r--test/data/normalize/regression.html13
-rw-r--r--test/normalize.c190
8 files changed, 267 insertions, 4 deletions
diff --git a/include/dom/html/html_elements.h b/include/dom/html/html_elements.h
index 5b54bbe..6e954c5 100644
--- a/include/dom/html/html_elements.h
+++ b/include/dom/html/html_elements.h
@@ -12,6 +12,7 @@
DOM_HTML_ELEMENT_STRINGS_ENTRY(_UNKNOWN) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(A) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(ABBR) \
+ DOM_HTML_ELEMENT_STRINGS_ENTRY(ACRONYM) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(ADDRESS) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(APPLET) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(AREA) \
@@ -23,6 +24,8 @@
DOM_HTML_ELEMENT_STRINGS_ENTRY(BASEFONT) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(BDI) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(BDO) \
+ DOM_HTML_ELEMENT_STRINGS_ENTRY(BGSOUND) \
+ DOM_HTML_ELEMENT_STRINGS_ENTRY(BIG) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(BLOCKQUOTE) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(BODY) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(BR) \
@@ -81,11 +84,14 @@
DOM_HTML_ELEMENT_STRINGS_ENTRY(MAIN) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(MAP) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(MARK) \
+ DOM_HTML_ELEMENT_STRINGS_ENTRY(MARQUEE) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(MENU) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(MENUITEM) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(META) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(METER) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(NAV) \
+ DOM_HTML_ELEMENT_STRINGS_ENTRY(NOBR) \
+ DOM_HTML_ELEMENT_STRINGS_ENTRY(NOFRAMES) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(NOSCRIPT) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(OBJECT) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(OL) \
@@ -108,7 +114,9 @@
DOM_HTML_ELEMENT_STRINGS_ENTRY(SELECT) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(SMALL) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(SOURCE) \
+ DOM_HTML_ELEMENT_STRINGS_ENTRY(SPACER) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(SPAN) \
+ DOM_HTML_ELEMENT_STRINGS_ENTRY(STRIKE) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(STRONG) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(STYLE) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(SUB) \
@@ -126,6 +134,7 @@
DOM_HTML_ELEMENT_STRINGS_ENTRY(TITLE) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(TR) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(TRACK) \
+ DOM_HTML_ELEMENT_STRINGS_ENTRY(TT) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(U) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(UL) \
DOM_HTML_ELEMENT_STRINGS_ENTRY(VAR) \
diff --git a/src/core/node.c b/src/core/node.c
index 1218742..da179c3 100644
--- a/src/core/node.c
+++ b/src/core/node.c
@@ -2241,16 +2241,16 @@ dom_exception _dom_merge_adjacent_text(dom_node_internal *p,
assert(p->type == DOM_TEXT_NODE);
assert(n->type == DOM_TEXT_NODE);
- err = dom_text_get_whole_text(n, &str);
+ err = dom_characterdata_get_data(n, &str);
if (err != DOM_NO_ERR)
return err;
err = dom_characterdata_append_data(p, str);
+ dom_string_unref(str);
+
if (err != DOM_NO_ERR)
return err;
- dom_string_unref(str);
-
return DOM_NO_ERR;
}
diff --git a/src/html/html_document.c b/src/html/html_document.c
index 5471f4f..cf3c25d 100644
--- a/src/html/html_document.c
+++ b/src/html/html_document.c
@@ -319,6 +319,8 @@ static inline dom_html_element_type _dom_html_document_get_element_type(
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_RP)
else
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_RT)
+ else
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_TT)
break;
case 3:
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_DIV)
@@ -354,6 +356,8 @@ static inline dom_html_element_type _dom_html_document_get_element_type(
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BDO)
else
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_WBR)
+ else
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BIG)
break;
case 4:
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_META)
@@ -395,6 +399,8 @@ static inline dom_html_element_type _dom_html_document_get_element_type(
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BASE)
else
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_TIME)
+ else
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_NOBR)
break;
case 5:
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_INPUT)
@@ -469,6 +475,10 @@ static inline dom_html_element_type _dom_html_document_get_element_type(
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_SOURCE)
else
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_STRONG)
+ else
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_SPACER)
+ else
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_STRIKE)
break;
case 7:
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_CAPTION)
@@ -486,6 +496,12 @@ static inline dom_html_element_type _dom_html_document_get_element_type(
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_SECTION)
else
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_SUMMARY)
+ else
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_ACRONYM)
+ else
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BGSOUND)
+ else
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_MARQUEE)
break;
case 8:
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_TEXTAREA)
@@ -509,6 +525,8 @@ static inline dom_html_element_type _dom_html_document_get_element_type(
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_PROGRESS)
else
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_TEMPLATE)
+ else
+ RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_NOFRAMES)
break;
case 10:
RETURN_IF_MATCH(upper, DOM_HTML_ELEMENT_TYPE_BLOCKQUOTE)
@@ -809,8 +827,10 @@ _dom_html_document_create_element_internal(
case DOM_HTML_ELEMENT_TYPE_EM:
case DOM_HTML_ELEMENT_TYPE_RP:
case DOM_HTML_ELEMENT_TYPE_RT:
+ case DOM_HTML_ELEMENT_TYPE_TT:
case DOM_HTML_ELEMENT_TYPE_BDI:
case DOM_HTML_ELEMENT_TYPE_BDO:
+ case DOM_HTML_ELEMENT_TYPE_BIG:
case DOM_HTML_ELEMENT_TYPE_DFN:
case DOM_HTML_ELEMENT_TYPE_KBD:
case DOM_HTML_ELEMENT_TYPE_NAV:
@@ -823,6 +843,7 @@ _dom_html_document_create_element_internal(
case DOM_HTML_ELEMENT_TYPE_CODE:
case DOM_HTML_ELEMENT_TYPE_MAIN:
case DOM_HTML_ELEMENT_TYPE_MARK:
+ case DOM_HTML_ELEMENT_TYPE_NOBR:
case DOM_HTML_ELEMENT_TYPE_RUBY:
case DOM_HTML_ELEMENT_TYPE_SAMP:
case DOM_HTML_ELEMENT_TYPE_ASIDE:
@@ -833,10 +854,16 @@ _dom_html_document_create_element_internal(
case DOM_HTML_ELEMENT_TYPE_HEADER:
case DOM_HTML_ELEMENT_TYPE_HGROUP:
case DOM_HTML_ELEMENT_TYPE_STRONG:
+ case DOM_HTML_ELEMENT_TYPE_SPACER:
+ case DOM_HTML_ELEMENT_TYPE_STRIKE:
+ case DOM_HTML_ELEMENT_TYPE_ACRONYM:
case DOM_HTML_ELEMENT_TYPE_ADDRESS:
case DOM_HTML_ELEMENT_TYPE_ARTICLE:
+ case DOM_HTML_ELEMENT_TYPE_BGSOUND:
+ case DOM_HTML_ELEMENT_TYPE_MARQUEE:
case DOM_HTML_ELEMENT_TYPE_SECTION:
case DOM_HTML_ELEMENT_TYPE_SUMMARY:
+ case DOM_HTML_ELEMENT_TYPE_NOFRAMES:
case DOM_HTML_ELEMENT_TYPE_NOSCRIPT:
case DOM_HTML_ELEMENT_TYPE_FIGCAPTION:
/* These have no specialisation: use HTMLElement */
diff --git a/src/html/html_tablerow_element.c b/src/html/html_tablerow_element.c
index 1a6069b..39fce23 100644
--- a/src/html/html_tablerow_element.c
+++ b/src/html/html_tablerow_element.c
@@ -212,6 +212,12 @@ dom_exception dom_html_table_row_element_get_row_index(
uint32_t count = 0;
+ if (n == NULL) {
+ /* Firefox returns -1 for an orphaned table row */
+ *row_index = -1;
+ return DOM_NO_ERR;
+ }
+
for (n = n->first_child; n != (dom_node_internal *)table_row;
n = n->next) {
if(n->type == DOM_ELEMENT_NODE &&
@@ -297,6 +303,13 @@ dom_exception dom_html_table_row_element_get_section_row_index(
dom_node_internal *n = ((dom_node_internal *)table_row)->parent;
dom_html_document *doc = (dom_html_document *) ((dom_node_internal *) table_row)->owner;
int32_t count = 0;
+
+ if (n == NULL) {
+ /* Firefox returns -1 for orphaned rows */
+ *section_row_index = -1;
+ return DOM_NO_ERR;
+ }
+
for (n = n->first_child; n != (dom_node_internal *)table_row;
n = n->next) {
if (n->type == DOM_ELEMENT_NODE &&
diff --git a/test/Makefile b/test/Makefile
index 951d1bd..cafa2e6 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -5,6 +5,9 @@ TESTCFLAGS := $(TESTCFLAGS) -I$(DIR) -I$(DIR)testutils -Ibindings/xml -Ibindings
ALL_XML_TESTS :=
+WANT_XML_TEST := $(WANT_TEST)
+# WANT_XML_TEST := no
+
# 1: Path to XML file
# 2: Fragment C file name
# 3: DTD file
@@ -12,7 +15,7 @@ ALL_XML_TESTS :=
define do_xml_test
-ifeq ($$(WANT_TEST),yes)
+ifeq ($$(WANT_XML_TEST),yes)
$(DIR)$2: $(DIR)testcases/tests/$1 $(DIR)transform.pl $(DIR)DOMTSHandler.pm
$(VQ)$(ECHO) " XFORM: $1"
@@ -48,10 +51,14 @@ endef
$(DIR)INDEX: test/Makefile
$(VQ)$(ECHO) " INDEX: Making test index"
$(Q)$(ECHO) "#test desc dir" > $@
+ifeq ($(WANT_XML_TEST),yes)
$(foreach XMLTEST,$(sort $(ALL_XML_TESTS)),$(call write_index,$(XMLTEST)))
+endif
+# Append (>>) here: a plain > would truncate the header and any XML test entries written above
+ $(Q)$(ECHO) "normalize Normalize nodes normalize" >> $@
TEST_PREREQS := $(TEST_PREREQS) $(DIR)INDEX
+DIR_TEST_ITEMS := $(DIR_TEST_ITEMS) normalize:normalize.c;$(testutils_files)
# Include the level 1 core tests
$(eval $(call do_xml_suite,level1/core,dom1-interfaces.xml))
# Include level 1 html tests
diff --git a/test/data/normalize/INDEX b/test/data/normalize/INDEX
new file mode 100644
index 0000000..ea212ae
--- /dev/null
+++ b/test/data/normalize/INDEX
@@ -0,0 +1,4 @@
+# Index file for normalize test content
+#
+# Test Description
+regression.html OOMing regression check
diff --git a/test/data/normalize/regression.html b/test/data/normalize/regression.html
new file mode 100644
index 0000000..084ca67
--- /dev/null
+++ b/test/data/normalize/regression.html
@@ -0,0 +1,13 @@
+<html>
+ <head>
+ <style>
+ ><<>><><<>><>><<<<>><><><<>>>><<<<>><<><<>>><<><>>><>><<><<>><<>><<<<<<>>><>><><><><<>><><<<<>><>>><><><<><>>><><><><>><<<<>>>><>><><<<<<><><<<>>>>>>>>>><>>><<<<<<<<<<>>>>><><<<>>><>>>>>><>><<<<><<><>><<>><><<<>>>>>>>><<<>>>><<><<<<<><><><>><>><<<>><<<><><<<>>>>><><<>>>><>>><<<<<<<<>>><<<>>>><>>>>>><><>><><>><><>>>>><<>><<>>>><<>><><<>><><<><<>><><<<>>><<><><<<><>><<><<>><><<<<<<<<<<>><>><><<>>><>
+ </style>
+ </head>
+<body>
+ <script>
+ document.head.normalize()
+ </script>
+</body>
+</html>
+
diff --git a/test/normalize.c b/test/normalize.c
new file mode 100644
index 0000000..1c8a30d
--- /dev/null
+++ b/test/normalize.c
@@ -0,0 +1,190 @@
+/*
+ * This file is part of libdom test suite.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2024 Daniel Silverstone <dsilvers@netsurf-browser.org>
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+
+#include "testutils/domts.h"
+
+#ifdef __linux__
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#define SOFT_LIMIT (128ul * 1024ul * 1024ul)
+#define HARD_LIMIT_MUL (4ul)
+
+/**
+ * Restrict RLIMIT_DATA for this process.
+ *
+ * The soft limit is set to SOFT_LIMIT and the hard limit to
+ * SOFT_LIMIT * HARD_LIMIT_MUL, presumably so that a runaway normalize
+ * fails on allocation instead of exhausting the host's memory.
+ *
+ * Failures of getrlimit/setrlimit are reported with perror and are
+ * otherwise ignored: the test then runs with whatever limits it had.
+ */
+static void limit_ram(void)
+{
+	struct rlimit lim;
+	/* Existing hard limit; stays "infinite" if it cannot be queried */
+	rlim_t old_hard = RLIM_INFINITY;
+
+	if (getrlimit(RLIMIT_DATA, &lim) == -1) {
+		perror("Unable to get RLIMIT_DATA");
+	} else {
+		old_hard = lim.rlim_max;
+		/* rlim_t need not be unsigned long, so widen explicitly */
+		printf("Initial limits were: soft=%llu hard=%llu\n",
+		       (unsigned long long)lim.rlim_cur,
+		       (unsigned long long)lim.rlim_max);
+	}
+
+	lim.rlim_cur = SOFT_LIMIT;
+	lim.rlim_max = lim.rlim_cur * HARD_LIMIT_MUL;
+
+	/* An unprivileged process may not raise its hard limit, so never
+	 * ask for more than we already have, and keep soft <= hard. */
+	if (old_hard != RLIM_INFINITY) {
+		if (lim.rlim_max > old_hard) {
+			lim.rlim_max = old_hard;
+		}
+		if (lim.rlim_cur > lim.rlim_max) {
+			lim.rlim_cur = lim.rlim_max;
+		}
+	}
+
+	if (setrlimit(RLIMIT_DATA, &lim) == -1) {
+		perror("Unable to set RLIMIT_DATA");
+	} else {
+		printf("Set limits to: soft=%llu hard=%llu\n",
+		       (unsigned long long)lim.rlim_cur,
+		       (unsigned long long)lim.rlim_max);
+	}
+}
+
+#else
+
+/**
+ * Fallback for platforms other than Linux: no limit is applied, only a
+ * warning is printed.
+ */
+static void limit_ram(void)
+{
+	printf("Cannot limit RAM, risky execution proceeds...\n");
+}
+#endif
+
+/**
+ * Fetch the first element with the given tag name from beneath the
+ * document element.
+ *
+ * \param doc  Document to search
+ * \param tag  Tag name to look up
+ * \param out  Updated to the first matching node on success. The caller
+ *             unrefs it when finished with it.
+ * \return DOM_NO_ERR on success, DOM_NOT_FOUND_ERR if no element
+ *         matched, otherwise the error from the failing DOM call
+ */
+static dom_exception
+get_node(dom_document *doc, dom_string *tag, dom_node **out)
+{
+	dom_nodelist *nodes;
+	dom_exception err;
+	dom_node *docele;
+	dom_node *ret = NULL;
+
+	err = dom_document_get_document_element(doc, &docele);
+	if (err != DOM_NO_ERR) {
+		printf("Could not get document node\n");
+		return err;
+	}
+	err = dom_element_get_elements_by_tag_name(docele, tag, &nodes);
+	/* The document element is only needed for the lookup itself */
+	dom_node_unref(docele);
+	if (err != DOM_NO_ERR) {
+		/* %.*s: bound the output by the string's byte length rather
+		 * than merely padding to it, and pass the int that '*' needs */
+		printf("Could not enumerate elements for %.*s\n",
+		       (int)dom_string_byte_length(tag),
+		       dom_string_data(tag));
+		return err;
+	}
+
+	err = dom_nodelist_item(nodes, 0, &ret);
+	dom_nodelist_unref(nodes);
+	if (err != DOM_NO_ERR) {
+		printf("Could not retrieve element[0] for %.*s\n",
+		       (int)dom_string_byte_length(tag),
+		       dom_string_data(tag));
+		return err;
+	}
+
+	/* An empty list yields a NULL item without an error; report that
+	 * here so the caller never tries to normalize a NULL node. */
+	if (ret == NULL) {
+		printf("Could not retrieve element[0] for %.*s\n",
+		       (int)dom_string_byte_length(tag),
+		       dom_string_data(tag));
+		return DOM_NOT_FOUND_ERR;
+	}
+
+	*out = ret;
+
+	return DOM_NO_ERR;
+}
+
+/**
+ * Load an HTML file and normalize its HEAD and BODY elements.
+ *
+ * The RAM limit is applied first (see limit_ram). Progress and any
+ * failure are reported on stdout.
+ *
+ * \param fname  Path of the HTML file to load
+ * \return true if the document loaded and both elements normalized
+ *         without a DOM exception, false otherwise
+ */
+static bool test_normalize(const char *fname)
+{
+	bool outcome = true;
+	dom_document *doc = NULL;
+	dom_string *domHEAD = NULL, *domBODY = NULL;
+	dom_node *head = NULL, *body = NULL;
+	dom_exception err = DOM_NO_ERR;
+
+	limit_ram();
+
+	printf("Loading: %s\n", fname);
+	doc = load_html(fname, false);
+	if (doc == NULL) {
+		printf("Failed to load file\n");
+		outcome = false;
+		goto cleanup;
+	}
+
+	/* We have an HTML document so we normalise the head and the body */
+
+	err = dom_string_create((uint8_t *)"HEAD", 4, &domHEAD);
+	if (err != DOM_NO_ERR) {
+		printf("Failed to create HEAD string\n");
+		outcome = false;
+		goto cleanup;
+	}
+
+	err = dom_string_create((uint8_t *)"BODY", 4, &domBODY);
+	if (err != DOM_NO_ERR) {
+		printf("Failed to create BODY string\n");
+		outcome = false;
+		goto cleanup;
+	}
+
+	err = get_node(doc, domHEAD, &head);
+	if (err != DOM_NO_ERR) {
+		outcome = false;
+		goto cleanup;
+	}
+
+	err = get_node(doc, domBODY, &body);
+	if (err != DOM_NO_ERR) {
+		outcome = false;
+		goto cleanup;
+	}
+
+	printf("Normalizing head, please wait...\n");
+	err = dom_node_normalize(head);
+	if (err != DOM_NO_ERR) {
+		printf("Failed to normalize head\n");
+		outcome = false;
+		goto cleanup;
+	}
+	printf("Normalizing body, please wait...\n");
+	err = dom_node_normalize(body);
+	if (err != DOM_NO_ERR) {
+		printf("Failed to normalize body\n");
+		outcome = false;
+		goto cleanup;
+	}
+
+	printf("All done\n");
+
+cleanup:
+	/* Single exit path: everything acquired above is released here,
+	 * whichever step failed. Pointers start out NULL, so only what was
+	 * actually obtained gets unreferenced. */
+	if (err != DOM_NO_ERR) {
+		printf("DOM Exception: %d\n", err);
+	}
+	if (head != NULL) {
+		dom_node_unref(head);
+	}
+	/* The body node was previously leaked: cleanup released an unused
+	 * "html" pointer instead of the node fetched by get_node(). */
+	if (body != NULL) {
+		dom_node_unref(body);
+	}
+	if (domBODY != NULL) {
+		dom_string_unref(domBODY);
+	}
+	if (domHEAD != NULL) {
+		dom_string_unref(domHEAD);
+	}
+	if (doc != NULL) {
+		dom_node_unref(doc);
+		doc = NULL;
+	}
+	return outcome;
+}
+
+/**
+ * Test entry point.
+ *
+ * Expects exactly one argument: the path of the HTML file to load.
+ *
+ * \return 0 if the test passed, 1 on a usage error or test failure
+ */
+int main(int argc, char **argv)
+{
+	if (argc != 2) {
+		/* Terminate the line so the shell prompt is not glued on */
+		fprintf(stderr, "usage: %s inputfile\n", argv[0]);
+		return 1;
+	}
+
+	/* This test simply loads the HTML and normalises its head and body */
+	if (!test_normalize(argv[1])) {
+		printf("\nFAILED\n");
+		return 1;
+	}
+
+	printf("\nPASS\n");
+	return 0;
+}
\ No newline at end of file