summaryrefslogtreecommitdiff
path: root/test/data/tokeniser2/test2.test
diff options
context:
space:
mode:
Diffstat (limited to 'test/data/tokeniser2/test2.test')
-rw-r--r--test/data/tokeniser2/test2.test93
1 files changed, 67 insertions, 26 deletions
diff --git a/test/data/tokeniser2/test2.test b/test/data/tokeniser2/test2.test
index 32c0f99..50c3531 100644
--- a/test/data/tokeniser2/test2.test
+++ b/test/data/tokeniser2/test2.test
@@ -1,32 +1,68 @@
{"tests": [
-{"description":"Doctype without a name",
+{"description":"DOCTYPE without name",
"input":"<!DOCTYPE>",
-"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
-{"description":"Correct doctype without a space before name",
+{"description":"DOCTYPE without space before name",
"input":"<!DOCTYPEhtml>",
-"output":["ParseError", ["DOCTYPE", "HTML", false]]},
+"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
-{"description":"Incorrect doctype without a space before name",
+{"description":"Incorrect DOCTYPE without a space before name",
"input":"<!DOCTYPEfoo>",
-"output":["ParseError", ["DOCTYPE", "FOO", true]]},
+"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
-{"description":"Bogus doctype",
+{"description":"DOCTYPE with publicId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
-"output":["ParseError", ["DOCTYPE", "HTML", true]]},
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC",
+"input":"<!DOCTYPE html PUBLIC",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC '",
+"input":"<!DOCTYPE html PUBLIC '",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC 'x",
+"input":"<!DOCTYPE html PUBLIC 'x",
+"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
+
+{"description":"DOCTYPE with systemId",
+"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with publicId and systemId",
+"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with > in double-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC \">x",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in single-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC '>x",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in double-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
+"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in single-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
+"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
-"output":["ParseError", ["DOCTYPE", "HTML", true]]},
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing the NUL character",
"input":"&#x0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
@@ -36,13 +72,9 @@
"input":"&#x1010FFFF;",
"output":["ParseError", ["Character", "\uFFFD"]]},
-{"description":"Numeric entity representing a Windows-1252 'codepoint'",
-"input":"&#137;",
-"output":[["Character", "\u2030"]]},
-
-{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
-"input":"&#x89;",
-"output":[["Character", "\u2030"]]},
+{"description":"Hexadecimal entity pair representing a surrogate pair",
+"input":"&#xD869;&#xDED6;",
+"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
@@ -54,19 +86,23 @@
{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
-"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
+"output":[["StartTag", "h", { "a":"&" }]]},
{"description":"StartTag containing <",
"input":"<a<b>",
-"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]},
+"output":[["StartTag", "a<b", { }]]},
{"description":"Non-void element containing trailing /",
"input":"<h/>",
-"output":["ParseError", ["StartTag", "h", { }]]},
+"output":[["StartTag","h",{},true]]},
{"description":"Void element with permitted slash",
"input":"<br/>",
-"output":[["StartTag", "br", { }]]},
+"output":[["StartTag","br",{},true]]},
+
+{"description":"Void element with permitted slash (with attribute)",
+"input":"<br foo='bar'/>",
+"output":[["StartTag","br",{"foo":"bar"},true]]},
{"description":"StartTag containing /",
"input":"<h/a='b'>",
@@ -96,12 +132,17 @@
"input":"foo < bar",
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
-/* jmb -- libjson uses C strings internally, thus the input gets truncated before the
- * data is fed to the input stream (and thus the tokeniser)
{"description":"Null Byte Replacement",
"input":"\u0000",
-"output":[["Character", "\ufffd"]]}
-*/
+"output":["ParseError", ["Character", "\ufffd"]]},
+
+{"description":"Comment with dash",
+"input":"<!---x",
+"output":["ParseError", ["Comment", "-x"]]},
+
+{"description":"Entity + newline",
+"input":"\nx\n&gt;\n",
+"output":[["Character","\nx\n>\n"]]}
]}