1 files changed, 67 insertions, 26 deletions
diff --git a/test/data/tokeniser2/test2.test b/test/data/tokeniser2/test2.test
index 32c0f99..50c3531 100644
--- a/test/data/tokeniser2/test2.test
+++ b/test/data/tokeniser2/test2.test
@@ -1,32 +1,68 @@
 {"tests": [
 
-{"description":"Doctype without a name",
+{"description":"DOCTYPE without name",
 "input":"<!DOCTYPE>",
-"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
 
-{"description":"Correct doctype without a space before name",
+{"description":"DOCTYPE without space before name",
 "input":"<!DOCTYPEhtml>",
-"output":["ParseError", ["DOCTYPE", "HTML", false]]},
+"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
 
-{"description":"Incorrect doctype without a space before name",
+{"description":"DOCTYPE with non-html name and without a space before name",
 "input":"<!DOCTYPEfoo>",
-"output":["ParseError", ["DOCTYPE", "FOO", true]]},
+"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
 
-{"description":"Bogus doctype",
+{"description":"DOCTYPE with publicId",
 "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
-"output":["ParseError", ["DOCTYPE", "HTML", true]]},
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC",
+"input":"<!DOCTYPE html PUBLIC",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC '",
+"input":"<!DOCTYPE html PUBLIC '",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC 'x",
+"input":"<!DOCTYPE html PUBLIC 'x",
+"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
+
+{"description":"DOCTYPE with systemId",
+"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with publicId and systemId",
+"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with > in double-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC \">x",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in single-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC '>x",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in double-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
+"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in single-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
+"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
 
 {"description":"Incomplete doctype",
 "input":"<!DOCTYPE html ",
-"output":["ParseError", ["DOCTYPE", "HTML", true]]},
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
 
 {"description":"Numeric entity representing the NUL character",
 "input":"&#0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},
 
 {"description":"Hexadecimal entity representing the NUL character",
 "input":"&#x0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},
 
 {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
 "input":"&#2225222;",
@@ -36,13 +72,9 @@
 "input":"&#x1010FFFF;",
 "output":["ParseError", ["Character", "\uFFFD"]]},
 
-{"description":"Numeric entity representing a Windows-1252 'codepoint'",
-"input":"&#137;",
-"output":[["Character", "\u2030"]]},
-
-{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
-"input":"&#x89;",
-"output":[["Character", "\u2030"]]},
+{"description":"Hexadecimal entity pair representing a surrogate pair",
+"input":"&#xD869;&#xDED6;",
+"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
 
 {"description":"Hexadecimal entity with mixed uppercase and lowercase",
 "input":"&#xaBcD;",
@@ -54,19 +86,23 @@
 
 {"description":"Unescaped ampersand in attribute value",
 "input":"<h a='&'>",
-"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
+"output":[["StartTag", "h", { "a":"&" }]]},
 
 {"description":"StartTag containing <",
 "input":"<a<b>",
-"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]},
+"output":[["StartTag", "a<b", { }]]},
 
 {"description":"Non-void element containing trailing /",
 "input":"<h/>",
-"output":["ParseError", ["StartTag", "h", { }]]},
+"output":[["StartTag","h",{},true]]},
 
 {"description":"Void element with permitted slash",
 "input":"<br/>",
-"output":[["StartTag", "br", { }]]},
+"output":[["StartTag","br",{},true]]},
+
+{"description":"Void element with permitted slash (with attribute)",
+"input":"<br foo='bar'/>",
+"output":[["StartTag","br",{"foo":"bar"},true]]},
 
 {"description":"StartTag containing /",
 "input":"<h/a='b'>",
@@ -96,12 +132,17 @@
 "input":"foo < bar",
 "output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
 
-/* jmb -- libjson uses C strings internally, thus the input gets truncated before the
- * data is fed to the input stream (and thus the tokeniser)
 {"description":"Null Byte Replacement",
 "input":"\u0000",
-"output":[["Character", "\ufffd"]]}
-*/
+"output":["ParseError", ["Character", "\ufffd"]]},
+
+{"description":"Comment with dash",
+"input":"<!---x",
+"output":["ParseError", ["Comment", "-x"]]},
+
+{"description":"Entity + newline",
+"input":"\nx\n&gt;\n",
+"output":[["Character","\nx\n>\n"]]}
 
 ]}