diff options
Diffstat (limited to 'test/data/tokeniser2/test2.test')
-rw-r--r-- | test/data/tokeniser2/test2.test | 93 |
1 files changed, 67 insertions, 26 deletions
diff --git a/test/data/tokeniser2/test2.test b/test/data/tokeniser2/test2.test index 32c0f99..50c3531 100644 --- a/test/data/tokeniser2/test2.test +++ b/test/data/tokeniser2/test2.test @@ -1,32 +1,68 @@ {"tests": [ -{"description":"Doctype without a name", +{"description":"DOCTYPE without name", "input":"<!DOCTYPE>", -"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]}, -{"description":"Correct doctype without a space before name", +{"description":"DOCTYPE without space before name", "input":"<!DOCTYPEhtml>", -"output":["ParseError", ["DOCTYPE", "HTML", false]]}, +"output":["ParseError", ["DOCTYPE", "html", null, null, true]]}, -{"description":"Incorrect doctype without a space before name", +{"description":"Incorrect DOCTYPE without a space before name", "input":"<!DOCTYPEfoo>", -"output":["ParseError", ["DOCTYPE", "FOO", true]]}, +"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]}, -{"description":"Bogus doctype", +{"description":"DOCTYPE with publicId", "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">", -"output":["ParseError", ["DOCTYPE", "HTML", true]]}, +"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]}, + +{"description":"DOCTYPE with EOF after PUBLIC", +"input":"<!DOCTYPE html PUBLIC", +"output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, + +{"description":"DOCTYPE with EOF after PUBLIC '", +"input":"<!DOCTYPE html PUBLIC '", +"output":["ParseError", ["DOCTYPE", "html", "", null, false]]}, + +{"description":"DOCTYPE with EOF after PUBLIC 'x", +"input":"<!DOCTYPE html PUBLIC 'x", +"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]}, + +{"description":"DOCTYPE with systemId", +"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">", +"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]}, + +{"description":"DOCTYPE with publicId and systemId", +"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">", +"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]}, + +{"description":"DOCTYPE with > in double-quoted publicId", +"input":"<!DOCTYPE html PUBLIC \">x", +"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]}, + +{"description":"DOCTYPE with > in single-quoted publicId", +"input":"<!DOCTYPE html PUBLIC '>x", +"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]}, + +{"description":"DOCTYPE with > in double-quoted systemId", +"input":"<!DOCTYPE html PUBLIC \"foo\" \">x", +"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]}, + +{"description":"DOCTYPE with > in single-quoted systemId", +"input":"<!DOCTYPE html PUBLIC 'foo' '>x", +"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]}, {"description":"Incomplete doctype", "input":"<!DOCTYPE html ", -"output":["ParseError", ["DOCTYPE", "HTML", true]]}, +"output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, {"description":"Numeric entity representing the NUL character", "input":"�", -"output":[["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"Hexadecimal entity representing the NUL character", "input":"�", -"output":[["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)", "input":"�", @@ -36,13 +72,9 @@ "input":"�", "output":["ParseError", ["Character", "\uFFFD"]]}, -{"description":"Numeric entity representing a Windows-1252 'codepoint'", -"input":"‰", -"output":[["Character", "\u2030"]]}, - -{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'", -"input":"‰", -"output":[["Character", "\u2030"]]}, +{"description":"Hexadecimal entity pair representing a surrogate pair", +"input":"��", +"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]}, {"description":"Hexadecimal entity with mixed uppercase and lowercase", "input":"ꯍ", @@ -54,19 +86,23 @@ {"description":"Unescaped ampersand in attribute value", "input":"<h a='&'>", -"output":["ParseError", ["StartTag", "h", { "a":"&" }]]}, +"output":[["StartTag", "h", { "a":"&" }]]}, {"description":"StartTag containing <", "input":"<a<b>", -"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]}, +"output":[["StartTag", "a<b", { }]]}, {"description":"Non-void element containing trailing /", "input":"<h/>", -"output":["ParseError", ["StartTag", "h", { }]]}, +"output":[["StartTag","h",{},true]]}, {"description":"Void element with permitted slash", "input":"<br/>", -"output":[["StartTag", "br", { }]]}, +"output":[["StartTag","br",{},true]]}, + +{"description":"Void element with permitted slash (with attribute)", +"input":"<br foo='bar'/>", +"output":[["StartTag","br",{"foo":"bar"},true]]}, {"description":"StartTag containing /", "input":"<h/a='b'>", @@ -96,12 +132,17 @@ "input":"foo < bar", "output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]}, -/* jmb -- libjson uses C strings internally, thus the input gets truncated before the - * data is fed to the input stream (and thus the tokeniser) {"description":"Null Byte Replacement", "input":"\u0000", -"output":[["Character", "\ufffd"]]} -*/ +"output":["ParseError", ["Character", "\ufffd"]]}, + +{"description":"Comment with dash", +"input":"<!---x", +"output":["ParseError", ["Comment", "-x"]]}, + +{"description":"Entity + newline", +"input":"\nx\n>\n", +"output":[["Character","\nx\n>\n"]]} ]} |