diff options
author | Andrew Sidwell <andy@entai.co.uk> | 2008-06-16 05:37:06 +0000 |
---|---|---|
committer | Andrew Sidwell <andy@entai.co.uk> | 2008-06-16 05:37:06 +0000 |
commit | 32386253b3e285406fc5bc5cbd2567b14a18524f (patch) | |
tree | 23824c752b7dd5e60e7a88042b21e2b5e70e5d38 /test/data/tokeniser2/test2.test | |
parent | dfdc3e098d65ebcd7bc0c247d179201f81132ccb (diff) | |
download | libhubbub-32386253b3e285406fc5bc5cbd2567b14a18524f.tar.gz libhubbub-32386253b3e285406fc5bc5cbd2567b14a18524f.tar.bz2 |
Update the testsuite to match the tokeniser. Also, import new html5lib tests for better coverage. Known issue: the tokeniser test stalls using the testrunner script.
svn path=/trunk/hubbub/; revision=4356
Diffstat (limited to 'test/data/tokeniser2/test2.test')
-rw-r--r-- | test/data/tokeniser2/test2.test | 93 |
1 file changed, 67 insertions, 26 deletions
diff --git a/test/data/tokeniser2/test2.test b/test/data/tokeniser2/test2.test index 32c0f99..50c3531 100644 --- a/test/data/tokeniser2/test2.test +++ b/test/data/tokeniser2/test2.test @@ -1,32 +1,68 @@ {"tests": [ -{"description":"Doctype without a name", +{"description":"DOCTYPE without name", "input":"<!DOCTYPE>", -"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]}, -{"description":"Correct doctype without a space before name", +{"description":"DOCTYPE without space before name", "input":"<!DOCTYPEhtml>", -"output":["ParseError", ["DOCTYPE", "HTML", false]]}, +"output":["ParseError", ["DOCTYPE", "html", null, null, true]]}, -{"description":"Incorrect doctype without a space before name", +{"description":"Incorrect DOCTYPE without a space before name", "input":"<!DOCTYPEfoo>", -"output":["ParseError", ["DOCTYPE", "FOO", true]]}, +"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]}, -{"description":"Bogus doctype", +{"description":"DOCTYPE with publicId", "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">", -"output":["ParseError", ["DOCTYPE", "HTML", true]]}, +"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]}, + +{"description":"DOCTYPE with EOF after PUBLIC", +"input":"<!DOCTYPE html PUBLIC", +"output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, + +{"description":"DOCTYPE with EOF after PUBLIC '", +"input":"<!DOCTYPE html PUBLIC '", +"output":["ParseError", ["DOCTYPE", "html", "", null, false]]}, + +{"description":"DOCTYPE with EOF after PUBLIC 'x", +"input":"<!DOCTYPE html PUBLIC 'x", +"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]}, + +{"description":"DOCTYPE with systemId", +"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">", +"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]}, + +{"description":"DOCTYPE with 
publicId and systemId", +"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">", +"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]}, + +{"description":"DOCTYPE with > in double-quoted publicId", +"input":"<!DOCTYPE html PUBLIC \">x", +"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]}, + +{"description":"DOCTYPE with > in single-quoted publicId", +"input":"<!DOCTYPE html PUBLIC '>x", +"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]}, + +{"description":"DOCTYPE with > in double-quoted systemId", +"input":"<!DOCTYPE html PUBLIC \"foo\" \">x", +"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]}, + +{"description":"DOCTYPE with > in single-quoted systemId", +"input":"<!DOCTYPE html PUBLIC 'foo' '>x", +"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]}, {"description":"Incomplete doctype", "input":"<!DOCTYPE html ", -"output":["ParseError", ["DOCTYPE", "HTML", true]]}, +"output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, {"description":"Numeric entity representing the NUL character", "input":"�", -"output":[["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"Hexadecimal entity representing the NUL character", "input":"�", -"output":[["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)", "input":"�", @@ -36,13 +72,9 @@ "input":"�", "output":["ParseError", ["Character", "\uFFFD"]]}, -{"description":"Numeric entity representing a Windows-1252 'codepoint'", -"input":"‰", -"output":[["Character", "\u2030"]]}, - -{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'", -"input":"‰", -"output":[["Character", "\u2030"]]}, 
+{"description":"Hexadecimal entity pair representing a surrogate pair", +"input":"��", +"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]}, {"description":"Hexadecimal entity with mixed uppercase and lowercase", "input":"ꯍ", @@ -54,19 +86,23 @@ {"description":"Unescaped ampersand in attribute value", "input":"<h a='&'>", -"output":["ParseError", ["StartTag", "h", { "a":"&" }]]}, +"output":[["StartTag", "h", { "a":"&" }]]}, {"description":"StartTag containing <", "input":"<a<b>", -"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]}, +"output":[["StartTag", "a<b", { }]]}, {"description":"Non-void element containing trailing /", "input":"<h/>", -"output":["ParseError", ["StartTag", "h", { }]]}, +"output":[["StartTag","h",{},true]]}, {"description":"Void element with permitted slash", "input":"<br/>", -"output":[["StartTag", "br", { }]]}, +"output":[["StartTag","br",{},true]]}, + +{"description":"Void element with permitted slash (with attribute)", +"input":"<br foo='bar'/>", +"output":[["StartTag","br",{"foo":"bar"},true]]}, {"description":"StartTag containing /", "input":"<h/a='b'>", @@ -96,12 +132,17 @@ "input":"foo < bar", "output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]}, -/* jmb -- libjson uses C strings internally, thus the input gets truncated before the - * data is fed to the input stream (and thus the tokeniser) {"description":"Null Byte Replacement", "input":"\u0000", -"output":[["Character", "\ufffd"]]} -*/ +"output":["ParseError", ["Character", "\ufffd"]]}, + +{"description":"Comment with dash", +"input":"<!---x", +"output":["ParseError", ["Comment", "-x"]]}, + +{"description":"Entity + newline", +"input":"\nx\n>\n", +"output":[["Character","\nx\n>\n"]]} ]} |