diff options
author | John Mark Bell <jmb@netsurf-browser.org> | 2007-06-23 22:40:25 +0000 |
---|---|---|
committer | John Mark Bell <jmb@netsurf-browser.org> | 2007-06-23 22:40:25 +0000 |
commit | 7b30a5520cfb56e651f0eb4da85a3e07747da7dc (patch) | |
tree | 5d6281c071c089e1e7a8ae6f8044cecaf6a7db16 /test/data/tokeniser2/test1.test | |
download | libhubbub-7b30a5520cfb56e651f0eb4da85a3e07747da7dc.tar.gz libhubbub-7b30a5520cfb56e651f0eb4da85a3e07747da7dc.tar.bz2 |
Import hubbub -- an HTML parsing library.
Plenty of work still to do (like tree generation ;)
svn path=/trunk/hubbub/; revision=3359
Diffstat (limited to 'test/data/tokeniser2/test1.test')
-rw-r--r-- | test/data/tokeniser2/test1.test | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/test/data/tokeniser2/test1.test b/test/data/tokeniser2/test1.test new file mode 100644 index 0000000..c12ff5a --- /dev/null +++ b/test/data/tokeniser2/test1.test @@ -0,0 +1,136 @@ +{"tests": [ + +{"description":"Correct Doctype lowercase", +"input":"<!DOCTYPE html>", +"output":[["DOCTYPE", "HTML", false]]}, + +{"description":"Correct Doctype uppercase", +"input":"<!DOCTYPE HTML>", +"output":[["DOCTYPE", "HTML", false]]}, + +{"description":"Correct Doctype mixed case", +"input":"<!DOCTYPE HtMl>", +"output":[["DOCTYPE", "HTML", false]]}, + +{"description":"Truncated doctype start", +"input":"<!DOC>", +"output":["ParseError", ["Comment", "DOC"]]}, + +{"description":"Doctype in error", +"input":"<!DOCTYPE foo>", +"output":[["DOCTYPE", "FOO", true]]}, + +{"description":"Single Start Tag", +"input":"<h>", +"output":[["StartTag", "h", {}]]}, + +{"description":"Empty end tag", +"input":"</>", +"output":["ParseError"]}, + +{"description":"Empty start tag", +"input":"<>", +"output":["ParseError", ["Character", "<>"]]}, + +{"description":"Start Tag w/attribute", +"input":"<h a='b'>", +"output":[["StartTag", "h", {"a":"b"}]]}, + +{"description":"Start Tag w/attribute no quotes", +"input":"<h a=b>", +"output":[["StartTag", "h", {"a":"b"}]]}, + +{"description":"Start/End Tag", +"input":"<h></h>", +"output":[["StartTag", "h", {}], ["EndTag", "h"]]}, + +{"description":"Two unclosed start tags", +"input":"<p>One<p>Two", +"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]}, + +{"description":"End Tag w/attribute", +"input":"<h></h a='b'>", +"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]}, + +{"description":"Multiple atts", +"input":"<h a='b' c='d'>", +"output":[["StartTag", "h", {"a":"b", "c":"d"}]]}, + +{"description":"Multiple atts no space", +"input":"<h a='b'c='d'>", +"output":[["StartTag", "h", {"a":"b", "c":"d"}]]}, + +{"description":"Repeated attr", + "input":"<h a='b' a='d'>", + 
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]}, + +{"description":"Simple comment", + "input":"<!--comment-->", + "output":[["Comment", "comment"]]}, + +{"description":"Comment, Central dash no space", + "input":"<!----->", + "output":["ParseError", ["Comment", "-"]]}, + +{"description":"Comment, two central dashes", +"input":"<!-- --comment -->", +"output":["ParseError", ["Comment", " --comment "]]}, + +{"description":"Unfinished comment", +"input":"<!--comment", +"output":["ParseError", ["Comment", "comment"]]}, + +{"description":"Start of a comment", +"input":"<!-", +"output":["ParseError", ["Comment", "-"]]}, + +{"description":"Ampersand only", +"input":"&", +"output":["ParseError", ["Character", "&"]]}, + +{"description":"Unfinished entity", +"input":"&f", +"output":["ParseError", ["Character", "&"], ["Character", "f"]]}, + +{"description":"Ampersand, number sign", +"input":"&#", +"output":["ParseError", ["Character", "&"], ["Character", "#"]]}, + +{"description":"Unfinished numeric entity", +"input":"&#x", +"output":["ParseError", ["Character", "&#x"]]}, + +{"description":"Entity with trailing semicolon (1)", +"input":"I'm &not;it", +"output":[["Character","I'm ¬it"]]}, + +{"description":"Entity with trailing semicolon (2)", +"input":"I'm &notin;", +"output":[["Character","I'm ∉"]]}, + +{"description":"Entity without trailing semicolon (1)", +"input":"I'm &notit", +"output":[["Character","I'm "], "ParseError", ["Character", "¬"], +["Character", "it"]]}, + +{"description":"Entity without trailing semicolon (2)", +"input":"I'm &notin", +"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]}, + +{"description":"Partial entity match at end of file", +"input":"I'm &no", +"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]}, + +{"description":"ASCII decimal entity", +"input":"&#0036;", +"output":[["Character","$"]]}, + +{"description":"ASCII hexadecimal entity", +"input":"&#x3f;", +"output":[["Character","?"]]}, + +{"description":"Hexadecimal entity in 
attribute", +"input":"<h a='&#x3f;'></h>", +"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]} + +]} |