summaryrefslogtreecommitdiff
path: root/test/data/tokeniser2/test1.test
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2007-06-23 22:40:25 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2007-06-23 22:40:25 +0000
commit7b30a5520cfb56e651f0eb4da85a3e07747da7dc (patch)
tree5d6281c071c089e1e7a8ae6f8044cecaf6a7db16 /test/data/tokeniser2/test1.test
downloadlibhubbub-7b30a5520cfb56e651f0eb4da85a3e07747da7dc.tar.gz
libhubbub-7b30a5520cfb56e651f0eb4da85a3e07747da7dc.tar.bz2
Import hubbub -- an HTML parsing library.
Plenty of work still to do (like tree generation ;) svn path=/trunk/hubbub/; revision=3359
Diffstat (limited to 'test/data/tokeniser2/test1.test')
-rw-r--r--test/data/tokeniser2/test1.test136
1 files changed, 136 insertions, 0 deletions
diff --git a/test/data/tokeniser2/test1.test b/test/data/tokeniser2/test1.test
new file mode 100644
index 0000000..c12ff5a
--- /dev/null
+++ b/test/data/tokeniser2/test1.test
@@ -0,0 +1,136 @@
+{"tests": [
+
+{"description":"Correct Doctype lowercase",
+"input":"<!DOCTYPE html>",
+"output":[["DOCTYPE", "HTML", false]]},
+
+{"description":"Correct Doctype uppercase",
+"input":"<!DOCTYPE HTML>",
+"output":[["DOCTYPE", "HTML", false]]},
+
+{"description":"Correct Doctype mixed case",
+"input":"<!DOCTYPE HtMl>",
+"output":[["DOCTYPE", "HTML", false]]},
+
+{"description":"Truncated doctype start",
+"input":"<!DOC>",
+"output":["ParseError", ["Comment", "DOC"]]},
+
+{"description":"Doctype in error",
+"input":"<!DOCTYPE foo>",
+"output":[["DOCTYPE", "FOO", true]]},
+
+{"description":"Single Start Tag",
+"input":"<h>",
+"output":[["StartTag", "h", {}]]},
+
+{"description":"Empty end tag",
+"input":"</>",
+"output":["ParseError"]},
+
+{"description":"Empty start tag",
+"input":"<>",
+"output":["ParseError", ["Character", "<>"]]},
+
+{"description":"Start Tag w/attribute",
+"input":"<h a='b'>",
+"output":[["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Start Tag w/attribute no quotes",
+"input":"<h a=b>",
+"output":[["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Start/End Tag",
+"input":"<h></h>",
+"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
+
+{"description":"Two unclosed start tags",
+"input":"<p>One<p>Two",
+"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
+
+{"description":"End Tag w/attribute",
+"input":"<h></h a='b'>",
+"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
+
+{"description":"Multiple atts",
+"input":"<h a='b' c='d'>",
+"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
+
+{"description":"Multiple atts no space",
+"input":"<h a='b'c='d'>",
+"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
+
+{"description":"Repeated attr",
+ "input":"<h a='b' a='d'>",
+ "output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Simple comment",
+ "input":"<!--comment-->",
+ "output":[["Comment", "comment"]]},
+
+{"description":"Comment, Central dash no space",
+ "input":"<!----->",
+ "output":["ParseError", ["Comment", "-"]]},
+
+{"description":"Comment, two central dashes",
+"input":"<!-- --comment -->",
+"output":["ParseError", ["Comment", " --comment "]]},
+
+{"description":"Unfinished comment",
+"input":"<!--comment",
+"output":["ParseError", ["Comment", "comment"]]},
+
+{"description":"Start of a comment",
+"input":"<!-",
+"output":["ParseError", ["Comment", "-"]]},
+
+{"description":"Ampersand only",
+"input":"&",
+"output":["ParseError", ["Character", "&"]]},
+
+{"description":"Unfinished entity",
+"input":"&f",
+"output":["ParseError", ["Character", "&"], ["Character", "f"]]},
+
+{"description":"Ampersand, number sign",
+"input":"&#",
+"output":["ParseError", ["Character", "&"], ["Character", "#"]]},
+
+{"description":"Unfinished numeric entity",
+"input":"&#x",
+"output":["ParseError", ["Character", "&#x"]]},
+
+{"description":"Entity with trailing semicolon (1)",
+"input":"I'm &not;it",
+"output":[["Character","I'm ¬it"]]},
+
+{"description":"Entity with trailing semicolon (2)",
+"input":"I'm &notin;",
+"output":[["Character","I'm ∉"]]},
+
+{"description":"Entity without trailing semicolon (1)",
+"input":"I'm &notit",
+"output":[["Character","I'm "], "ParseError", ["Character", "¬"],
+["Character", "it"]]},
+
+{"description":"Entity without trailing semicolon (2)",
+"input":"I'm &notin",
+"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]},
+
+{"description":"Partial entity match at end of file",
+"input":"I'm &no",
+"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
+
+{"description":"ASCII decimal entity",
+"input":"&#0036;",
+"output":[["Character","$"]]},
+
+{"description":"ASCII hexadecimal entity",
+"input":"&#x3f;",
+"output":[["Character","?"]]},
+
+{"description":"Hexadecimal entity in attribute",
+"input":"<h a='&#x3f;'></h>",
+"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}
+
+]}