summaryrefslogtreecommitdiff
path: root/test/data/tokeniser2
diff options
context:
space:
mode:
authorAndrew Sidwell <andy@entai.co.uk>2008-06-16 05:37:06 +0000
committerAndrew Sidwell <andy@entai.co.uk>2008-06-16 05:37:06 +0000
commit32386253b3e285406fc5bc5cbd2567b14a18524f (patch)
tree23824c752b7dd5e60e7a88042b21e2b5e70e5d38 /test/data/tokeniser2
parentdfdc3e098d65ebcd7bc0c247d179201f81132ccb (diff)
downloadlibhubbub-32386253b3e285406fc5bc5cbd2567b14a18524f.tar.gz
libhubbub-32386253b3e285406fc5bc5cbd2567b14a18524f.tar.bz2
Update the testsuite to match the tokeniser. Also, import new html5lib tests for better coverage. Known issue: the tokeniser test stalls using the testrunner script.
svn path=/trunk/hubbub/; revision=4356
Diffstat (limited to 'test/data/tokeniser2')
-rw-r--r--test/data/tokeniser2/INDEX6
-rw-r--r--test/data/tokeniser2/contentModelFlags.test33
-rw-r--r--test/data/tokeniser2/entities.test2339
-rw-r--r--test/data/tokeniser2/escapeFlag.test33
-rw-r--r--test/data/tokeniser2/test1.test68
-rw-r--r--test/data/tokeniser2/test2.test93
-rw-r--r--test/data/tokeniser2/test3.test367
-rw-r--r--test/data/tokeniser2/test4.test289
8 files changed, 3176 insertions, 52 deletions
diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX
index 8539aeb..00c5e01 100644
--- a/test/data/tokeniser2/INDEX
+++ b/test/data/tokeniser2/INDEX
@@ -4,4 +4,8 @@
test1.test html5lib tests (part 1)
test2.test html5lib tests (part 2)
-contentModelFlags.test html5lib content model tests \ No newline at end of file
+test3.test html5lib tests (part 3)
+test4.test html5lib tests (part 4)
+contentModelFlags.test html5lib content model tests
+entities.test html5lib entity tests
+escapeFlag.test html5lib escape flag tests
diff --git a/test/data/tokeniser2/contentModelFlags.test b/test/data/tokeniser2/contentModelFlags.test
index 84d41fc..b4f450a 100644
--- a/test/data/tokeniser2/contentModelFlags.test
+++ b/test/data/tokeniser2/contentModelFlags.test
@@ -2,34 +2,49 @@
{"description":"PLAINTEXT content model flag",
"contentModelFlags":["PLAINTEXT"],
+"lastStartTag":"plaintext",
"input":"<head>&body;",
"output":[["Character", "<head>&body;"]]},
{"description":"End tag closing RCDATA or CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"bar",
-"input":"foo</bar>",
-"output":[["Character", "foo"], ["EndTag", "bar"]]},
+"lastStartTag":"pre",
+"input":"foo</pre>",
+"output":[["Character", "foo"], ["EndTag", "pre"]]},
+
+{"description":"End tag closing RCDATA or CDATA (case-insensitivity)",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"pre",
+"input":"foo</pRe>",
+"output":[["Character", "foo"], ["EndTag", "pre"]]},
{"description":"End tag with incorrect name in RCDATA or CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"baz",
-"input":"</foo>bar</baz>",
-"output":["ParseError", ["Character", "</foo>bar"], ["EndTag", "baz"]]},
+"lastStartTag":"pre",
+"input":"</foo>bar</pre>",
+"output":[["Character", "</foo>bar"], ["EndTag", "pre"]]},
+
+{"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"pre",
+"input":"</foo>bar</preaar>",
+"output":[["Character", "</foo>bar</preaar>"]]},
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"bar",
-"input":"foo</bar></baz>",
-"output":[["Character", "foo"], ["EndTag", "bar"], ["EndTag", "baz"]]},
+"lastStartTag":"pre",
+"input":"foo</pre></baz>",
+"output":[["Character", "foo"], ["EndTag", "pre"], ["EndTag", "baz"]]},
{"description":"CDATA w/ something looking like an entity",
"contentModelFlags":["CDATA"],
+"lastStartTag":"pre",
"input":"&foo;",
"output":[["Character", "&foo;"]]},
{"description":"RCDATA w/ an entity",
"contentModelFlags":["RCDATA"],
+"lastStartTag":"textarea",
"input":"&lt;",
"output":[["Character", "<"]]}
diff --git a/test/data/tokeniser2/entities.test b/test/data/tokeniser2/entities.test
new file mode 100644
index 0000000..8b8d352
--- /dev/null
+++ b/test/data/tokeniser2/entities.test
@@ -0,0 +1,2339 @@
+{"tests": [
+
+{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
+"input":"<h a='&noti;'>",
+"output": ["ParseError", ["StartTag", "h", {"a": "&noti;"}]]},
+
+{"description": "Named entity: AElig with a semi-colon.",
+"input":"&AElig;",
+"output": [["Character", "\u00C6"]]},
+
+{"description": "Named entity: AElig without a semi-colon.",
+"input":"&AElig",
+"output": ["ParseError", ["Character", "\u00C6"]]},
+
+{"description": "Named entity: AMP with a semi-colon.",
+"input":"&AMP;",
+"output": [["Character", "\u0026"]]},
+
+{"description": "Named entity: AMP without a semi-colon.",
+"input":"&AMP",
+"output": ["ParseError", ["Character", "\u0026"]]},
+
+{"description": "Named entity: Aacute with a semi-colon.",
+"input":"&Aacute;",
+"output": [["Character", "\u00C1"]]},
+
+{"description": "Named entity: Aacute without a semi-colon.",
+"input":"&Aacute",
+"output": ["ParseError", ["Character", "\u00C1"]]},
+
+{"description": "Named entity: Acirc with a semi-colon.",
+"input":"&Acirc;",
+"output": [["Character", "\u00C2"]]},
+
+{"description": "Named entity: Acirc without a semi-colon.",
+"input":"&Acirc",
+"output": ["ParseError", ["Character", "\u00C2"]]},
+
+{"description": "Named entity: Agrave with a semi-colon.",
+"input":"&Agrave;",
+"output": [["Character", "\u00C0"]]},
+
+{"description": "Named entity: Agrave without a semi-colon.",
+"input":"&Agrave",
+"output": ["ParseError", ["Character", "\u00C0"]]},
+
+{"description": "Named entity: Alpha with a semi-colon.",
+"input":"&Alpha;",
+"output": [["Character", "\u0391"]]},
+
+{"description": "Named entity: Aring with a semi-colon.",
+"input":"&Aring;",
+"output": [["Character", "\u00C5"]]},
+
+{"description": "Named entity: Aring without a semi-colon.",
+"input":"&Aring",
+"output": ["ParseError", ["Character", "\u00C5"]]},
+
+{"description": "Named entity: Atilde with a semi-colon.",
+"input":"&Atilde;",
+"output": [["Character", "\u00C3"]]},
+
+{"description": "Named entity: Atilde without a semi-colon.",
+"input":"&Atilde",
+"output": ["ParseError", ["Character", "\u00C3"]]},
+
+{"description": "Named entity: Auml with a semi-colon.",
+"input":"&Auml;",
+"output": [["Character", "\u00C4"]]},
+
+{"description": "Named entity: Auml without a semi-colon.",
+"input":"&Auml",
+"output": ["ParseError", ["Character", "\u00C4"]]},
+
+{"description": "Named entity: Beta with a semi-colon.",
+"input":"&Beta;",
+"output": [["Character", "\u0392"]]},
+
+{"description": "Named entity: COPY with a semi-colon.",
+"input":"&COPY;",
+"output": [["Character", "\u00A9"]]},
+
+{"description": "Named entity: COPY without a semi-colon.",
+"input":"&COPY",
+"output": ["ParseError", ["Character", "\u00A9"]]},
+
+{"description": "Named entity: Ccedil with a semi-colon.",
+"input":"&Ccedil;",
+"output": [["Character", "\u00C7"]]},
+
+{"description": "Named entity: Ccedil without a semi-colon.",
+"input":"&Ccedil",
+"output": ["ParseError", ["Character", "\u00C7"]]},
+
+{"description": "Named entity: Chi with a semi-colon.",
+"input":"&Chi;",
+"output": [["Character", "\u03A7"]]},
+
+{"description": "Named entity: Dagger with a semi-colon.",
+"input":"&Dagger;",
+"output": [["Character", "\u2021"]]},
+
+{"description": "Named entity: Delta with a semi-colon.",
+"input":"&Delta;",
+"output": [["Character", "\u0394"]]},
+
+{"description": "Named entity: ETH with a semi-colon.",
+"input":"&ETH;",
+"output": [["Character", "\u00D0"]]},
+
+{"description": "Named entity: ETH without a semi-colon.",
+"input":"&ETH",
+"output": ["ParseError", ["Character", "\u00D0"]]},
+
+{"description": "Named entity: Eacute with a semi-colon.",
+"input":"&Eacute;",
+"output": [["Character", "\u00C9"]]},
+
+{"description": "Named entity: Eacute without a semi-colon.",
+"input":"&Eacute",
+"output": ["ParseError", ["Character", "\u00C9"]]},
+
+{"description": "Named entity: Ecirc with a semi-colon.",
+"input":"&Ecirc;",
+"output": [["Character", "\u00CA"]]},
+
+{"description": "Named entity: Ecirc without a semi-colon.",
+"input":"&Ecirc",
+"output": ["ParseError", ["Character", "\u00CA"]]},
+
+{"description": "Named entity: Egrave with a semi-colon.",
+"input":"&Egrave;",
+"output": [["Character", "\u00C8"]]},
+
+{"description": "Named entity: Egrave without a semi-colon.",
+"input":"&Egrave",
+"output": ["ParseError", ["Character", "\u00C8"]]},
+
+{"description": "Named entity: Epsilon with a semi-colon.",
+"input":"&Epsilon;",
+"output": [["Character", "\u0395"]]},
+
+{"description": "Named entity: Eta with a semi-colon.",
+"input":"&Eta;",
+"output": [["Character", "\u0397"]]},
+
+{"description": "Named entity: Euml with a semi-colon.",
+"input":"&Euml;",
+"output": [["Character", "\u00CB"]]},
+
+{"description": "Named entity: Euml without a semi-colon.",
+"input":"&Euml",
+"output": ["ParseError", ["Character", "\u00CB"]]},
+
+{"description": "Named entity: GT with a semi-colon.",
+"input":"&GT;",
+"output": [["Character", "\u003E"]]},
+
+{"description": "Named entity: GT without a semi-colon.",
+"input":"&GT",
+"output": ["ParseError", ["Character", "\u003E"]]},
+
+{"description": "Named entity: Gamma with a semi-colon.",
+"input":"&Gamma;",
+"output": [["Character", "\u0393"]]},
+
+{"description": "Named entity: Iacute with a semi-colon.",
+"input":"&Iacute;",
+"output": [["Character", "\u00CD"]]},
+
+{"description": "Named entity: Iacute without a semi-colon.",
+"input":"&Iacute",
+"output": ["ParseError", ["Character", "\u00CD"]]},
+
+{"description": "Named entity: Icirc with a semi-colon.",
+"input":"&Icirc;",
+"output": [["Character", "\u00CE"]]},
+
+{"description": "Named entity: Icirc without a semi-colon.",
+"input":"&Icirc",
+"output": ["ParseError", ["Character", "\u00CE"]]},
+
+{"description": "Named entity: Igrave with a semi-colon.",
+"input":"&Igrave;",
+"output": [["Character", "\u00CC"]]},
+
+{"description": "Named entity: Igrave without a semi-colon.",
+"input":"&Igrave",
+"output": ["ParseError", ["Character", "\u00CC"]]},
+
+{"description": "Named entity: Iota with a semi-colon.",
+"input":"&Iota;",
+"output": [["Character", "\u0399"]]},
+
+{"description": "Named entity: Iuml with a semi-colon.",
+"input":"&Iuml;",
+"output": [["Character", "\u00CF"]]},
+
+{"description": "Named entity: Iuml without a semi-colon.",
+"input":"&Iuml",
+"output": ["ParseError", ["Character", "\u00CF"]]},
+
+{"description": "Named entity: Kappa with a semi-colon.",
+"input":"&Kappa;",
+"output": [["Character", "\u039A"]]},
+
+{"description": "Named entity: LT with a semi-colon.",
+"input":"&LT;",
+"output": [["Character", "\u003C"]]},
+
+{"description": "Named entity: LT without a semi-colon.",
+"input":"&LT",
+"output": ["ParseError", ["Character", "\u003C"]]},
+
+{"description": "Named entity: Lambda with a semi-colon.",
+"input":"&Lambda;",
+"output": [["Character", "\u039B"]]},
+
+{"description": "Named entity: Mu with a semi-colon.",
+"input":"&Mu;",
+"output": [["Character", "\u039C"]]},
+
+{"description": "Named entity: Ntilde with a semi-colon.",
+"input":"&Ntilde;",
+"output": [["Character", "\u00D1"]]},
+
+{"description": "Named entity: Ntilde without a semi-colon.",
+"input":"&Ntilde",
+"output": ["ParseError", ["Character", "\u00D1"]]},
+
+{"description": "Named entity: Nu with a semi-colon.",
+"input":"&Nu;",
+"output": [["Character", "\u039D"]]},
+
+{"description": "Named entity: OElig with a semi-colon.",
+"input":"&OElig;",
+"output": [["Character", "\u0152"]]},
+
+{"description": "Named entity: Oacute with a semi-colon.",
+"input":"&Oacute;",
+"output": [["Character", "\u00D3"]]},
+
+{"description": "Named entity: Oacute without a semi-colon.",
+"input":"&Oacute",
+"output": ["ParseError", ["Character", "\u00D3"]]},
+
+{"description": "Named entity: Ocirc with a semi-colon.",
+"input":"&Ocirc;",
+"output": [["Character", "\u00D4"]]},
+
+{"description": "Named entity: Ocirc without a semi-colon.",
+"input":"&Ocirc",
+"output": ["ParseError", ["Character", "\u00D4"]]},
+
+{"description": "Named entity: Ograve with a semi-colon.",
+"input":"&Ograve;",
+"output": [["Character", "\u00D2"]]},
+
+{"description": "Named entity: Ograve without a semi-colon.",
+"input":"&Ograve",
+"output": ["ParseError", ["Character", "\u00D2"]]},
+
+{"description": "Named entity: Omega with a semi-colon.",
+"input":"&Omega;",
+"output": [["Character", "\u03A9"]]},
+
+{"description": "Named entity: Omicron with a semi-colon.",
+"input":"&Omicron;",
+"output": [["Character", "\u039F"]]},
+
+{"description": "Named entity: Oslash with a semi-colon.",
+"input":"&Oslash;",
+"output": [["Character", "\u00D8"]]},
+
+{"description": "Named entity: Oslash without a semi-colon.",
+"input":"&Oslash",
+"output": ["ParseError", ["Character", "\u00D8"]]},
+
+{"description": "Named entity: Otilde with a semi-colon.",
+"input":"&Otilde;",
+"output": [["Character", "\u00D5"]]},
+
+{"description": "Named entity: Otilde without a semi-colon.",
+"input":"&Otilde",
+"output": ["ParseError", ["Character", "\u00D5"]]},
+
+{"description": "Named entity: Ouml with a semi-colon.",
+"input":"&Ouml;",
+"output": [["Character", "\u00D6"]]},
+
+{"description": "Named entity: Ouml without a semi-colon.",
+"input":"&Ouml",
+"output": ["ParseError", ["Character", "\u00D6"]]},
+
+{"description": "Named entity: Phi with a semi-colon.",
+"input":"&Phi;",
+"output": [["Character", "\u03A6"]]},
+
+{"description": "Named entity: Pi with a semi-colon.",
+"input":"&Pi;",
+"output": [["Character", "\u03A0"]]},
+
+{"description": "Named entity: Prime with a semi-colon.",
+"input":"&Prime;",
+"output": [["Character", "\u2033"]]},
+
+{"description": "Named entity: Psi with a semi-colon.",
+"input":"&Psi;",
+"output": [["Character", "\u03A8"]]},
+
+{"description": "Named entity: QUOT with a semi-colon.",
+"input":"&QUOT;",
+"output": [["Character", "\u0022"]]},
+
+{"description": "Named entity: QUOT without a semi-colon.",
+"input":"&QUOT",
+"output": ["ParseError", ["Character", "\u0022"]]},
+
+{"description": "Named entity: REG with a semi-colon.",
+"input":"&REG;",
+"output": [["Character", "\u00AE"]]},
+
+{"description": "Named entity: REG without a semi-colon.",
+"input":"&REG",
+"output": ["ParseError", ["Character", "\u00AE"]]},
+
+{"description": "Named entity: Rho with a semi-colon.",
+"input":"&Rho;",
+"output": [["Character", "\u03A1"]]},
+
+{"description": "Named entity: Scaron with a semi-colon.",
+"input":"&Scaron;",
+"output": [["Character", "\u0160"]]},
+
+{"description": "Named entity: Sigma with a semi-colon.",
+"input":"&Sigma;",
+"output": [["Character", "\u03A3"]]},
+
+{"description": "Named entity: THORN with a semi-colon.",
+"input":"&THORN;",
+"output": [["Character", "\u00DE"]]},
+
+{"description": "Named entity: THORN without a semi-colon.",
+"input":"&THORN",
+"output": ["ParseError", ["Character", "\u00DE"]]},
+
+{"description": "Named entity: TRADE with a semi-colon.",
+"input":"&TRADE;",
+"output": [["Character", "\u2122"]]},
+
+{"description": "Named entity: Tau with a semi-colon.",
+"input":"&Tau;",
+"output": [["Character", "\u03A4"]]},
+
+{"description": "Named entity: Theta with a semi-colon.",
+"input":"&Theta;",
+"output": [["Character", "\u0398"]]},
+
+{"description": "Named entity: Uacute with a semi-colon.",
+"input":"&Uacute;",
+"output": [["Character", "\u00DA"]]},
+
+{"description": "Named entity: Uacute without a semi-colon.",
+"input":"&Uacute",
+"output": ["ParseError", ["Character", "\u00DA"]]},
+
+{"description": "Named entity: Ucirc with a semi-colon.",
+"input":"&Ucirc;",
+"output": [["Character", "\u00DB"]]},
+
+{"description": "Named entity: Ucirc without a semi-colon.",
+"input":"&Ucirc",
+"output": ["ParseError", ["Character", "\u00DB"]]},
+
+{"description": "Named entity: Ugrave with a semi-colon.",
+"input":"&Ugrave;",
+"output": [["Character", "\u00D9"]]},
+
+{"description": "Named entity: Ugrave without a semi-colon.",
+"input":"&Ugrave",
+"output": ["ParseError", ["Character", "\u00D9"]]},
+
+{"description": "Named entity: Upsilon with a semi-colon.",
+"input":"&Upsilon;",
+"output": [["Character", "\u03A5"]]},
+
+{"description": "Named entity: Uuml with a semi-colon.",
+"input":"&Uuml;",
+"output": [["Character", "\u00DC"]]},
+
+{"description": "Named entity: Uuml without a semi-colon.",
+"input":"&Uuml",
+"output": ["ParseError", ["Character", "\u00DC"]]},
+
+{"description": "Named entity: Xi with a semi-colon.",
+"input":"&Xi;",
+"output": [["Character", "\u039E"]]},
+
+{"description": "Named entity: Yacute with a semi-colon.",
+"input":"&Yacute;",
+"output": [["Character", "\u00DD"]]},
+
+{"description": "Named entity: Yacute without a semi-colon.",
+"input":"&Yacute",
+"output": ["ParseError", ["Character", "\u00DD"]]},
+
+{"description": "Named entity: Yuml with a semi-colon.",
+"input":"&Yuml;",
+"output": [["Character", "\u0178"]]},
+
+{"description": "Named entity: Zeta with a semi-colon.",
+"input":"&Zeta;",
+"output": [["Character", "\u0396"]]},
+
+{"description": "Named entity: aacute with a semi-colon.",
+"input":"&aacute;",
+"output": [["Character", "\u00E1"]]},
+
+{"description": "Named entity: aacute without a semi-colon.",
+"input":"&aacute",
+"output": ["ParseError", ["Character", "\u00E1"]]},
+
+{"description": "Named entity: acirc with a semi-colon.",
+"input":"&acirc;",
+"output": [["Character", "\u00E2"]]},
+
+{"description": "Named entity: acirc without a semi-colon.",
+"input":"&acirc",
+"output": ["ParseError", ["Character", "\u00E2"]]},
+
+{"description": "Named entity: acute with a semi-colon.",
+"input":"&acute;",
+"output": [["Character", "\u00B4"]]},
+
+{"description": "Named entity: acute without a semi-colon.",
+"input":"&acute",
+"output": ["ParseError", ["Character", "\u00B4"]]},
+
+{"description": "Named entity: aelig with a semi-colon.",
+"input":"&aelig;",
+"output": [["Character", "\u00E6"]]},
+
+{"description": "Named entity: aelig without a semi-colon.",
+"input":"&aelig",
+"output": ["ParseError", ["Character", "\u00E6"]]},
+
+{"description": "Named entity: agrave with a semi-colon.",
+"input":"&agrave;",
+"output": [["Character", "\u00E0"]]},
+
+{"description": "Named entity: agrave without a semi-colon.",
+"input":"&agrave",
+"output": ["ParseError", ["Character", "\u00E0"]]},
+
+{"description": "Named entity: alefsym with a semi-colon.",
+"input":"&alefsym;",
+"output": [["Character", "\u2135"]]},
+
+{"description": "Named entity: alpha with a semi-colon.",
+"input":"&alpha;",
+"output": [["Character", "\u03B1"]]},
+
+{"description": "Named entity: amp with a semi-colon.",
+"input":"&amp;",
+"output": [["Character", "\u0026"]]},
+
+{"description": "Named entity: amp without a semi-colon.",
+"input":"&amp",
+"output": ["ParseError", ["Character", "\u0026"]]},
+
+{"description": "Named entity: and with a semi-colon.",
+"input":"&and;",
+"output": [["Character", "\u2227"]]},
+
+{"description": "Named entity: ang with a semi-colon.",
+"input":"&ang;",
+"output": [["Character", "\u2220"]]},
+
+{"description": "Named entity: apos with a semi-colon.",
+"input":"&apos;",
+"output": [["Character", "\u0027"]]},
+
+{"description": "Named entity: aring with a semi-colon.",
+"input":"&aring;",
+"output": [["Character", "\u00E5"]]},
+
+{"description": "Named entity: aring without a semi-colon.",
+"input":"&aring",
+"output": ["ParseError", ["Character", "\u00E5"]]},
+
+{"description": "Named entity: asymp with a semi-colon.",
+"input":"&asymp;",
+"output": [["Character", "\u2248"]]},
+
+{"description": "Named entity: atilde with a semi-colon.",
+"input":"&atilde;",
+"output": [["Character", "\u00E3"]]},
+
+{"description": "Named entity: atilde without a semi-colon.",
+"input":"&atilde",
+"output": ["ParseError", ["Character", "\u00E3"]]},
+
+{"description": "Named entity: auml with a semi-colon.",
+"input":"&auml;",
+"output": [["Character", "\u00E4"]]},
+
+{"description": "Named entity: auml without a semi-colon.",
+"input":"&auml",
+"output": ["ParseError", ["Character", "\u00E4"]]},
+
+{"description": "Named entity: bdquo with a semi-colon.",
+"input":"&bdquo;",
+"output": [["Character", "\u201E"]]},
+
+{"description": "Named entity: beta with a semi-colon.",
+"input":"&beta;",
+"output": [["Character", "\u03B2"]]},
+
+{"description": "Named entity: brvbar with a semi-colon.",
+"input":"&brvbar;",
+"output": [["Character", "\u00A6"]]},
+
+{"description": "Named entity: brvbar without a semi-colon.",
+"input":"&brvbar",
+"output": ["ParseError", ["Character", "\u00A6"]]},
+
+{"description": "Named entity: bull with a semi-colon.",
+"input":"&bull;",
+"output": [["Character", "\u2022"]]},
+
+{"description": "Named entity: cap with a semi-colon.",
+"input":"&cap;",
+"output": [["Character", "\u2229"]]},
+
+{"description": "Named entity: ccedil with a semi-colon.",
+"input":"&ccedil;",
+"output": [["Character", "\u00E7"]]},
+
+{"description": "Named entity: ccedil without a semi-colon.",
+"input":"&ccedil",
+"output": ["ParseError", ["Character", "\u00E7"]]},
+
+{"description": "Named entity: cedil with a semi-colon.",
+"input":"&cedil;",
+"output": [["Character", "\u00B8"]]},
+
+{"description": "Named entity: cedil without a semi-colon.",
+"input":"&cedil",
+"output": ["ParseError", ["Character", "\u00B8"]]},
+
+{"description": "Named entity: cent with a semi-colon.",
+"input":"&cent;",
+"output": [["Character", "\u00A2"]]},
+
+{"description": "Named entity: cent without a semi-colon.",
+"input":"&cent",
+"output": ["ParseError", ["Character", "\u00A2"]]},
+
+{"description": "Named entity: chi with a semi-colon.",
+"input":"&chi;",
+"output": [["Character", "\u03C7"]]},
+
+{"description": "Named entity: circ with a semi-colon.",
+"input":"&circ;",
+"output": [["Character", "\u02C6"]]},
+
+{"description": "Named entity: clubs with a semi-colon.",
+"input":"&clubs;",
+"output": [["Character", "\u2663"]]},
+
+{"description": "Named entity: cong with a semi-colon.",
+"input":"&cong;",
+"output": [["Character", "\u2245"]]},
+
+{"description": "Named entity: copy with a semi-colon.",
+"input":"&copy;",
+"output": [["Character", "\u00A9"]]},
+
+{"description": "Named entity: copy without a semi-colon.",
+"input":"&copy",
+"output": ["ParseError", ["Character", "\u00A9"]]},
+
+{"description": "Named entity: crarr with a semi-colon.",
+"input":"&crarr;",
+"output": [["Character", "\u21B5"]]},
+
+{"description": "Named entity: cup with a semi-colon.",
+"input":"&cup;",
+"output": [["Character", "\u222A"]]},
+
+{"description": "Named entity: curren with a semi-colon.",
+"input":"&curren;",
+"output": [["Character", "\u00A4"]]},
+
+{"description": "Named entity: curren without a semi-colon.",
+"input":"&curren",
+"output": ["ParseError", ["Character", "\u00A4"]]},
+
+{"description": "Named entity: dArr with a semi-colon.",
+"input":"&dArr;",
+"output": [["Character", "\u21D3"]]},
+
+{"description": "Named entity: dagger with a semi-colon.",
+"input":"&dagger;",
+"output": [["Character", "\u2020"]]},
+
+{"description": "Named entity: darr with a semi-colon.",
+"input":"&darr;",
+"output": [["Character", "\u2193"]]},
+
+{"description": "Named entity: deg with a semi-colon.",
+"input":"&deg;",
+"output": [["Character", "\u00B0"]]},
+
+{"description": "Named entity: deg without a semi-colon.",
+"input":"&deg",
+"output": ["ParseError", ["Character", "\u00B0"]]},
+
+{"description": "Named entity: delta with a semi-colon.",
+"input":"&delta;",
+"output": [["Character", "\u03B4"]]},
+
+{"description": "Named entity: diams with a semi-colon.",
+"input":"&diams;",
+"output": [["Character", "\u2666"]]},
+
+{"description": "Named entity: divide with a semi-colon.",
+"input":"&divide;",
+"output": [["Character", "\u00F7"]]},
+
+{"description": "Named entity: divide without a semi-colon.",
+"input":"&divide",
+"output": ["ParseError", ["Character", "\u00F7"]]},
+
+{"description": "Named entity: eacute with a semi-colon.",
+"input":"&eacute;",
+"output": [["Character", "\u00E9"]]},
+
+{"description": "Named entity: eacute without a semi-colon.",
+"input":"&eacute",
+"output": ["ParseError", ["Character", "\u00E9"]]},
+
+{"description": "Named entity: ecirc with a semi-colon.",
+"input":"&ecirc;",
+"output": [["Character", "\u00EA"]]},
+
+{"description": "Named entity: ecirc without a semi-colon.",
+"input":"&ecirc",
+"output": ["ParseError", ["Character", "\u00EA"]]},
+
+{"description": "Named entity: egrave with a semi-colon.",
+"input":"&egrave;",
+"output": [["Character", "\u00E8"]]},
+
+{"description": "Named entity: egrave without a semi-colon.",
+"input":"&egrave",
+"output": ["ParseError", ["Character", "\u00E8"]]},
+
+{"description": "Named entity: empty with a semi-colon.",
+"input":"&empty;",
+"output": [["Character", "\u2205"]]},
+
+{"description": "Named entity: emsp with a semi-colon.",
+"input":"&emsp;",
+"output": [["Character", "\u2003"]]},
+
+{"description": "Named entity: ensp with a semi-colon.",
+"input":"&ensp;",
+"output": [["Character", "\u2002"]]},
+
+{"description": "Named entity: epsilon with a semi-colon.",
+"input":"&epsilon;",
+"output": [["Character", "\u03B5"]]},
+
+{"description": "Named entity: equiv with a semi-colon.",
+"input":"&equiv;",
+"output": [["Character", "\u2261"]]},
+
+{"description": "Named entity: eta with a semi-colon.",
+"input":"&eta;",
+"output": [["Character", "\u03B7"]]},
+
+{"description": "Named entity: eth with a semi-colon.",
+"input":"&eth;",
+"output": [["Character", "\u00F0"]]},
+
+{"description": "Named entity: eth without a semi-colon.",
+"input":"&eth",
+"output": ["ParseError", ["Character", "\u00F0"]]},
+
+{"description": "Named entity: euml with a semi-colon.",
+"input":"&euml;",
+"output": [["Character", "\u00EB"]]},
+
+{"description": "Named entity: euml without a semi-colon.",
+"input":"&euml",
+"output": ["ParseError", ["Character", "\u00EB"]]},
+
+{"description": "Named entity: euro with a semi-colon.",
+"input":"&euro;",
+"output": [["Character", "\u20AC"]]},
+
+{"description": "Named entity: exist with a semi-colon.",
+"input":"&exist;",
+"output": [["Character", "\u2203"]]},
+
+{"description": "Named entity: fnof with a semi-colon.",
+"input":"&fnof;",
+"output": [["Character", "\u0192"]]},
+
+{"description": "Named entity: forall with a semi-colon.",
+"input":"&forall;",
+"output": [["Character", "\u2200"]]},
+
+{"description": "Named entity: frac12 with a semi-colon.",
+"input":"&frac12;",
+"output": [["Character", "\u00BD"]]},
+
+{"description": "Named entity: frac12 without a semi-colon.",
+"input":"&frac12",
+"output": ["ParseError", ["Character", "\u00BD"]]},
+
+{"description": "Named entity: frac14 with a semi-colon.",
+"input":"&frac14;",
+"output": [["Character", "\u00BC"]]},
+
+{"description": "Named entity: frac14 without a semi-colon.",
+"input":"&frac14",
+"output": ["ParseError", ["Character", "\u00BC"]]},
+
+{"description": "Named entity: frac34 with a semi-colon.",
+"input":"&frac34;",
+"output": [["Character", "\u00BE"]]},
+
+{"description": "Named entity: frac34 without a semi-colon.",
+"input":"&frac34",
+"output": ["ParseError", ["Character", "\u00BE"]]},
+
+{"description": "Named entity: frasl with a semi-colon.",
+"input":"&frasl;",
+"output": [["Character", "\u2044"]]},
+
+{"description": "Named entity: gamma with a semi-colon.",
+"input":"&gamma;",
+"output": [["Character", "\u03B3"]]},
+
+{"description": "Named entity: ge with a semi-colon.",
+"input":"&ge;",
+"output": [["Character", "\u2265"]]},
+
+{"description": "Named entity: gt with a semi-colon.",
+"input":"&gt;",
+"output": [["Character", "\u003E"]]},
+
+{"description": "Named entity: gt without a semi-colon.",
+"input":"&gt",
+"output": ["ParseError", ["Character", "\u003E"]]},
+
+{"description": "Named entity: hArr with a semi-colon.",
+"input":"&hArr;",
+"output": [["Character", "\u21D4"]]},
+
+{"description": "Named entity: harr with a semi-colon.",
+"input":"&harr;",
+"output": [["Character", "\u2194"]]},
+
+{"description": "Named entity: hearts with a semi-colon.",
+"input":"&hearts;",
+"output": [["Character", "\u2665"]]},
+
+{"description": "Named entity: hellip with a semi-colon.",
+"input":"&hellip;",
+"output": [["Character", "\u2026"]]},
+
+{"description": "Named entity: iacute with a semi-colon.",
+"input":"&iacute;",
+"output": [["Character", "\u00ED"]]},
+
+{"description": "Named entity: iacute without a semi-colon.",
+"input":"&iacute",
+"output": ["ParseError", ["Character", "\u00ED"]]},
+
+{"description": "Named entity: icirc with a semi-colon.",
+"input":"&icirc;",
+"output": [["Character", "\u00EE"]]},
+
+{"description": "Named entity: icirc without a semi-colon.",
+"input":"&icirc",
+"output": ["ParseError", ["Character", "\u00EE"]]},
+
+{"description": "Named entity: iexcl with a semi-colon.",
+"input":"&iexcl;",
+"output": [["Character", "\u00A1"]]},
+
+{"description": "Named entity: iexcl without a semi-colon.",
+"input":"&iexcl",
+"output": ["ParseError", ["Character", "\u00A1"]]},
+
+{"description": "Named entity: igrave with a semi-colon.",
+"input":"&igrave;",
+"output": [["Character", "\u00EC"]]},
+
+{"description": "Named entity: igrave without a semi-colon.",
+"input":"&igrave",
+"output": ["ParseError", ["Character", "\u00EC"]]},
+
+{"description": "Named entity: image with a semi-colon.",
+"input":"&image;",
+"output": [["Character", "\u2111"]]},
+
+{"description": "Named entity: infin with a semi-colon.",
+"input":"&infin;",
+"output": [["Character", "\u221E"]]},
+
+{"description": "Named entity: int with a semi-colon.",
+"input":"&int;",
+"output": [["Character", "\u222B"]]},
+
+{"description": "Named entity: iota with a semi-colon.",
+"input":"&iota;",
+"output": [["Character", "\u03B9"]]},
+
+{"description": "Named entity: iquest with a semi-colon.",
+"input":"&iquest;",
+"output": [["Character", "\u00BF"]]},
+
+{"description": "Named entity: iquest without a semi-colon.",
+"input":"&iquest",
+"output": ["ParseError", ["Character", "\u00BF"]]},
+
+{"description": "Named entity: isin with a semi-colon.",
+"input":"&isin;",
+"output": [["Character", "\u2208"]]},
+
+{"description": "Named entity: iuml with a semi-colon.",
+"input":"&iuml;",
+"output": [["Character", "\u00EF"]]},
+
+{"description": "Named entity: iuml without a semi-colon.",
+"input":"&iuml",
+"output": ["ParseError", ["Character", "\u00EF"]]},
+
+{"description": "Named entity: kappa with a semi-colon.",
+"input":"&kappa;",
+"output": [["Character", "\u03BA"]]},
+
+{"description": "Named entity: lArr with a semi-colon.",
+"input":"&lArr;",
+"output": [["Character", "\u21D0"]]},
+
+{"description": "Named entity: lambda with a semi-colon.",
+"input":"&lambda;",
+"output": [["Character", "\u03BB"]]},
+
+{"description": "Named entity: lang with a semi-colon.",
+"input":"&lang;",
+"output": [["Character", "\u27E8"]]},
+
+{"description": "Named entity: laquo with a semi-colon.",
+"input":"&laquo;",
+"output": [["Character", "\u00AB"]]},
+
+{"description": "Named entity: laquo without a semi-colon.",
+"input":"&laquo",
+"output": ["ParseError", ["Character", "\u00AB"]]},
+
+{"description": "Named entity: larr with a semi-colon.",
+"input":"&larr;",
+"output": [["Character", "\u2190"]]},
+
+{"description": "Named entity: lceil with a semi-colon.",
+"input":"&lceil;",
+"output": [["Character", "\u2308"]]},
+
+{"description": "Named entity: ldquo with a semi-colon.",
+"input":"&ldquo;",
+"output": [["Character", "\u201C"]]},
+
+{"description": "Named entity: le with a semi-colon.",
+"input":"&le;",
+"output": [["Character", "\u2264"]]},
+
+{"description": "Named entity: lfloor with a semi-colon.",
+"input":"&lfloor;",
+"output": [["Character", "\u230A"]]},
+
+{"description": "Named entity: lowast with a semi-colon.",
+"input":"&lowast;",
+"output": [["Character", "\u2217"]]},
+
+{"description": "Named entity: loz with a semi-colon.",
+"input":"&loz;",
+"output": [["Character", "\u25CA"]]},
+
+{"description": "Named entity: lrm with a semi-colon.",
+"input":"&lrm;",
+"output": [["Character", "\u200E"]]},
+
+{"description": "Named entity: lsaquo with a semi-colon.",
+"input":"&lsaquo;",
+"output": [["Character", "\u2039"]]},
+
+{"description": "Named entity: lsquo with a semi-colon.",
+"input":"&lsquo;",
+"output": [["Character", "\u2018"]]},
+
+{"description": "Named entity: lt with a semi-colon.",
+"input":"&lt;",
+"output": [["Character", "\u003C"]]},
+
+{"description": "Named entity: lt without a semi-colon.",
+"input":"&lt",
+"output": ["ParseError", ["Character", "\u003C"]]},
+
+{"description": "Named entity: macr with a semi-colon.",
+"input":"&macr;",
+"output": [["Character", "\u00AF"]]},
+
+{"description": "Named entity: macr without a semi-colon.",
+"input":"&macr",
+"output": ["ParseError", ["Character", "\u00AF"]]},
+
+{"description": "Named entity: mdash with a semi-colon.",
+"input":"&mdash;",
+"output": [["Character", "\u2014"]]},
+
+{"description": "Named entity: micro with a semi-colon.",
+"input":"&micro;",
+"output": [["Character", "\u00B5"]]},
+
+{"description": "Named entity: micro without a semi-colon.",
+"input":"&micro",
+"output": ["ParseError", ["Character", "\u00B5"]]},
+
+{"description": "Named entity: middot with a semi-colon.",
+"input":"&middot;",
+"output": [["Character", "\u00B7"]]},
+
+{"description": "Named entity: middot without a semi-colon.",
+"input":"&middot",
+"output": ["ParseError", ["Character", "\u00B7"]]},
+
+{"description": "Named entity: minus with a semi-colon.",
+"input":"&minus;",
+"output": [["Character", "\u2212"]]},
+
+{"description": "Named entity: mu with a semi-colon.",
+"input":"&mu;",
+"output": [["Character", "\u03BC"]]},
+
+{"description": "Named entity: nabla with a semi-colon.",
+"input":"&nabla;",
+"output": [["Character", "\u2207"]]},
+
+{"description": "Named entity: nbsp with a semi-colon.",
+"input":"&nbsp;",
+"output": [["Character", "\u00A0"]]},
+
+{"description": "Named entity: nbsp without a semi-colon.",
+"input":"&nbsp",
+"output": ["ParseError", ["Character", "\u00A0"]]},
+
+{"description": "Named entity: ndash with a semi-colon.",
+"input":"&ndash;",
+"output": [["Character", "\u2013"]]},
+
+{"description": "Named entity: ne with a semi-colon.",
+"input":"&ne;",
+"output": [["Character", "\u2260"]]},
+
+{"description": "Named entity: ni with a semi-colon.",
+"input":"&ni;",
+"output": [["Character", "\u220B"]]},
+
+{"description": "Named entity: not with a semi-colon.",
+"input":"&not;",
+"output": [["Character", "\u00AC"]]},
+
+{"description": "Named entity: not without a semi-colon.",
+"input":"&not",
+"output": ["ParseError", ["Character", "\u00AC"]]},
+
+{"description": "Named entity: notin with a semi-colon.",
+"input":"&notin;",
+"output": [["Character", "\u2209"]]},
+
+{"description": "Named entity: nsub with a semi-colon.",
+"input":"&nsub;",
+"output": [["Character", "\u2284"]]},
+
+{"description": "Named entity: ntilde with a semi-colon.",
+"input":"&ntilde;",
+"output": [["Character", "\u00F1"]]},
+
+{"description": "Named entity: ntilde without a semi-colon.",
+"input":"&ntilde",
+"output": ["ParseError", ["Character", "\u00F1"]]},
+
+{"description": "Named entity: nu with a semi-colon.",
+"input":"&nu;",
+"output": [["Character", "\u03BD"]]},
+
+{"description": "Named entity: oacute with a semi-colon.",
+"input":"&oacute;",
+"output": [["Character", "\u00F3"]]},
+
+{"description": "Named entity: oacute without a semi-colon.",
+"input":"&oacute",
+"output": ["ParseError", ["Character", "\u00F3"]]},
+
+{"description": "Named entity: ocirc with a semi-colon.",
+"input":"&ocirc;",
+"output": [["Character", "\u00F4"]]},
+
+{"description": "Named entity: ocirc without a semi-colon.",
+"input":"&ocirc",
+"output": ["ParseError", ["Character", "\u00F4"]]},
+
+{"description": "Named entity: oelig with a semi-colon.",
+"input":"&oelig;",
+"output": [["Character", "\u0153"]]},
+
+{"description": "Named entity: ograve with a semi-colon.",
+"input":"&ograve;",
+"output": [["Character", "\u00F2"]]},
+
+{"description": "Named entity: ograve without a semi-colon.",
+"input":"&ograve",
+"output": ["ParseError", ["Character", "\u00F2"]]},
+
+{"description": "Named entity: oline with a semi-colon.",
+"input":"&oline;",
+"output": [["Character", "\u203E"]]},
+
+{"description": "Named entity: omega with a semi-colon.",
+"input":"&omega;",
+"output": [["Character", "\u03C9"]]},
+
+{"description": "Named entity: omicron with a semi-colon.",
+"input":"&omicron;",
+"output": [["Character", "\u03BF"]]},
+
+{"description": "Named entity: oplus with a semi-colon.",
+"input":"&oplus;",
+"output": [["Character", "\u2295"]]},
+
+{"description": "Named entity: or with a semi-colon.",
+"input":"&or;",
+"output": [["Character", "\u2228"]]},
+
+{"description": "Named entity: ordf with a semi-colon.",
+"input":"&ordf;",
+"output": [["Character", "\u00AA"]]},
+
+{"description": "Named entity: ordf without a semi-colon.",
+"input":"&ordf",
+"output": ["ParseError", ["Character", "\u00AA"]]},
+
+{"description": "Named entity: ordm with a semi-colon.",
+"input":"&ordm;",
+"output": [["Character", "\u00BA"]]},
+
+{"description": "Named entity: ordm without a semi-colon.",
+"input":"&ordm",
+"output": ["ParseError", ["Character", "\u00BA"]]},
+
+{"description": "Named entity: oslash with a semi-colon.",
+"input":"&oslash;",
+"output": [["Character", "\u00F8"]]},
+
+{"description": "Named entity: oslash without a semi-colon.",
+"input":"&oslash",
+"output": ["ParseError", ["Character", "\u00F8"]]},
+
+{"description": "Named entity: otilde with a semi-colon.",
+"input":"&otilde;",
+"output": [["Character", "\u00F5"]]},
+
+{"description": "Named entity: otilde without a semi-colon.",
+"input":"&otilde",
+"output": ["ParseError", ["Character", "\u00F5"]]},
+
+{"description": "Named entity: otimes with a semi-colon.",
+"input":"&otimes;",
+"output": [["Character", "\u2297"]]},
+
+{"description": "Named entity: ouml with a semi-colon.",
+"input":"&ouml;",
+"output": [["Character", "\u00F6"]]},
+
+{"description": "Named entity: ouml without a semi-colon.",
+"input":"&ouml",
+"output": ["ParseError", ["Character", "\u00F6"]]},
+
+{"description": "Named entity: para with a semi-colon.",
+"input":"&para;",
+"output": [["Character", "\u00B6"]]},
+
+{"description": "Named entity: para without a semi-colon.",
+"input":"&para",
+"output": ["ParseError", ["Character", "\u00B6"]]},
+
+{"description": "Named entity: part with a semi-colon.",
+"input":"&part;",
+"output": [["Character", "\u2202"]]},
+
+{"description": "Named entity: permil with a semi-colon.",
+"input":"&permil;",
+"output": [["Character", "\u2030"]]},
+
+{"description": "Named entity: perp with a semi-colon.",
+"input":"&perp;",
+"output": [["Character", "\u22A5"]]},
+
+{"description": "Named entity: phi with a semi-colon.",
+"input":"&phi;",
+"output": [["Character", "\u03C6"]]},
+
+{"description": "Named entity: pi with a semi-colon.",
+"input":"&pi;",
+"output": [["Character", "\u03C0"]]},
+
+{"description": "Named entity: piv with a semi-colon.",
+"input":"&piv;",
+"output": [["Character", "\u03D6"]]},
+
+{"description": "Named entity: plusmn with a semi-colon.",
+"input":"&plusmn;",
+"output": [["Character", "\u00B1"]]},
+
+{"description": "Named entity: plusmn without a semi-colon.",
+"input":"&plusmn",
+"output": ["ParseError", ["Character", "\u00B1"]]},
+
+{"description": "Named entity: pound with a semi-colon.",
+"input":"&pound;",
+"output": [["Character", "\u00A3"]]},
+
+{"description": "Named entity: pound without a semi-colon.",
+"input":"&pound",
+"output": ["ParseError", ["Character", "\u00A3"]]},
+
+{"description": "Named entity: prime with a semi-colon.",
+"input":"&prime;",
+"output": [["Character", "\u2032"]]},
+
+{"description": "Named entity: prod with a semi-colon.",
+"input":"&prod;",
+"output": [["Character", "\u220F"]]},
+
+{"description": "Named entity: prop with a semi-colon.",
+"input":"&prop;",
+"output": [["Character", "\u221D"]]},
+
+{"description": "Named entity: psi with a semi-colon.",
+"input":"&psi;",
+"output": [["Character", "\u03C8"]]},
+
+{"description": "Named entity: quot with a semi-colon.",
+"input":"&quot;",
+"output": [["Character", "\u0022"]]},
+
+{"description": "Named entity: quot without a semi-colon.",
+"input":"&quot",
+"output": ["ParseError", ["Character", "\u0022"]]},
+
+{"description": "Named entity: rArr with a semi-colon.",
+"input":"&rArr;",
+"output": [["Character", "\u21D2"]]},
+
+{"description": "Named entity: radic with a semi-colon.",
+"input":"&radic;",
+"output": [["Character", "\u221A"]]},
+
+{"description": "Named entity: rang with a semi-colon.",
+"input":"&rang;",
+"output": [["Character", "\u27E9"]]},
+
+{"description": "Named entity: raquo with a semi-colon.",
+"input":"&raquo;",
+"output": [["Character", "\u00BB"]]},
+
+{"description": "Named entity: raquo without a semi-colon.",
+"input":"&raquo",
+"output": ["ParseError", ["Character", "\u00BB"]]},
+
+{"description": "Named entity: rarr with a semi-colon.",
+"input":"&rarr;",
+"output": [["Character", "\u2192"]]},
+
+{"description": "Named entity: rceil with a semi-colon.",
+"input":"&rceil;",
+"output": [["Character", "\u2309"]]},
+
+{"description": "Named entity: rdquo with a semi-colon.",
+"input":"&rdquo;",
+"output": [["Character", "\u201D"]]},
+
+{"description": "Named entity: real with a semi-colon.",
+"input":"&real;",
+"output": [["Character", "\u211C"]]},
+
+{"description": "Named entity: reg with a semi-colon.",
+"input":"&reg;",
+"output": [["Character", "\u00AE"]]},
+
+{"description": "Named entity: reg without a semi-colon.",
+"input":"&reg",
+"output": ["ParseError", ["Character", "\u00AE"]]},
+
+{"description": "Named entity: rfloor with a semi-colon.",
+"input":"&rfloor;",
+"output": [["Character", "\u230B"]]},
+
+{"description": "Named entity: rho with a semi-colon.",
+"input":"&rho;",
+"output": [["Character", "\u03C1"]]},
+
+{"description": "Named entity: rlm with a semi-colon.",
+"input":"&rlm;",
+"output": [["Character", "\u200F"]]},
+
+{"description": "Named entity: rsaquo with a semi-colon.",
+"input":"&rsaquo;",
+"output": [["Character", "\u203A"]]},
+
+{"description": "Named entity: rsquo with a semi-colon.",
+"input":"&rsquo;",
+"output": [["Character", "\u2019"]]},
+
+{"description": "Named entity: sbquo with a semi-colon.",
+"input":"&sbquo;",
+"output": [["Character", "\u201A"]]},
+
+{"description": "Named entity: scaron with a semi-colon.",
+"input":"&scaron;",
+"output": [["Character", "\u0161"]]},
+
+{"description": "Named entity: sdot with a semi-colon.",
+"input":"&sdot;",
+"output": [["Character", "\u22C5"]]},
+
+{"description": "Named entity: sect with a semi-colon.",
+"input":"&sect;",
+"output": [["Character", "\u00A7"]]},
+
+{"description": "Named entity: sect without a semi-colon.",
+"input":"&sect",
+"output": ["ParseError", ["Character", "\u00A7"]]},
+
+{"description": "Named entity: shy with a semi-colon.",
+"input":"&shy;",
+"output": [["Character", "\u00AD"]]},
+
+{"description": "Named entity: shy without a semi-colon.",
+"input":"&shy",
+"output": ["ParseError", ["Character", "\u00AD"]]},
+
+{"description": "Named entity: sigma with a semi-colon.",
+"input":"&sigma;",
+"output": [["Character", "\u03C3"]]},
+
+{"description": "Named entity: sigmaf with a semi-colon.",
+"input":"&sigmaf;",
+"output": [["Character", "\u03C2"]]},
+
+{"description": "Named entity: sim with a semi-colon.",
+"input":"&sim;",
+"output": [["Character", "\u223C"]]},
+
+{"description": "Named entity: spades with a semi-colon.",
+"input":"&spades;",
+"output": [["Character", "\u2660"]]},
+
+{"description": "Named entity: sub with a semi-colon.",
+"input":"&sub;",
+"output": [["Character", "\u2282"]]},
+
+{"description": "Named entity: sube with a semi-colon.",
+"input":"&sube;",
+"output": [["Character", "\u2286"]]},
+
+{"description": "Named entity: sum with a semi-colon.",
+"input":"&sum;",
+"output": [["Character", "\u2211"]]},
+
+{"description": "Named entity: sup1 with a semi-colon.",
+"input":"&sup1;",
+"output": [["Character", "\u00B9"]]},
+
+{"description": "Named entity: sup1 without a semi-colon.",
+"input":"&sup1",
+"output": ["ParseError", ["Character", "\u00B9"]]},
+
+{"description": "Named entity: sup2 with a semi-colon.",
+"input":"&sup2;",
+"output": [["Character", "\u00B2"]]},
+
+{"description": "Named entity: sup2 without a semi-colon.",
+"input":"&sup2",
+"output": ["ParseError", ["Character", "\u00B2"]]},
+
+{"description": "Named entity: sup3 with a semi-colon.",
+"input":"&sup3;",
+"output": [["Character", "\u00B3"]]},
+
+{"description": "Named entity: sup3 without a semi-colon.",
+"input":"&sup3",
+"output": ["ParseError", ["Character", "\u00B3"]]},
+
+{"description": "Named entity: sup with a semi-colon.",
+"input":"&sup;",
+"output": [["Character", "\u2283"]]},
+
+{"description": "Named entity: supe with a semi-colon.",
+"input":"&supe;",
+"output": [["Character", "\u2287"]]},
+
+{"description": "Named entity: szlig with a semi-colon.",
+"input":"&szlig;",
+"output": [["Character", "\u00DF"]]},
+
+{"description": "Named entity: szlig without a semi-colon.",
+"input":"&szlig",
+"output": ["ParseError", ["Character", "\u00DF"]]},
+
+{"description": "Named entity: tau with a semi-colon.",
+"input":"&tau;",
+"output": [["Character", "\u03C4"]]},
+
+{"description": "Named entity: there4 with a semi-colon.",
+"input":"&there4;",
+"output": [["Character", "\u2234"]]},
+
+{"description": "Named entity: theta with a semi-colon.",
+"input":"&theta;",
+"output": [["Character", "\u03B8"]]},
+
+{"description": "Named entity: thetasym with a semi-colon.",
+"input":"&thetasym;",
+"output": [["Character", "\u03D1"]]},
+
+{"description": "Named entity: thinsp with a semi-colon.",
+"input":"&thinsp;",
+"output": [["Character", "\u2009"]]},
+
+{"description": "Named entity: thorn with a semi-colon.",
+"input":"&thorn;",
+"output": [["Character", "\u00FE"]]},
+
+{"description": "Named entity: thorn without a semi-colon.",
+"input":"&thorn",
+"output": ["ParseError", ["Character", "\u00FE"]]},
+
+{"description": "Named entity: tilde with a semi-colon.",
+"input":"&tilde;",
+"output": [["Character", "\u02DC"]]},
+
+{"description": "Named entity: times with a semi-colon.",
+"input":"&times;",
+"output": [["Character", "\u00D7"]]},
+
+{"description": "Named entity: times without a semi-colon.",
+"input":"&times",
+"output": ["ParseError", ["Character", "\u00D7"]]},
+
+{"description": "Named entity: trade with a semi-colon.",
+"input":"&trade;",
+"output": [["Character", "\u2122"]]},
+
+{"description": "Named entity: uArr with a semi-colon.",
+"input":"&uArr;",
+"output": [["Character", "\u21D1"]]},
+
+{"description": "Named entity: uacute with a semi-colon.",
+"input":"&uacute;",
+"output": [["Character", "\u00FA"]]},
+
+{"description": "Named entity: uacute without a semi-colon.",
+"input":"&uacute",
+"output": ["ParseError", ["Character", "\u00FA"]]},
+
+{"description": "Named entity: uarr with a semi-colon.",
+"input":"&uarr;",
+"output": [["Character", "\u2191"]]},
+
+{"description": "Named entity: ucirc with a semi-colon.",
+"input":"&ucirc;",
+"output": [["Character", "\u00FB"]]},
+
+{"description": "Named entity: ucirc without a semi-colon.",
+"input":"&ucirc",
+"output": ["ParseError", ["Character", "\u00FB"]]},
+
+{"description": "Named entity: ugrave with a semi-colon.",
+"input":"&ugrave;",
+"output": [["Character", "\u00F9"]]},
+
+{"description": "Named entity: ugrave without a semi-colon.",
+"input":"&ugrave",
+"output": ["ParseError", ["Character", "\u00F9"]]},
+
+{"description": "Named entity: uml with a semi-colon.",
+"input":"&uml;",
+"output": [["Character", "\u00A8"]]},
+
+{"description": "Named entity: uml without a semi-colon.",
+"input":"&uml",
+"output": ["ParseError", ["Character", "\u00A8"]]},
+
+{"description": "Named entity: upsih with a semi-colon.",
+"input":"&upsih;",
+"output": [["Character", "\u03D2"]]},
+
+{"description": "Named entity: upsilon with a semi-colon.",
+"input":"&upsilon;",
+"output": [["Character", "\u03C5"]]},
+
+{"description": "Named entity: uuml with a semi-colon.",
+"input":"&uuml;",
+"output": [["Character", "\u00FC"]]},
+
+{"description": "Named entity: uuml without a semi-colon.",
+"input":"&uuml",
+"output": ["ParseError", ["Character", "\u00FC"]]},
+
+{"description": "Named entity: weierp with a semi-colon.",
+"input":"&weierp;",
+"output": [["Character", "\u2118"]]},
+
+{"description": "Named entity: xi with a semi-colon.",
+"input":"&xi;",
+"output": [["Character", "\u03BE"]]},
+
+{"description": "Named entity: yacute with a semi-colon.",
+"input":"&yacute;",
+"output": [["Character", "\u00FD"]]},
+
+{"description": "Named entity: yacute without a semi-colon.",
+"input":"&yacute",
+"output": ["ParseError", ["Character", "\u00FD"]]},
+
+{"description": "Named entity: yen with a semi-colon.",
+"input":"&yen;",
+"output": [["Character", "\u00A5"]]},
+
+{"description": "Named entity: yen without a semi-colon.",
+"input":"&yen",
+"output": ["ParseError", ["Character", "\u00A5"]]},
+
+{"description": "Named entity: yuml with a semi-colon.",
+"input":"&yuml;",
+"output": [["Character", "\u00FF"]]},
+
+{"description": "Named entity: yuml without a semi-colon.",
+"input":"&yuml",
+"output": ["ParseError", ["Character", "\u00FF"]]},
+
+{"description": "Named entity: zeta with a semi-colon.",
+"input":"&zeta;",
+"output": [["Character", "\u03B6"]]},
+
+{"description": "Named entity: zwj with a semi-colon.",
+"input":"&zwj;",
+"output": [["Character", "\u200D"]]},
+
+{"description": "Named entity: zwnj with a semi-colon.",
+"input":"&zwnj;",
+"output": [["Character", "\u200C"]]},
+
+{"description": "Bad named entity: Alpha without a semi-colon.",
+"input":"&Alpha",
+"output": ["ParseError", ["Character", "&Alpha"]]},
+
+{"description": "Bad named entity: alpha without a semi-colon.",
+"input":"&alpha",
+"output": ["ParseError", ["Character", "&alpha"]]},
+
+{"description": "Bad named entity: and without a semi-colon.",
+"input":"&and",
+"output": ["ParseError", ["Character", "&and"]]},
+
+{"description": "Bad named entity: ang without a semi-colon.",
+"input":"&ang",
+"output": ["ParseError", ["Character", "&ang"]]},
+
+{"description": "Bad named entity: apos without a semi-colon.",
+"input":"&apos",
+"output": ["ParseError", ["Character", "&apos"]]},
+
+{"description": "Bad named entity: asymp without a semi-colon.",
+"input":"&asymp",
+"output": ["ParseError", ["Character", "&asymp"]]},
+
+{"description": "Bad named entity: bdquo without a semi-colon.",
+"input":"&bdquo",
+"output": ["ParseError", ["Character", "&bdquo"]]},
+
+{"description": "Bad named entity: Beta without a semi-colon.",
+"input":"&Beta",
+"output": ["ParseError", ["Character", "&Beta"]]},
+
+{"description": "Bad named entity: beta without a semi-colon.",
+"input":"&beta",
+"output": ["ParseError", ["Character", "&beta"]]},
+
+{"description": "Bad named entity: bull without a semi-colon.",
+"input":"&bull",
+"output": ["ParseError", ["Character", "&bull"]]},
+
+{"description": "Bad named entity: cap without a semi-colon.",
+"input":"&cap",
+"output": ["ParseError", ["Character", "&cap"]]},
+
+{"description": "Bad named entity: Chi without a semi-colon.",
+"input":"&Chi",
+"output": ["ParseError", ["Character", "&Chi"]]},
+
+{"description": "Bad named entity: chi without a semi-colon.",
+"input":"&chi",
+"output": ["ParseError", ["Character", "&chi"]]},
+
+{"description": "Bad named entity: circ without a semi-colon.",
+"input":"&circ",
+"output": ["ParseError", ["Character", "&circ"]]},
+
+{"description": "Bad named entity: clubs without a semi-colon.",
+"input":"&clubs",
+"output": ["ParseError", ["Character", "&clubs"]]},
+
+{"description": "Bad named entity: cong without a semi-colon.",
+"input":"&cong",
+"output": ["ParseError", ["Character", "&cong"]]},
+
+{"description": "Bad named entity: crarr without a semi-colon.",
+"input":"&crarr",
+"output": ["ParseError", ["Character", "&crarr"]]},
+
+{"description": "Bad named entity: cup without a semi-colon.",
+"input":"&cup",
+"output": ["ParseError", ["Character", "&cup"]]},
+
+{"description": "Bad named entity: Dagger without a semi-colon.",
+"input":"&Dagger",
+"output": ["ParseError", ["Character", "&Dagger"]]},
+
+{"description": "Bad named entity: dagger without a semi-colon.",
+"input":"&dagger",
+"output": ["ParseError", ["Character", "&dagger"]]},
+
+{"description": "Bad named entity: dArr without a semi-colon.",
+"input":"&dArr",
+"output": ["ParseError", ["Character", "&dArr"]]},
+
+{"description": "Bad named entity: darr without a semi-colon.",
+"input":"&darr",
+"output": ["ParseError", ["Character", "&darr"]]},
+
+{"description": "Bad named entity: Delta without a semi-colon.",
+"input":"&Delta",
+"output": ["ParseError", ["Character", "&Delta"]]},
+
+{"description": "Bad named entity: delta without a semi-colon.",
+"input":"&delta",
+"output": ["ParseError", ["Character", "&delta"]]},
+
+{"description": "Bad named entity: diams without a semi-colon.",
+"input":"&diams",
+"output": ["ParseError", ["Character", "&diams"]]},
+
+{"description": "Bad named entity: empty without a semi-colon.",
+"input":"&empty",
+"output": ["ParseError", ["Character", "&empty"]]},
+
+{"description": "Bad named entity: emsp without a semi-colon.",
+"input":"&emsp",
+"output": ["ParseError", ["Character", "&emsp"]]},
+
+{"description": "Bad named entity: ensp without a semi-colon.",
+"input":"&ensp",
+"output": ["ParseError", ["Character", "&ensp"]]},
+
+{"description": "Bad named entity: Epsilon without a semi-colon.",
+"input":"&Epsilon",
+"output": ["ParseError", ["Character", "&Epsilon"]]},
+
+{"description": "Bad named entity: epsilon without a semi-colon.",
+"input":"&epsilon",
+"output": ["ParseError", ["Character", "&epsilon"]]},
+
+{"description": "Bad named entity: equiv without a semi-colon.",
+"input":"&equiv",
+"output": ["ParseError", ["Character", "&equiv"]]},
+
+{"description": "Bad named entity: Eta without a semi-colon.",
+"input":"&Eta",
+"output": ["ParseError", ["Character", "&Eta"]]},
+
+{"description": "Bad named entity: eta without a semi-colon.",
+"input":"&eta",
+"output": ["ParseError", ["Character", "&eta"]]},
+
+{"description": "Bad named entity: euro without a semi-colon.",
+"input":"&euro",
+"output": ["ParseError", ["Character", "&euro"]]},
+
+{"description": "Bad named entity: exist without a semi-colon.",
+"input":"&exist",
+"output": ["ParseError", ["Character", "&exist"]]},
+
+{"description": "Bad named entity: fnof without a semi-colon.",
+"input":"&fnof",
+"output": ["ParseError", ["Character", "&fnof"]]},
+
+{"description": "Bad named entity: forall without a semi-colon.",
+"input":"&forall",
+"output": ["ParseError", ["Character", "&forall"]]},
+
+{"description": "Bad named entity: frasl without a semi-colon.",
+"input":"&frasl",
+"output": ["ParseError", ["Character", "&frasl"]]},
+
+{"description": "Bad named entity: Gamma without a semi-colon.",
+"input":"&Gamma",
+"output": ["ParseError", ["Character", "&Gamma"]]},
+
+{"description": "Bad named entity: gamma without a semi-colon.",
+"input":"&gamma",
+"output": ["ParseError", ["Character", "&gamma"]]},
+
+{"description": "Bad named entity: ge without a semi-colon.",
+"input":"&ge",
+"output": ["ParseError", ["Character", "&ge"]]},
+
+{"description": "Bad named entity: hArr without a semi-colon.",
+"input":"&hArr",
+"output": ["ParseError", ["Character", "&hArr"]]},
+
+{"description": "Bad named entity: harr without a semi-colon.",
+"input":"&harr",
+"output": ["ParseError", ["Character", "&harr"]]},
+
+{"description": "Bad named entity: hearts without a semi-colon.",
+"input":"&hearts",
+"output": ["ParseError", ["Character", "&hearts"]]},
+
+{"description": "Bad named entity: hellip without a semi-colon.",
+"input":"&hellip",
+"output": ["ParseError", ["Character", "&hellip"]]},
+
+{"description": "Bad named entity: image without a semi-colon.",
+"input":"&image",
+"output": ["ParseError", ["Character", "&image"]]},
+
+{"description": "Bad named entity: infin without a semi-colon.",
+"input":"&infin",
+"output": ["ParseError", ["Character", "&infin"]]},
+
+{"description": "Bad named entity: int without a semi-colon.",
+"input":"&int",
+"output": ["ParseError", ["Character", "&int"]]},
+
+{"description": "Bad named entity: Iota without a semi-colon.",
+"input":"&Iota",
+"output": ["ParseError", ["Character", "&Iota"]]},
+
+{"description": "Bad named entity: iota without a semi-colon.",
+"input":"&iota",
+"output": ["ParseError", ["Character", "&iota"]]},
+
+{"description": "Bad named entity: isin without a semi-colon.",
+"input":"&isin",
+"output": ["ParseError", ["Character", "&isin"]]},
+
+{"description": "Bad named entity: Kappa without a semi-colon.",
+"input":"&Kappa",
+"output": ["ParseError", ["Character", "&Kappa"]]},
+
+{"description": "Bad named entity: kappa without a semi-colon.",
+"input":"&kappa",
+"output": ["ParseError", ["Character", "&kappa"]]},
+
+{"description": "Bad named entity: Lambda without a semi-colon.",
+"input":"&Lambda",
+"output": ["ParseError", ["Character", "&Lambda"]]},
+
+{"description": "Bad named entity: lambda without a semi-colon.",
+"input":"&lambda",
+"output": ["ParseError", ["Character", "&lambda"]]},
+
+{"description": "Bad named entity: lang without a semi-colon.",
+"input":"&lang",
+"output": ["ParseError", ["Character", "&lang"]]},
+
+{"description": "Bad named entity: lArr without a semi-colon.",
+"input":"&lArr",
+"output": ["ParseError", ["Character", "&lArr"]]},
+
+{"description": "Bad named entity: larr without a semi-colon.",
+"input":"&larr",
+"output": ["ParseError", ["Character", "&larr"]]},
+
+{"description": "Bad named entity: lceil without a semi-colon.",
+"input":"&lceil",
+"output": ["ParseError", ["Character", "&lceil"]]},
+
+{"description": "Bad named entity: ldquo without a semi-colon.",
+"input":"&ldquo",
+"output": ["ParseError", ["Character", "&ldquo"]]},
+
+{"description": "Bad named entity: le without a semi-colon.",
+"input":"&le",
+"output": ["ParseError", ["Character", "&le"]]},
+
+{"description": "Bad named entity: lfloor without a semi-colon.",
+"input":"&lfloor",
+"output": ["ParseError", ["Character", "&lfloor"]]},
+
+{"description": "Bad named entity: lowast without a semi-colon.",
+"input":"&lowast",
+"output": ["ParseError", ["Character", "&lowast"]]},
+
+{"description": "Bad named entity: loz without a semi-colon.",
+"input":"&loz",
+"output": ["ParseError", ["Character", "&loz"]]},
+
+{"description": "Bad named entity: lrm without a semi-colon.",
+"input":"&lrm",
+"output": ["ParseError", ["Character", "&lrm"]]},
+
+{"description": "Bad named entity: lsaquo without a semi-colon.",
+"input":"&lsaquo",
+"output": ["ParseError", ["Character", "&lsaquo"]]},
+
+{"description": "Bad named entity: lsquo without a semi-colon.",
+"input":"&lsquo",
+"output": ["ParseError", ["Character", "&lsquo"]]},
+
+{"description": "Bad named entity: mdash without a semi-colon.",
+"input":"&mdash",
+"output": ["ParseError", ["Character", "&mdash"]]},
+
+{"description": "Bad named entity: minus without a semi-colon.",
+"input":"&minus",
+"output": ["ParseError", ["Character", "&minus"]]},
+
+{"description": "Bad named entity: Mu without a semi-colon.",
+"input":"&Mu",
+"output": ["ParseError", ["Character", "&Mu"]]},
+
+{"description": "Bad named entity: mu without a semi-colon.",
+"input":"&mu",
+"output": ["ParseError", ["Character", "&mu"]]},
+
+{"description": "Bad named entity: nabla without a semi-colon.",
+"input":"&nabla",
+"output": ["ParseError", ["Character", "&nabla"]]},
+
+{"description": "Bad named entity: ndash without a semi-colon.",
+"input":"&ndash",
+"output": ["ParseError", ["Character", "&ndash"]]},
+
+{"description": "Bad named entity: ne without a semi-colon.",
+"input":"&ne",
+"output": ["ParseError", ["Character", "&ne"]]},
+
+{"description": "Bad named entity: ni without a semi-colon.",
+"input":"&ni",
+"output": ["ParseError", ["Character", "&ni"]]},
+
+{"description": "Bad named entity: notin without a semi-colon.",
+"input":"&notin",
+"output": ["ParseError", ["Character", "\u00ACin"]]},
+
+{"description": "Bad named entity: nsub without a semi-colon.",
+"input":"&nsub",
+"output": ["ParseError", ["Character", "&nsub"]]},
+
+{"description": "Bad named entity: Nu without a semi-colon.",
+"input":"&Nu",
+"output": ["ParseError", ["Character", "&Nu"]]},
+
+{"description": "Bad named entity: nu without a semi-colon.",
+"input":"&nu",
+"output": ["ParseError", ["Character", "&nu"]]},
+
+{"description": "Bad named entity: OElig without a semi-colon.",
+"input":"&OElig",
+"output": ["ParseError", ["Character", "&OElig"]]},
+
+{"description": "Bad named entity: oelig without a semi-colon.",
+"input":"&oelig",
+"output": ["ParseError", ["Character", "&oelig"]]},
+
+{"description": "Bad named entity: oline without a semi-colon.",
+"input":"&oline",
+"output": ["ParseError", ["Character", "&oline"]]},
+
+{"description": "Bad named entity: Omega without a semi-colon.",
+"input":"&Omega",
+"output": ["ParseError", ["Character", "&Omega"]]},
+
+{"description": "Bad named entity: omega without a semi-colon.",
+"input":"&omega",
+"output": ["ParseError", ["Character", "&omega"]]},
+
+{"description": "Bad named entity: Omicron without a semi-colon.",
+"input":"&Omicron",
+"output": ["ParseError", ["Character", "&Omicron"]]},
+
+{"description": "Bad named entity: omicron without a semi-colon.",
+"input":"&omicron",
+"output": ["ParseError", ["Character", "&omicron"]]},
+
+{"description": "Bad named entity: oplus without a semi-colon.",
+"input":"&oplus",
+"output": ["ParseError", ["Character", "&oplus"]]},
+
+{"description": "Bad named entity: or without a semi-colon.",
+"input":"&or",
+"output": ["ParseError", ["Character", "&or"]]},
+
+{"description": "Bad named entity: otimes without a semi-colon.",
+"input":"&otimes",
+"output": ["ParseError", ["Character", "&otimes"]]},
+
+{"description": "Bad named entity: part without a semi-colon.",
+"input":"&part",
+"output": ["ParseError", ["Character", "&part"]]},
+
+{"description": "Bad named entity: permil without a semi-colon.",
+"input":"&permil",
+"output": ["ParseError", ["Character", "&permil"]]},
+
+{"description": "Bad named entity: perp without a semi-colon.",
+"input":"&perp",
+"output": ["ParseError", ["Character", "&perp"]]},
+
+{"description": "Bad named entity: Phi without a semi-colon.",
+"input":"&Phi",
+"output": ["ParseError", ["Character", "&Phi"]]},
+
+{"description": "Bad named entity: phi without a semi-colon.",
+"input":"&phi",
+"output": ["ParseError", ["Character", "&phi"]]},
+
+{"description": "Bad named entity: Pi without a semi-colon.",
+"input":"&Pi",
+"output": ["ParseError", ["Character", "&Pi"]]},
+
+{"description": "Bad named entity: pi without a semi-colon.",
+"input":"&pi",
+"output": ["ParseError", ["Character", "&pi"]]},
+
+{"description": "Bad named entity: piv without a semi-colon.",
+"input":"&piv",
+"output": ["ParseError", ["Character", "&piv"]]},
+
+{"description": "Bad named entity: Prime without a semi-colon.",
+"input":"&Prime",
+"output": ["ParseError", ["Character", "&Prime"]]},
+
+{"description": "Bad named entity: prime without a semi-colon.",
+"input":"&prime",
+"output": ["ParseError", ["Character", "&prime"]]},
+
+{"description": "Bad named entity: prod without a semi-colon.",
+"input":"&prod",
+"output": ["ParseError", ["Character", "&prod"]]},
+
+{"description": "Bad named entity: prop without a semi-colon.",
+"input":"&prop",
+"output": ["ParseError", ["Character", "&prop"]]},
+
+{"description": "Bad named entity: Psi without a semi-colon.",
+"input":"&Psi",
+"output": ["ParseError", ["Character", "&Psi"]]},
+
+{"description": "Bad named entity: psi without a semi-colon.",
+"input":"&psi",
+"output": ["ParseError", ["Character", "&psi"]]},
+
+{"description": "Bad named entity: radic without a semi-colon.",
+"input":"&radic",
+"output": ["ParseError", ["Character", "&radic"]]},
+
+{"description": "Bad named entity: rang without a semi-colon.",
+"input":"&rang",
+"output": ["ParseError", ["Character", "&rang"]]},
+
+{"description": "Bad named entity: rarr without a semi-colon.",
+"input":"&rarr",
+"output": ["ParseError", ["Character", "&rarr"]]},
+
+{"description": "Bad named entity: rarr without a semi-colon.",
+"input":"&rarr",
+"output": ["ParseError", ["Character", "&rarr"]]},
+
+{"description": "Bad named entity: rceil without a semi-colon.",
+"input":"&rceil",
+"output": ["ParseError", ["Character", "&rceil"]]},
+
+{"description": "Bad named entity: rdquo without a semi-colon.",
+"input":"&rdquo",
+"output": ["ParseError", ["Character", "&rdquo"]]},
+
+{"description": "Bad named entity: real without a semi-colon.",
+"input":"&real",
+"output": ["ParseError", ["Character", "&real"]]},
+
+{"description": "Bad named entity: rfloor without a semi-colon.",
+"input":"&rfloor",
+"output": ["ParseError", ["Character", "&rfloor"]]},
+
+{"description": "Bad named entity: Rho without a semi-colon.",
+"input":"&Rho",
+"output": ["ParseError", ["Character", "&Rho"]]},
+
+{"description": "Bad named entity: rho without a semi-colon.",
+"input":"&rho",
+"output": ["ParseError", ["Character", "&rho"]]},
+
+{"description": "Bad named entity: rlm without a semi-colon.",
+"input":"&rlm",
+"output": ["ParseError", ["Character", "&rlm"]]},
+
+{"description": "Bad named entity: rsaquo without a semi-colon.",
+"input":"&rsaquo",
+"output": ["ParseError", ["Character", "&rsaquo"]]},
+
+{"description": "Bad named entity: rsquo without a semi-colon.",
+"input":"&rsquo",
+"output": ["ParseError", ["Character", "&rsquo"]]},
+
+{"description": "Bad named entity: sbquo without a semi-colon.",
+"input":"&sbquo",
+"output": ["ParseError", ["Character", "&sbquo"]]},
+
+{"description": "Bad named entity: Scaron without a semi-colon.",
+"input":"&Scaron",
+"output": ["ParseError", ["Character", "&Scaron"]]},
+
+{"description": "Bad named entity: scaron without a semi-colon.",
+"input":"&scaron",
+"output": ["ParseError", ["Character", "&scaron"]]},
+
+{"description": "Bad named entity: sdot without a semi-colon.",
+"input":"&sdot",
+"output": ["ParseError", ["Character", "&sdot"]]},
+
+{"description": "Bad named entity: Sigma without a semi-colon.",
+"input":"&Sigma",
+"output": ["ParseError", ["Character", "&Sigma"]]},
+
+{"description": "Bad named entity: sigma without a semi-colon.",
+"input":"&sigma",
+"output": ["ParseError", ["Character", "&sigma"]]},
+
+{"description": "Bad named entity: sigmaf without a semi-colon.",
+"input":"&sigmaf",
+"output": ["ParseError", ["Character", "&sigmaf"]]},
+
+{"description": "Bad named entity: sim without a semi-colon.",
+"input":"&sim",
+"output": ["ParseError", ["Character", "&sim"]]},
+
+{"description": "Bad named entity: spades without a semi-colon.",
+"input":"&spades",
+"output": ["ParseError", ["Character", "&spades"]]},
+
+{"description": "Bad named entity: sub without a semi-colon.",
+"input":"&sub",
+"output": ["ParseError", ["Character", "&sub"]]},
+
+{"description": "Bad named entity: sube without a semi-colon.",
+"input":"&sube",
+"output": ["ParseError", ["Character", "&sube"]]},
+
+{"description": "Bad named entity: sum without a semi-colon.",
+"input":"&sum",
+"output": ["ParseError", ["Character", "&sum"]]},
+
+{"description": "Bad named entity: sup without a semi-colon.",
+"input":"&sup",
+"output": ["ParseError", ["Character", "&sup"]]},
+
+{"description": "Bad named entity: supe without a semi-colon.",
+"input":"&supe",
+"output": ["ParseError", ["Character", "&supe"]]},
+
+{"description": "Bad named entity: Tau without a semi-colon.",
+"input":"&Tau",
+"output": ["ParseError", ["Character", "&Tau"]]},
+
+{"description": "Bad named entity: tau without a semi-colon.",
+"input":"&tau",
+"output": ["ParseError", ["Character", "&tau"]]},
+
+{"description": "Bad named entity: there4 without a semi-colon.",
+"input":"&there4",
+"output": ["ParseError", ["Character", "&there4"]]},
+
+{"description": "Bad named entity: Theta without a semi-colon.",
+"input":"&Theta",
+"output": ["ParseError", ["Character", "&Theta"]]},
+
+{"description": "Bad named entity: theta without a semi-colon.",
+"input":"&theta",
+"output": ["ParseError", ["Character", "&theta"]]},
+
+{"description": "Bad named entity: thetasym without a semi-colon.",
+"input":"&thetasym",
+"output": ["ParseError", ["Character", "&thetasym"]]},
+
+{"description": "Bad named entity: thinsp without a semi-colon.",
+"input":"&thinsp",
+"output": ["ParseError", ["Character", "&thinsp"]]},
+
+{"description": "Bad named entity: tilde without a semi-colon.",
+"input":"&tilde",
+"output": ["ParseError", ["Character", "&tilde"]]},
+
+{"description": "Bad named entity: trade without a semi-colon.",
+"input":"&trade",
+"output": ["ParseError", ["Character", "&trade"]]},
+
+{"description": "Bad named entity: uarr without a semi-colon.",
+"input":"&uarr",
+"output": ["ParseError", ["Character", "&uarr"]]},
+
+{"description": "Bad named entity: uarr without a semi-colon.",
+"input":"&uarr",
+"output": ["ParseError", ["Character", "&uarr"]]},
+
+{"description": "Bad named entity: upsih without a semi-colon.",
+"input":"&upsih",
+"output": ["ParseError", ["Character", "&upsih"]]},
+
+{"description": "Bad named entity: Upsilon without a semi-colon.",
+"input":"&Upsilon",
+"output": ["ParseError", ["Character", "&Upsilon"]]},
+
+{"description": "Bad named entity: upsilon without a semi-colon.",
+"input":"&upsilon",
+"output": ["ParseError", ["Character", "&upsilon"]]},
+
+{"description": "Bad named entity: weierp without a semi-colon.",
+"input":"&weierp",
+"output": ["ParseError", ["Character", "&weierp"]]},
+
+{"description": "Bad named entity: Xi without a semi-colon.",
+"input":"&Xi",
+"output": ["ParseError", ["Character", "&Xi"]]},
+
+{"description": "Bad named entity: xi without a semi-colon.",
+"input":"&xi",
+"output": ["ParseError", ["Character", "&xi"]]},
+
+{"description": "Bad named entity: Yuml without a semi-colon.",
+"input":"&Yuml",
+"output": ["ParseError", ["Character", "&Yuml"]]},
+
+{"description": "Bad named entity: Zeta without a semi-colon.",
+"input":"&Zeta",
+"output": ["ParseError", ["Character", "&Zeta"]]},
+
+{"description": "Bad named entity: zeta without a semi-colon.",
+"input":"&zeta",
+"output": ["ParseError", ["Character", "&zeta"]]},
+
+{"description": "Bad named entity: zwj without a semi-colon.",
+"input":"&zwj",
+"output": ["ParseError", ["Character", "&zwj"]]},
+
+{"description": "Bad named entity: zwnj without a semi-colon.",
+"input":"&zwnj",
+"output": ["ParseError", ["Character", "&zwnj"]]},
+
+{"description": "Bad named entity: zwnj without a semi-colon.",
+"input":"&zwnj",
+"output": ["ParseError", ["Character", "&zwnj"]]},
+
+{"description": "CR as numeric entity",
+"input":"&#013;",
+"output": ["ParseError", ["Character", "\n"]]},
+
+{"description": "CR as hexadecimal numeric entity",
+"input":"&#x00D;",
+"output": ["ParseError", ["Character", "\n"]]},
+
+{"description": "Windows-1252 EURO SIGN numeric entity.",
+"input":"&#0128;",
+"output": ["ParseError", ["Character", "\u20AC"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0129;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
+"input":"&#0130;",
+"output": ["ParseError", ["Character", "\u201A"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
+"input":"&#0131;",
+"output": ["ParseError", ["Character", "\u0192"]]},
+
+{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
+"input":"&#0132;",
+"output": ["ParseError", ["Character", "\u201E"]]},
+
+{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
+"input":"&#0133;",
+"output": ["ParseError", ["Character", "\u2026"]]},
+
+{"description": "Windows-1252 DAGGER numeric entity.",
+"input":"&#0134;",
+"output": ["ParseError", ["Character", "\u2020"]]},
+
+{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
+"input":"&#0135;",
+"output": ["ParseError", ["Character", "\u2021"]]},
+
+{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
+"input":"&#0136;",
+"output": ["ParseError", ["Character", "\u02C6"]]},
+
+{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
+"input":"&#0137;",
+"output": ["ParseError", ["Character", "\u2030"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
+"input":"&#0138;",
+"output": ["ParseError", ["Character", "\u0160"]]},
+
+{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
+"input":"&#0139;",
+"output": ["ParseError", ["Character", "\u2039"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
+"input":"&#0140;",
+"output": ["ParseError", ["Character", "\u0152"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0141;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
+"input":"&#0142;",
+"output": ["ParseError", ["Character", "\u017D"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0143;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0144;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
+"input":"&#0145;",
+"output": ["ParseError", ["Character", "\u2018"]]},
+
+{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
+"input":"&#0146;",
+"output": ["ParseError", ["Character", "\u2019"]]},
+
+{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
+"input":"&#0147;",
+"output": ["ParseError", ["Character", "\u201C"]]},
+
+{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
+"input":"&#0148;",
+"output": ["ParseError", ["Character", "\u201D"]]},
+
+{"description": "Windows-1252 BULLET numeric entity.",
+"input":"&#0149;",
+"output": ["ParseError", ["Character", "\u2022"]]},
+
+{"description": "Windows-1252 EN DASH numeric entity.",
+"input":"&#0150;",
+"output": ["ParseError", ["Character", "\u2013"]]},
+
+{"description": "Windows-1252 EM DASH numeric entity.",
+"input":"&#0151;",
+"output": ["ParseError", ["Character", "\u2014"]]},
+
+{"description": "Windows-1252 SMALL TILDE numeric entity.",
+"input":"&#0152;",
+"output": ["ParseError", ["Character", "\u02DC"]]},
+
+{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
+"input":"&#0153;",
+"output": ["ParseError", ["Character", "\u2122"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
+"input":"&#0154;",
+"output": ["ParseError", ["Character", "\u0161"]]},
+
+{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
+"input":"&#0155;",
+"output": ["ParseError", ["Character", "\u203A"]]},
+
+{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
+"input":"&#0156;",
+"output": ["ParseError", ["Character", "\u0153"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0157;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
+"input":"&#x080;",
+"output": ["ParseError", ["Character", "\u20AC"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x081;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x082;",
+"output": ["ParseError", ["Character", "\u201A"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
+"input":"&#x083;",
+"output": ["ParseError", ["Character", "\u0192"]]},
+
+{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x084;",
+"output": ["ParseError", ["Character", "\u201E"]]},
+
+{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
+"input":"&#x085;",
+"output": ["ParseError", ["Character", "\u2026"]]},
+
+{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
+"input":"&#x086;",
+"output": ["ParseError", ["Character", "\u2020"]]},
+
+{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
+"input":"&#x087;",
+"output": ["ParseError", ["Character", "\u2021"]]},
+
+{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
+"input":"&#x088;",
+"output": ["ParseError", ["Character", "\u02C6"]]},
+
+{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
+"input":"&#x089;",
+"output": ["ParseError", ["Character", "\u2030"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
+"input":"&#x08A;",
+"output": ["ParseError", ["Character", "\u0160"]]},
+
+{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x08B;",
+"output": ["ParseError", ["Character", "\u2039"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
+"input":"&#x08C;",
+"output": ["ParseError", ["Character", "\u0152"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x08D;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
+"input":"&#x08E;",
+"output": ["ParseError", ["Character", "\u017D"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x08F;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x090;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x091;",
+"output": ["ParseError", ["Character", "\u2018"]]},
+
+{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x092;",
+"output": ["ParseError", ["Character", "\u2019"]]},
+
+{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x093;",
+"output": ["ParseError", ["Character", "\u201C"]]},
+
+{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x094;",
+"output": ["ParseError", ["Character", "\u201D"]]},
+
+{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
+"input":"&#x095;",
+"output": ["ParseError", ["Character", "\u2022"]]},
+
+{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
+"input":"&#x096;",
+"output": ["ParseError", ["Character", "\u2013"]]},
+
+{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
+"input":"&#x097;",
+"output": ["ParseError", ["Character", "\u2014"]]},
+
+{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
+"input":"&#x098;",
+"output": ["ParseError", ["Character", "\u02DC"]]},
+
+{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
+"input":"&#x099;",
+"output": ["ParseError", ["Character", "\u2122"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
+"input":"&#x09A;",
+"output": ["ParseError", ["Character", "\u0161"]]},
+
+{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x09B;",
+"output": ["ParseError", ["Character", "\u203A"]]},
+
+{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
+"input":"&#x09C;",
+"output": ["ParseError", ["Character", "\u0153"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x09D;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
+"input":"&#x09E;",
+"output": ["ParseError", ["Character", "\u017E"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
+"input":"&#x09F;",
+"output": ["ParseError", ["Character", "\u0178"]]}
+
+]}
diff --git a/test/data/tokeniser2/escapeFlag.test b/test/data/tokeniser2/escapeFlag.test
new file mode 100644
index 0000000..8736c3c
--- /dev/null
+++ b/test/data/tokeniser2/escapeFlag.test
@@ -0,0 +1,33 @@
+{"tests": [
+
+{"description":"Commented close tag in [R]CDATA",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"pre",
+"input":"foo<!--</pre>--></pre>",
+"output":[["Character", "foo<!--</pre>-->"], ["EndTag", "pre"]]},
+
+{"description":"Bogus comment in [R]CDATA",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"pre",
+"input":"foo<!-->baz</pre>",
+"output":[["Character", "foo<!-->baz"], ["EndTag", "pre"]]},
+
+{"description":"End tag surrounded by bogus comment in [R]CDATA",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"pre",
+"input":"foo<!--></pre><!-->baz</pre>",
+"output":[["Character", "foo<!-->"], ["EndTag", "pre"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "pre"]]},
+
+{"description":"Commented entities in RCDATA",
+"contentModelFlags":["RCDATA"],
+"lastStartTag":"pre",
+"input":" &amp; <!-- &amp; --> &amp; </pre>",
+"output":[["Character", " & <!-- &amp; --> & "], ["EndTag", "pre"]]},
+
+{"description":"Incorrect comment ending sequences in [R]CDATA",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"pre",
+"input":"foo<!-- x --x>x-- >x--!>x--<></pre>",
+"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<></pre>"]]}
+
+]}
diff --git a/test/data/tokeniser2/test1.test b/test/data/tokeniser2/test1.test
index c12ff5a..ddb9814 100644
--- a/test/data/tokeniser2/test1.test
+++ b/test/data/tokeniser2/test1.test
@@ -2,15 +2,15 @@
{"description":"Correct Doctype lowercase",
"input":"<!DOCTYPE html>",
-"output":[["DOCTYPE", "HTML", false]]},
+"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype uppercase",
-"input":"<!DOCTYPE HTML>",
-"output":[["DOCTYPE", "HTML", false]]},
+"input":"<!DOCTYPE HTML>",
+"output":[["DOCTYPE", "HTML", null, null, true]]},
{"description":"Correct Doctype mixed case",
"input":"<!DOCTYPE HtMl>",
-"output":[["DOCTYPE", "HTML", false]]},
+"output":[["DOCTYPE", "HtMl", null, null, true]]},
{"description":"Truncated doctype start",
"input":"<!DOC>",
@@ -18,7 +18,7 @@
{"description":"Doctype in error",
"input":"<!DOCTYPE foo>",
-"output":[["DOCTYPE", "FOO", true]]},
+"output":[["DOCTYPE", "foo", null, null, true]]},
{"description":"Single Start Tag",
"input":"<h>",
@@ -58,7 +58,7 @@
{"description":"Multiple atts no space",
"input":"<h a='b'c='d'>",
-"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
+"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Repeated attr",
"input":"<h a='b' a='d'>",
@@ -84,17 +84,38 @@
"input":"<!-",
"output":["ParseError", ["Comment", "-"]]},
-{"description":"Ampersand only",
+{"description":"Short comment",
+ "input":"<!-->",
+ "output":["ParseError", ["Comment", ""]]},
+
+{"description":"Short comment two",
+ "input":"<!--->",
+ "output":["ParseError", ["Comment", ""]]},
+
+{"description":"Short comment three",
+ "input":"<!---->",
+ "output":[["Comment", ""]]},
+
+
+{"description":"Ampersand EOF",
"input":"&",
-"output":["ParseError", ["Character", "&"]]},
+"output":[["Character", "&"]]},
+
+{"description":"Ampersand ampersand EOF",
+"input":"&&",
+"output":[["Character", "&&"]]},
+
+{"description":"Ampersand space EOF",
+"input":"& ",
+"output":[["Character", "& "]]},
{"description":"Unfinished entity",
"input":"&f",
-"output":["ParseError", ["Character", "&"], ["Character", "f"]]},
+"output":["ParseError", ["Character", "&f"]]},
{"description":"Ampersand, number sign",
"input":"&#",
-"output":["ParseError", ["Character", "&"], ["Character", "#"]]},
+"output":["ParseError", ["Character", "&#"]]},
{"description":"Unfinished numeric entity",
"input":"&#x",
@@ -102,20 +123,19 @@
{"description":"Entity with trailing semicolon (1)",
"input":"I'm &not;it",
-"output":[["Character","I'm ¬it"]]},
+"output":[["Character","I'm \u00ACit"]]},
{"description":"Entity with trailing semicolon (2)",
"input":"I'm &notin;",
-"output":[["Character","I'm ∉"]]},
+"output":[["Character","I'm \u2209"]]},
{"description":"Entity without trailing semicolon (1)",
"input":"I'm &notit",
-"output":[["Character","I'm "], "ParseError", ["Character", "¬"],
-["Character", "it"]]},
+"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]},
{"description":"Entity without trailing semicolon (2)",
"input":"I'm &notin",
-"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]},
+"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]},
{"description":"Partial entity match at end of file",
"input":"I'm &no",
@@ -131,6 +151,22 @@
{"description":"Hexadecimal entity in attribute",
"input":"<h a='&#x3f;'></h>",
-"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}
+"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
+
+{"description":"Entity in attribute without semicolon ending in x",
+"input":"<h a='&notx'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
+
+{"description":"Entity in attribute without semicolon ending in 1",
+"input":"<h a='&not1'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
+
+{"description":"Entity in attribute without semicolon ending in i",
+"input":"<h a='&noti'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
+
+{"description":"Entity in attribute without semicolon",
+"input":"<h a='&COPY'>",
+"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]}
]}
diff --git a/test/data/tokeniser2/test2.test b/test/data/tokeniser2/test2.test
index 32c0f99..50c3531 100644
--- a/test/data/tokeniser2/test2.test
+++ b/test/data/tokeniser2/test2.test
@@ -1,32 +1,68 @@
{"tests": [
-{"description":"Doctype without a name",
+{"description":"DOCTYPE without name",
"input":"<!DOCTYPE>",
-"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
-{"description":"Correct doctype without a space before name",
+{"description":"DOCTYPE without space before name",
"input":"<!DOCTYPEhtml>",
-"output":["ParseError", ["DOCTYPE", "HTML", false]]},
+"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
-{"description":"Incorrect doctype without a space before name",
+{"description":"Incorrect DOCTYPE without a space before name",
"input":"<!DOCTYPEfoo>",
-"output":["ParseError", ["DOCTYPE", "FOO", true]]},
+"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
-{"description":"Bogus doctype",
+{"description":"DOCTYPE with publicId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
-"output":["ParseError", ["DOCTYPE", "HTML", true]]},
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC",
+"input":"<!DOCTYPE html PUBLIC",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC '",
+"input":"<!DOCTYPE html PUBLIC '",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC 'x",
+"input":"<!DOCTYPE html PUBLIC 'x",
+"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
+
+{"description":"DOCTYPE with systemId",
+"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with publicId and systemId",
+"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with > in double-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC \">x",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in single-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC '>x",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in double-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
+"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in single-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
+"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
-"output":["ParseError", ["DOCTYPE", "HTML", true]]},
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing the NUL character",
"input":"&#x0000;",
-"output":[["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
@@ -36,13 +72,9 @@
"input":"&#x1010FFFF;",
"output":["ParseError", ["Character", "\uFFFD"]]},
-{"description":"Numeric entity representing a Windows-1252 'codepoint'",
-"input":"&#137;",
-"output":[["Character", "\u2030"]]},
-
-{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
-"input":"&#x89;",
-"output":[["Character", "\u2030"]]},
+{"description":"Hexadecimal entity pair representing a surrogate pair",
+"input":"&#xD869;&#xDED6;",
+"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
@@ -54,19 +86,23 @@
{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
-"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
+"output":[["StartTag", "h", { "a":"&" }]]},
{"description":"StartTag containing <",
"input":"<a<b>",
-"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]},
+"output":[["StartTag", "a<b", { }]]},
{"description":"Non-void element containing trailing /",
"input":"<h/>",
-"output":["ParseError", ["StartTag", "h", { }]]},
+"output":[["StartTag","h",{},true]]},
{"description":"Void element with permitted slash",
"input":"<br/>",
-"output":[["StartTag", "br", { }]]},
+"output":[["StartTag","br",{},true]]},
+
+{"description":"Void element with permitted slash (with attribute)",
+"input":"<br foo='bar'/>",
+"output":[["StartTag","br",{"foo":"bar"},true]]},
{"description":"StartTag containing /",
"input":"<h/a='b'>",
@@ -96,12 +132,17 @@
"input":"foo < bar",
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
-/* jmb -- libjson uses C strings internally, thus the input gets truncated before the
- * data is fed to the input stream (and thus the tokeniser)
{"description":"Null Byte Replacement",
"input":"\u0000",
-"output":[["Character", "\ufffd"]]}
-*/
+"output":["ParseError", ["Character", "\ufffd"]]},
+
+{"description":"Comment with dash",
+"input":"<!---x",
+"output":["ParseError", ["Comment", "-x"]]},
+
+{"description":"Entity + newline",
+"input":"\nx\n&gt;\n",
+"output":[["Character","\nx\n>\n"]]}
]}
diff --git a/test/data/tokeniser2/test3.test b/test/data/tokeniser2/test3.test
new file mode 100644
index 0000000..5931274
--- /dev/null
+++ b/test/data/tokeniser2/test3.test
@@ -0,0 +1,367 @@
+{"tests": [
+
+{"description":"<",
+"input":"<",
+"output":["ParseError", ["Character", "<"]]},
+
+{"description":"<>",
+"input":"<>",
+"output":["ParseError", ["Character", "<>"]]},
+
+{"description":"<!",
+"input":"<!",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!>",
+"input":"<!>",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!--",
+"input":"<!--",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!-->",
+"input":"<!-->",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!---",
+"input":"<!---",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!--->",
+"input":"<!--->",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!---->",
+"input":"<!---->",
+"output":[["Comment", ""]]},
+
+{"description":"<!-----",
+"input":"<!-----",
+"output":["ParseError", "ParseError", ["Comment", "-"]]},
+
+{"description":"<!----.",
+"input":"<!----.",
+"output":["ParseError", "ParseError", ["Comment", "--."]]},
+
+{"description":"<!---?",
+"input":"<!---?",
+"output":["ParseError", ["Comment", "-?"]]},
+
+{"description":"<!--?-",
+"input":"<!--?-",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<!--?--",
+"input":"<!--?--",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<!--?-.",
+"input":"<!--?-.",
+"output":["ParseError", ["Comment", "?-."]]},
+
+{"description":"<!--?.",
+"input":"<!--?.",
+"output":["ParseError", ["Comment", "?."]]},
+
+{"description":"<?>",
+"input":"<?>",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<??",
+"input":"<??",
+"output":["ParseError", ["Comment", "??"]]},
+
+{"description":"</",
+"input":"</",
+"output":["ParseError", ["Character", "</"]]},
+
+{"description":"</>",
+"input":"</>",
+"output":["ParseError"]},
+
+{"description":"</?",
+"input":"</?",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":">",
+"input":">",
+"output":[["Character", ">"]]},
+
+{"description":"-",
+"input":"-",
+"output":[["Character", "-"]]},
+
+{"description":"?",
+"input":"?",
+"output":[["Character", "?"]]},
+
+{"description":"&",
+"input":"&",
+"output":[["Character", "&"]]},
+
+{"description":"&#",
+"input":"&#",
+"output":["ParseError", ["Character", "&#"]]},
+
+{"description":"&#9",
+"input":"&#9",
+"output":["ParseError", ["Character", "\t"]]},
+
+{"description":"<!doctype >",
+"input":"<!doctype >",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!doctype ",
+"input":"<!doctype ",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!doctype!>",
+"input":"<!doctype!>",
+"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
+
+{"description":"<!doctype! >",
+"input":"<!doctype! >",
+"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
+
+{"description":"<!doctype! ",
+"input":"<!doctype! ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! ?>",
+"input":"<!doctype! ?>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! ??",
+"input":"<!doctype! ??",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype!?",
+"input":"<!doctype!?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!?", null, null, false]]},
+
+{"description":"<!doctype! public>",
+"input":"<!doctype! public>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! public ",
+"input":"<!doctype! public ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! public?",
+"input":"<!doctype! public?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! public''",
+"input":"<!doctype! public''",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
+
+{"description":"<!doctype! public'(",
+"input":"<!doctype! public'(",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "(", null, false]]},
+
+{"description":"<!doctype! public\"\">",
+"input":"<!doctype! public\"\">",
+"output":["ParseError", ["DOCTYPE", "!", "", null, true]]},
+
+{"description":"<!doctype! public\"\" ",
+"input":"<!doctype! public\"\" ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
+
+{"description":"<!doctype! public\"\"?",
+"input":"<!doctype! public\"\"?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
+
+{"description":"<!doctype! public\"\"'",
+"input":"<!doctype! public\"\"'",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
+
+{"description":"<!doctype! public\"\"\"",
+"input":"<!doctype! public\"\"\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
+
+{"description":"<!doctype! public\"#",
+"input":"<!doctype! public\"#",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", "#", null, false]]},
+
+{"description":"<!doctype! system>",
+"input":"<!doctype! system>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! system ",
+"input":"<!doctype! system ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! system?",
+"input":"<!doctype! system?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!doctype! system''",
+"input":"<!doctype! system''",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
+
+{"description":"<!doctype! system'(",
+"input":"<!doctype! system'(",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "(", false]]},
+
+{"description":"<!doctype! system\"\">",
+"input":"<!doctype! system\"\">",
+"output":["ParseError", ["DOCTYPE", "!", null, "", true]]},
+
+{"description":"<!doctype! system\"\" ",
+"input":"<!doctype! system\"\" ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
+
+{"description":"<!doctype! system\"\"?",
+"input":"<!doctype! system\"\"?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, "", true]]},
+
+{"description":"<!doctype! system\"#",
+"input":"<!doctype! system\"#",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "#", false]]},
+
+{"description":"</z",
+"input":"</z",
+"output":["ParseError", ["EndTag", "z"]]},
+
+{"description":"<z>",
+"input":"<z>",
+"output":[["StartTag", "z", {}]]},
+
+{"description":"<z ",
+"input":"<z ",
+"output":["ParseError", ["StartTag", "z", {}]]},
+
+{"description":"<z/>",
+"input":"<z/>",
+"output":[["StartTag","z",{},true]]},
+
+{"description":"<z/ ",
+"input":"<z/ ",
+"output":["ParseError", "ParseError", ["StartTag", "z", {}]]},
+
+{"description":"<z//",
+"input":"<z//",
+"output":["ParseError","ParseError",["StartTag","z",{}]]},
+
+{"description":"<z",
+"input":"<z",
+"output":["ParseError", ["StartTag", "z", {}]]},
+
+{"description":"</z",
+"input":"</z",
+"output":["ParseError", ["EndTag", "z"]]},
+
+{"description":"<z0",
+"input":"<z0",
+"output":["ParseError", ["StartTag", "z0", {}]]},
+
+{"description":"<z/0=>",
+"input":"<z/0=>",
+"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0= ",
+"input":"<z/0= ",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0=?>",
+"input":"<z/0=?>",
+"output":["ParseError", ["StartTag", "z", {"0": "?"}]]},
+
+{"description":"<z/0=? ",
+"input":"<z/0=? ",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "?"}]]},
+
+{"description":"<z/0=??",
+"input":"<z/0=??",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "??"}]]},
+
+{"description":"<z/0=''",
+"input":"<z/0=''",
+"output":["ParseError","ParseError",["StartTag","z",{"0":""}]]},
+
+{"description":"<z/0='&",
+"input":"<z/0='&",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
+
+{"description":"<z/0='%",
+"input":"<z/0='%",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "%"}]]},
+
+{"description":"<z/0=\"'",
+"input":"<z/0=\"'",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "'"}]]},
+
+{"description":"<z/0=\"\"",
+"input":"<z/0=\"\"",
+"output":["ParseError","ParseError",["StartTag","z",{"0":""}]]},
+
+{"description":"<z/0=\"&",
+"input":"<z/0=\"&",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
+
+{"description":"<z/0=&",
+"input":"<z/0=&",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
+
+{"description":"<z/0>",
+"input":"<z/0>",
+"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0 =",
+"input":"<z/0 =",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0 >",
+"input":"<z/0 >",
+"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0 ",
+"input":"<z/0 ",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0 /",
+"input":"<z/0 /",
+"output":["ParseError","ParseError",["StartTag","z",{"0":""}]]},
+
+{"description":"<z/0/",
+"input":"<z/0/",
+"output":["ParseError","ParseError",["StartTag","z",{"0":""}]]},
+
+{"description":"<z/00",
+"input":"<z/00",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"00": ""}]]},
+
+{"description":"<z/0 0",
+"input":"<z/0 0",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
+
+{"description":"<z/0='&#9",
+"input":"<z/0='&#9",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
+
+{"description":"<z/0=\"&#9",
+"input":"<z/0=\"&#9",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
+
+{"description":"<z/0=&#9",
+"input":"<z/0=&#9",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
+
+{"description":"<z/0z",
+"input":"<z/0z",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0z": ""}]]},
+
+{"description":"<z/0 z",
+"input":"<z/0 z",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "z": ""}]]},
+
+{"description":"<zz",
+"input":"<zz",
+"output":["ParseError", ["StartTag", "zz", {}]]},
+
+{"description":"<z/z",
+"input":"<z/z",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"z": ""}]]}
+
+]}
diff --git a/test/data/tokeniser2/test4.test b/test/data/tokeniser2/test4.test
new file mode 100644
index 0000000..32d8da5
--- /dev/null
+++ b/test/data/tokeniser2/test4.test
@@ -0,0 +1,289 @@
+{"tests": [
+
+{"description":"< in attribute name",
+"input":"<z/0 <",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
+
+{"description":"< in attribute value",
+"input":"<z x=<",
+"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
+
+{"description":"= in unquoted attribute value",
+"input":"<z z=z=z>",
+"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
+
+{"description":"= attribute",
+"input":"<z =>",
+"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
+
+{"description":"== attribute",
+"input":"<z ==>",
+"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
+
+{"description":"=== attribute",
+"input":"<z ===>",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]]},
+
+{"description":"==== attribute",
+"input":"<z ====>",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
+
+{"description":"Allowed \" after ampersand in attribute value",
+"input":"<z z=\"&\">",
+"output":[["StartTag", "z", {"z": "&"}]]},
+
+{"description":"Non-allowed ' after ampersand in attribute value",
+"input":"<z z=\"&'\">",
+"output":["ParseError", ["StartTag", "z", {"z": "&'"}]]},
+
+{"description":"Allowed ' after ampersand in attribute value",
+"input":"<z z='&'>",
+"output":[["StartTag", "z", {"z": "&"}]]},
+
+{"description":"Non-allowed \" after ampersand in attribute value",
+"input":"<z z='&\"'>",
+"output":["ParseError", ["StartTag", "z", {"z": "&\""}]]},
+
+{"description":"Attribute name starting with \"",
+"input":"<foo \"='bar'>",
+"output":["ParseError", ["StartTag", "foo", {"\"": "bar"}]]},
+
+{"description":"Attribute name starting with '",
+"input":"<foo '='bar'>",
+"output":["ParseError", ["StartTag", "foo", {"'": "bar"}]]},
+
+{"description":"Attribute name containing \"",
+"input":"<foo a\"b='bar'>",
+"output":["ParseError", ["StartTag", "foo", {"a\"b": "bar"}]]},
+
+{"description":"Attribute name containing '",
+"input":"<foo a'b='bar'>",
+"output":["ParseError", ["StartTag", "foo", {"a'b": "bar"}]]},
+
+{"description":"Unquoted attribute value containing '",
+"input":"<foo a=b'c>",
+"output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]]},
+
+{"description":"Unquoted attribute value containing \"",
+"input":"<foo a=b\"c>",
+"output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]]},
+
+{"description":"Double-quoted attribute value not followed by whitespace",
+"input":"<foo a=\"b\"c>",
+"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
+
+{"description":"Single-quoted attribute value not followed by whitespace",
+"input":"<foo a='b'c>",
+"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
+
+{"description":"Quoted attribute followed by permitted /",
+"input":"<br a='b'/>",
+"output":[["StartTag","br",{"a":"b"},true]]},
+
+{"description":"Quoted attribute followed by non-permitted /",
+"input":"<bar a='b'/>",
+"output":[["StartTag","bar",{"a":"b"},true]]},
+
+{"description":"CR EOF after doctype name",
+"input":"<!doctype html \r",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"CR EOF in tag name",
+"input":"<z\r",
+"output":["ParseError", ["StartTag", "z", {}]]},
+
+{"description":"Zero hex numeric entity",
+"input":"&#x0",
+"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Zero decimal numeric entity",
+"input":"&#0",
+"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Zero-prefixed hex numeric entity",
+"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
+"output":[["Character", "A"]]},
+
+{"description":"Zero-prefixed decimal numeric entity",
+"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
+"output":[["Character", "A"]]},
+
+{"description":"Empty hex numeric entities",
+"input":"&#x &#X ",
+"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
+
+{"description":"Empty decimal numeric entities",
+"input":"&# &#; ",
+"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
+
+{"description":"Non-BMP numeric entity",
+"input":"&#x10000;",
+"output":[["Character", "\uD800\uDC00"]]},
+
+{"description":"Maximum non-BMP numeric entity",
+"input":"&#X10FFFF;",
+"output":[["Character", "\uDBFF\uDFFF"]]},
+
+{"description":"Above maximum numeric entity",
+"input":"&#x110000;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"32-bit hex numeric entity",
+"input":"&#x80000041;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"33-bit hex numeric entity",
+"input":"&#x100000041;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"33-bit decimal numeric entity",
+"input":"&#4294967361;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"65-bit hex numeric entity",
+"input":"&#x10000000000000041;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"65-bit decimal numeric entity",
+"input":"&#18446744073709551681;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Surrogate code point edge cases",
+"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
+"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
+
+{"description":"Uppercase start tag name",
+"input":"<X>",
+"output":[["StartTag", "x", {}]]},
+
+{"description":"Uppercase end tag name",
+"input":"</X>",
+"output":[["EndTag", "x"]]},
+
+{"description":"Uppercase attribute name",
+"input":"<x X>",
+"output":[["StartTag", "x", { "x":"" }]]},
+
+{"description":"Tag/attribute name case edge values",
+"input":"<x@AZ[`az{ @AZ[`az{>",
+"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
+
+{"description":"Duplicate different-case attributes",
+"input":"<x x=1 x=2 X=3>",
+"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
+
+{"description":"Uppercase close tag attributes",
+"input":"</x X>",
+"output":["ParseError", ["EndTag", "x"]]},
+
+{"description":"Duplicate close tag attributes",
+"input":"</x x x>",
+"output":["ParseError", "ParseError", ["EndTag", "x"]]},
+
+{"description":"Permitted slash",
+"input":"<br/>",
+"output":[["StartTag","br",{},true]]},
+
+{"description":"Non-permitted slash",
+"input":"<xr/>",
+"output":[["StartTag","xr",{},true]]},
+
+{"description":"Permitted slash but in close tag",
+"input":"</br/>",
+"output":["ParseError", ["EndTag", "br"]]},
+
+{"description":"Doctype public case-sensitivity (1)",
+"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
+"output":[["DOCTYPE", "HtMl", "AbC", "XyZ", true]]},
+
+{"description":"Doctype public case-sensitivity (2)",
+"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
+"output":[["DOCTYPE", "hTmL", "aBc", "xYz", true]]},
+
+{"description":"Doctype system case-sensitivity (1)",
+"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
+"output":[["DOCTYPE", "HtMl", null, "XyZ", true]]},
+
+{"description":"Doctype system case-sensitivity (2)",
+"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
+"output":[["DOCTYPE", "hTmL", null, "xYz", true]]},
+
+{"description":"U+0000 in lookahead region after non-matching character",
+"input":"<!doc>\u0000",
+"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
+"ignoreErrorOrder":true},
+
+{"description":"U+0000 in lookahead region",
+"input":"<!doc\u0000",
+"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
+"ignoreErrorOrder":true},
+
+{"description":"U+0080 in lookahead region",
+"input":"<!doc\u0080",
+"output":["ParseError", "ParseError", ["Comment", "doc\u0080"]],
+"ignoreErrorOrder":true},
+
+{"description":"U+FDD1 in lookahead region",
+"input":"<!doc\uFDD1",
+"output":["ParseError", "ParseError", ["Comment", "doc\uFDD1"]],
+"ignoreErrorOrder":true},
+
+{"description":"U+1FFFF in lookahead region",
+"input":"<!doc\uD83F\uDFFF",
+"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
+"ignoreErrorOrder":true},
+
+{"description":"CR followed by U+0000",
+"input":"\r\u0000",
+"output":["ParseError", ["Character", "\n\uFFFD"]],
+"ignoreErrorOrder":true},
+
+{"description":"CR followed by non-LF",
+"input":"\r?",
+"output":[["Character", "\n?"]]},
+
+{"description":"CR at EOF",
+"input":"\r",
+"output":[["Character", "\n"]]},
+
+{"description":"LF at EOF",
+"input":"\n",
+"output":[["Character", "\n"]]},
+
+{"description":"CR LF",
+"input":"\r\n",
+"output":[["Character", "\n"]]},
+
+{"description":"CR CR",
+"input":"\r\r",
+"output":[["Character", "\n\n"]]},
+
+{"description":"LF LF",
+"input":"\n\n",
+"output":[["Character", "\n\n"]]},
+
+{"description":"LF CR",
+"input":"\n\r",
+"output":[["Character", "\n\n"]]},
+
+{"description":"text CR CR CR text",
+"input":"text\r\r\rtext",
+"output":[["Character", "text\n\n\ntext"]]},
+
+{"description":"Doctype publik",
+"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype publi",
+"input":"<!DOCTYPE html PUBLI",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype sistem",
+"input":"<!DOCTYPE html SISTEM \"AbC\">",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype sys",
+"input":"<!DOCTYPE html SYS",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]}
+
+]}