From a6a2f7a824491d82af40f1ce8699ef4eb7d192e3 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Tue, 10 Mar 2009 12:49:49 +0000 Subject: Sync tokeniser tests with html5lib. Sync tokeniser implementation with the spec. Fix handling of \0 in the tag open state. The unicodeCharacters test is disabled, as json-c doesn't like it. svn path=/trunk/hubbub/; revision=6755 --- src/tokeniser/tokeniser.c | 31 +- test/data/tokeniser2/INDEX | 2 + test/data/tokeniser2/contentModelFlags.test | 56 +- test/data/tokeniser2/escapeFlag.test | 30 +- test/data/tokeniser2/numericEntities.test | 1311 ++++++ test/data/tokeniser2/test1.test | 14 +- test/data/tokeniser2/test2.test | 10 +- test/data/tokeniser2/test3.test | 6134 +++++++++++++++++++++++++-- test/data/tokeniser2/test4.test | 24 +- test/data/tokeniser2/unicodeChars.test | 1303 ++++++ 10 files changed, 8610 insertions(+), 305 deletions(-) create mode 100644 test/data/tokeniser2/numericEntities.test create mode 100644 test/data/tokeniser2/unicodeChars.test diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index 7f68676..3b2fa9e 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -869,15 +869,6 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser) START_BUF(ctag->name, cptr, len); ctag->n_attributes = 0; - tokeniser->state = STATE_TAG_NAME; - } else if (c == '\0') { - tokeniser->context.pending += len; - tokeniser->context.current_tag_type = - HUBBUB_TOKEN_START_TAG; - - START_BUF(ctag->name, u_fffd, sizeof(u_fffd)); - ctag->n_attributes = 0; - tokeniser->state = STATE_TAG_NAME; } else if (c == '>') { /** \todo parse error */ @@ -1274,7 +1265,7 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_name( } else { hubbub_attribute *attr; - if (c == '"' || c == '\'' || c == '=') { + if (c == '"' || c == '\'') { /** \todo parse error */ } @@ -1327,6 +1318,7 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value( if (error != PARSERUTILS_OK) { if (error == PARSERUTILS_EOF) { + /** \todo parse error */ tokeniser->state = STATE_DATA; return emit_current_tag(tokeniser); } else { @@ -1347,6 +1339,7 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value( tokeniser->context.pending += len; tokeniser->state = STATE_ATTRIBUTE_VALUE_SQ; } else if (c == '>') { + /** \todo parse error */ tokeniser->context.pending += len; tokeniser->state = STATE_DATA; @@ -1357,6 +1350,10 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value( u_fffd, sizeof(u_fffd)); tokeniser->state = STATE_ATTRIBUTE_VALUE_UQ; } else { + if (c == '=') { + /** \todo parse error */ + } + tokeniser->context.pending += len; START_BUF(ctag->attributes[ctag->n_attributes - 1].value, cptr, len); @@ -1991,6 +1988,7 @@ hubbub_error hubbub_tokeniser_handle_before_doctype_name( if (error != PARSERUTILS_OK) { if (error == PARSERUTILS_EOF) { + /** \todo parse error */ /* Emit current doctype, force-quirks on */ tokeniser->state = STATE_DATA; return emit_current_doctype(tokeniser, true); @@ -2005,11 +2003,16 @@ hubbub_error hubbub_tokeniser_handle_before_doctype_name( if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') { /* pass over in silence */ } else if (c == '>') { + /** \todo parse error */ tokeniser->state = STATE_DATA; return emit_current_doctype(tokeniser, true); } else { if (c == '\0') { START_BUF(cdoc->name, u_fffd, sizeof(u_fffd)); + } else if ('A' <= c && c <= 'Z') { + uint8_t lc = c + 0x20; + + START_BUF(cdoc->name, &lc, len); } else { START_BUF(cdoc->name, cptr, len); } @@ -2050,6 +2053,9 @@ hubbub_error hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser) return emit_current_doctype(tokeniser, false); } else if (c == '\0') { COLLECT(cdoc->name, u_fffd, sizeof(u_fffd)); + } else if ('A' <= c && c <= 'Z') { + uint8_t lc = c + 0x20; + COLLECT(cdoc->name, &lc, len); } else { COLLECT(cdoc->name, cptr, len); } @@ -2886,11 +2892,12 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity( cp = cp1252Table[cp - 0x80]; } else if (cp == 0x0D) { cp = 0x000A; - } else if (ctx->match_entity.overflow || cp <= 0x0008 || + } else if (ctx->match_entity.overflow || + cp <= 0x0008 || cp == 0x000B || (0x000E <= cp && cp <= 0x001F) || (0x007F <= cp && cp <= 0x009F) || (0xD800 <= cp && cp <= 0xDFFF) || - (0xFDD0 <= cp && cp <= 0xFDDF) || + (0xFDD0 <= cp && cp <= 0xFDEF) || (cp & 0xFFFE) == 0xFFFE) { /* the check for cp > 0x10FFFF per spec is performed * in the loop above to avoid overflow */ diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX index 49dc102..f16feb4 100644 --- a/test/data/tokeniser2/INDEX +++ b/test/data/tokeniser2/INDEX @@ -9,5 +9,7 @@ test4.test html5lib tests (part 4) contentModelFlags.test html5lib content model tests entities.test html5lib entity tests escapeFlag.test html5lib escape flag tests +numericEntities.test html5lib numeric entities tests +#unicodeChars.test html5lib unicode character tests cdata.test CDATA section tests regression.test Regression tests diff --git a/test/data/tokeniser2/contentModelFlags.test b/test/data/tokeniser2/contentModelFlags.test index b4f450a..1dec3e8 100644 --- a/test/data/tokeniser2/contentModelFlags.test +++ b/test/data/tokeniser2/contentModelFlags.test @@ -8,37 +8,61 @@ {"description":"End tag closing RCDATA or CDATA", "contentModelFlags":["RCDATA", "CDATA"], -"lastStartTag":"pre", -"input":"foo", -"output":[["Character", "foo"], ["EndTag", "pre"]]}, +"lastStartTag":"xmp", +"input":"foo", +"output":[["Character", "foo"], ["EndTag", "xmp"]]}, {"description":"End tag closing RCDATA or CDATA (case-insensitivity)", "contentModelFlags":["RCDATA", "CDATA"], -"lastStartTag":"pre", -"input":"foo", -"output":[["Character", "foo"], ["EndTag", "pre"]]}, +"lastStartTag":"xmp", +"input":"foo", +"output":[["Character", "foo"], ["EndTag", "xmp"]]}, + +{"description":"End tag closing RCDATA or CDATA (ending with space)", +"contentModelFlags":["RCDATA", "CDATA"], +"lastStartTag":"xmp", +"input":"foobar", -"output":[["Character", "bar"], ["EndTag", "pre"]]}, +"lastStartTag":"xmp", +"input":"bar", +"output":[["Character", "bar"], ["EndTag", "xmp"]]}, {"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)", "contentModelFlags":["RCDATA", "CDATA"], -"lastStartTag":"pre", -"input":"bar", -"output":[["Character", "bar"]]}, +"lastStartTag":"xmp", +"input":"bar", +"output":[["Character", "bar"]]}, {"description":"End tag closing RCDATA or CDATA, switching back to PCDATA", "contentModelFlags":["RCDATA", "CDATA"], -"lastStartTag":"pre", -"input":"foo", -"output":[["Character", "foo"], ["EndTag", "pre"], ["EndTag", "baz"]]}, +"lastStartTag":"xmp", +"input":"foo", +"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]}, {"description":"CDATA w/ something looking like an entity", "contentModelFlags":["CDATA"], -"lastStartTag":"pre", +"lastStartTag":"xmp", "input":"&foo;", "output":[["Character", "&foo;"]]}, diff --git a/test/data/tokeniser2/escapeFlag.test b/test/data/tokeniser2/escapeFlag.test index 8736c3c..4c4bf51 100644 --- a/test/data/tokeniser2/escapeFlag.test +++ b/test/data/tokeniser2/escapeFlag.test @@ -2,32 +2,32 @@ {"description":"Commented close tag in [R]CDATA", "contentModelFlags":["RCDATA", "CDATA"], -"lastStartTag":"pre", -"input":"foo", -"output":[["Character", "foo"], ["EndTag", "pre"]]}, +"lastStartTag":"xmp", +"input":"foo", +"output":[["Character", "foo"], ["EndTag", "xmp"]]}, {"description":"Bogus comment in [R]CDATA", "contentModelFlags":["RCDATA", "CDATA"], -"lastStartTag":"pre", -"input":"foobaz", -"output":[["Character", "foobaz"], ["EndTag", "pre"]]}, +"lastStartTag":"xmp", +"input":"foobaz", +"output":[["Character", "foobaz"], ["EndTag", "xmp"]]}, {"description":"End tag surrounded by bogus comment in [R]CDATA", "contentModelFlags":["RCDATA", "CDATA"], -"lastStartTag":"pre", -"input":"foobaz", -"output":[["Character", "foo"], ["EndTag", "pre"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "pre"]]}, +"lastStartTag":"xmp", +"input":"foobaz", +"output":[["Character", "foo"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]}, {"description":"Commented entities in RCDATA", "contentModelFlags":["RCDATA"], -"lastStartTag":"pre", -"input":" & & ", -"output":[["Character", " & & "], ["EndTag", "pre"]]}, +"lastStartTag":"xmp", +"input":" & & ", +"output":[["Character", " & & "], ["EndTag", "xmp"]]}, {"description":"Incorrect comment ending sequences in [R]CDATA", "contentModelFlags":["RCDATA", "CDATA"], -"lastStartTag":"pre", -"input":"foox--<>", -"output":[["Character", "foox--<>"]]} +"lastStartTag":"xmp", +"input":"foox--<>", +"output":[["Character", "foox--<>"]]} ]} diff --git a/test/data/tokeniser2/numericEntities.test b/test/data/tokeniser2/numericEntities.test new file mode 100644 index 0000000..78a8a13 --- /dev/null +++ b/test/data/tokeniser2/numericEntities.test @@ -0,0 +1,1311 @@ +{"tests": [ + +{"description": "Invalid numeric entity character U+0000", +"input": "�", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0001", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0002", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0003", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0004", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0005", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0006", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0007", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0008", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+000B", +"input": " ", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+000E", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+000F", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0010", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0011", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0012", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0013", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0014", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0015", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0016", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0017", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0018", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+0019", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+001A", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+001B", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+001C", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+001D", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+001E", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+001F", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+007F", +"input": "", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+D800", +"input": "�", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+DFFF", +"input": "�", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD0", +"input": "﷐", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD1", +"input": "﷑", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD2", +"input": "﷒", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD3", +"input": "﷓", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD4", +"input": "﷔", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD5", +"input": "﷕", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD6", +"input": "﷖", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD7", +"input": "﷗", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD8", +"input": "﷘", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDD9", +"input": "﷙", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDDA", +"input": "﷚", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDDB", +"input": "﷛", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDDC", +"input": "﷜", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDDD", +"input": "﷝", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDDE", +"input": "﷞", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDDF", +"input": "﷟", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE0", +"input": "﷠", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE1", +"input": "﷡", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE2", +"input": "﷢", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE3", +"input": "﷣", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE4", +"input": "﷤", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE5", +"input": "﷥", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE6", +"input": "﷦", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE7", +"input": "﷧", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE8", +"input": "﷨", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDE9", +"input": "﷩", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDEA", +"input": "﷪", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDEB", +"input": "﷫", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDEC", +"input": "﷬", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDED", +"input": "﷭", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDEE", +"input": "﷮", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FDEF", +"input": "﷯", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FFFE", +"input": "￾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FFFF", +"input": "￿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+1FFFE", +"input": "🿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+1FFFF", +"input": "🿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+2FFFE", +"input": "𯿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+2FFFF", +"input": "𯿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+3FFFE", +"input": "𿿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+3FFFF", +"input": "𿿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+4FFFE", +"input": "񏿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+4FFFF", +"input": "񏿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+5FFFE", +"input": "񟿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+5FFFF", +"input": "񟿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+6FFFE", +"input": "񯿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+6FFFF", +"input": "񯿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+7FFFE", +"input": "񿿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+7FFFF", +"input": "񿿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+8FFFE", +"input": "򏿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+8FFFF", +"input": "򏿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+9FFFE", +"input": "򟿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+9FFFF", +"input": "򟿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+AFFFE", +"input": "򯿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+AFFFF", +"input": "򯿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+BFFFE", +"input": "򿿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+BFFFF", +"input": "򿿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+CFFFE", +"input": "󏿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+CFFFF", +"input": "󏿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+DFFFE", +"input": "󟿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+DFFFF", +"input": "󟿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+EFFFE", +"input": "󯿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+EFFFF", +"input": "󯿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FFFFE", +"input": "󿿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+FFFFF", +"input": "󿿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+10FFFE", +"input": "􏿾", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Invalid numeric entity character U+10FFFF", +"input": "􏿿", +"output": ["ParseError", ["Character", "\uFFFD"]]}, + +{"description": "Valid numeric entity character U+0009", +"input": " ", +"output": [["Character", "\u0009"]]}, + +{"description": "Valid numeric entity character U+000A", +"input": " ", +"output": [["Character", "\u000A"]]}, + +{"description": "Valid numeric entity character U+0020", +"input": " ", +"output": [["Character", "\u0020"]]}, + +{"description": "Valid numeric entity character U+0021", +"input": "!", +"output": [["Character", "\u0021"]]}, + +{"description": "Valid numeric entity character U+0022", +"input": """, +"output": [["Character", "\u0022"]]}, + +{"description": "Valid numeric entity character U+0023", +"input": "#", +"output": [["Character", "\u0023"]]}, + +{"description": "Valid numeric entity character U+0024", +"input": "$", +"output": [["Character", "\u0024"]]}, + +{"description": "Valid numeric entity character U+0025", +"input": "%", +"output": [["Character", "\u0025"]]}, + +{"description": "Valid numeric entity character U+0026", +"input": "&", +"output": [["Character", "\u0026"]]}, + +{"description": "Valid numeric entity character U+0027", +"input": "'", +"output": [["Character", "\u0027"]]}, + +{"description": "Valid numeric entity character U+0028", +"input": "(", +"output": [["Character", "\u0028"]]}, + +{"description": "Valid numeric entity character U+0029", +"input": ")", +"output": [["Character", "\u0029"]]}, + +{"description": "Valid numeric entity character U+002A", +"input": "*", +"output": [["Character", "\u002A"]]}, + +{"description": "Valid numeric entity character U+002B", +"input": "+", +"output": [["Character", "\u002B"]]}, + +{"description": "Valid numeric entity character U+002C", +"input": ",", +"output": [["Character", "\u002C"]]}, + +{"description": "Valid numeric entity character U+002D", +"input": "-", +"output": [["Character", "\u002D"]]}, + +{"description": "Valid numeric entity character U+002E", +"input": ".", +"output": [["Character", "\u002E"]]}, + +{"description": "Valid numeric entity character U+002F", +"input": "/", +"output": [["Character", "\u002F"]]}, + +{"description": "Valid numeric entity character U+0030", +"input": "0", +"output": [["Character", "\u0030"]]}, + +{"description": "Valid numeric entity character U+0031", +"input": "1", +"output": [["Character", "\u0031"]]}, + +{"description": "Valid numeric entity character U+0032", +"input": "2", +"output": [["Character", "\u0032"]]}, + +{"description": "Valid numeric entity character U+0033", +"input": "3", +"output": [["Character", "\u0033"]]}, + +{"description": "Valid numeric entity character U+0034", +"input": "4", +"output": [["Character", "\u0034"]]}, + +{"description": "Valid numeric entity character U+0035", +"input": "5", +"output": [["Character", "\u0035"]]}, + +{"description": "Valid numeric entity character U+0036", +"input": "6", +"output": [["Character", "\u0036"]]}, + +{"description": "Valid numeric entity character U+0037", +"input": "7", +"output": [["Character", "\u0037"]]}, + +{"description": "Valid numeric entity character U+0038", +"input": "8", +"output": [["Character", "\u0038"]]}, + +{"description": "Valid numeric entity character U+0039", +"input": "9", +"output": [["Character", "\u0039"]]}, + +{"description": "Valid numeric entity character U+003A", +"input": ":", +"output": [["Character", "\u003A"]]}, + +{"description": "Valid numeric entity character U+003B", +"input": ";", +"output": [["Character", "\u003B"]]}, + +{"description": "Valid numeric entity character U+003C", +"input": "<", +"output": [["Character", "\u003C"]]}, + +{"description": "Valid numeric entity character U+003D", +"input": "=", +"output": [["Character", "\u003D"]]}, + +{"description": "Valid numeric entity character U+003E", +"input": ">", +"output": [["Character", "\u003E"]]}, + +{"description": "Valid numeric entity character U+003F", +"input": "?", +"output": [["Character", "\u003F"]]}, + +{"description": "Valid numeric entity character U+0040", +"input": "@", +"output": [["Character", "\u0040"]]}, + +{"description": "Valid numeric entity character U+0041", +"input": "A", +"output": [["Character", "\u0041"]]}, + +{"description": "Valid numeric entity character U+0042", +"input": "B", +"output": [["Character", "\u0042"]]}, + +{"description": "Valid numeric entity character U+0043", +"input": "C", +"output": [["Character", "\u0043"]]}, + +{"description": "Valid numeric entity character U+0044", +"input": "D", +"output": [["Character", "\u0044"]]}, + +{"description": "Valid numeric entity character U+0045", +"input": "E", +"output": [["Character", "\u0045"]]}, + +{"description": "Valid numeric entity character U+0046", +"input": "F", +"output": [["Character", "\u0046"]]}, + +{"description": "Valid numeric entity character U+0047", +"input": "G", +"output": [["Character", "\u0047"]]}, + +{"description": "Valid numeric entity character U+0048", +"input": "H", +"output": [["Character", "\u0048"]]}, + +{"description": "Valid numeric entity character U+0049", +"input": "I", +"output": [["Character", "\u0049"]]}, + +{"description": "Valid numeric entity character U+004A", +"input": "J", +"output": [["Character", "\u004A"]]}, + +{"description": "Valid numeric entity character U+004B", +"input": "K", +"output": [["Character", "\u004B"]]}, + +{"description": "Valid numeric entity character U+004C", +"input": "L", +"output": [["Character", "\u004C"]]}, + +{"description": "Valid numeric entity character U+004D", +"input": "M", +"output": [["Character", "\u004D"]]}, + +{"description": "Valid numeric entity character U+004E", +"input": "N", +"output": [["Character", "\u004E"]]}, + +{"description": "Valid numeric entity character U+004F", +"input": "O", +"output": [["Character", "\u004F"]]}, + +{"description": "Valid numeric entity character U+0050", +"input": "P", +"output": [["Character", "\u0050"]]}, + +{"description": "Valid numeric entity character U+0051", +"input": "Q", +"output": [["Character", "\u0051"]]}, + +{"description": "Valid numeric entity character U+0052", +"input": "R", +"output": [["Character", "\u0052"]]}, + +{"description": "Valid numeric entity character U+0053", +"input": "S", +"output": [["Character", "\u0053"]]}, + +{"description": "Valid numeric entity character U+0054", +"input": "T", +"output": [["Character", "\u0054"]]}, + +{"description": "Valid numeric entity character U+0055", +"input": "U", +"output": [["Character", "\u0055"]]}, + +{"description": "Valid numeric entity character U+0056", +"input": "V", +"output": [["Character", "\u0056"]]}, + +{"description": "Valid numeric entity character U+0057", +"input": "W", +"output": [["Character", "\u0057"]]}, + +{"description": "Valid numeric entity character U+0058", +"input": "X", +"output": [["Character", "\u0058"]]}, + +{"description": "Valid numeric entity character U+0059", +"input": "Y", +"output": [["Character", "\u0059"]]}, + +{"description": "Valid numeric entity character U+005A", +"input": "Z", +"output": [["Character", "\u005A"]]}, + +{"description": "Valid numeric entity character U+005B", +"input": "[", +"output": [["Character", "\u005B"]]}, + +{"description": "Valid numeric entity character U+005C", +"input": "\", +"output": [["Character", "\u005C"]]}, + +{"description": "Valid numeric entity character U+005D", +"input": "]", +"output": [["Character", "\u005D"]]}, + +{"description": "Valid numeric entity character U+005E", +"input": "^", +"output": [["Character", "\u005E"]]}, + +{"description": "Valid numeric entity character U+005F", +"input": "_", +"output": [["Character", "\u005F"]]}, + +{"description": "Valid numeric entity character U+0060", +"input": "`", +"output": [["Character", "\u0060"]]}, + +{"description": "Valid numeric entity character U+0061", +"input": "a", +"output": [["Character", "\u0061"]]}, + +{"description": "Valid numeric entity character U+0062", +"input": "b", +"output": [["Character", "\u0062"]]}, + +{"description": "Valid numeric entity character U+0063", +"input": "c", +"output": [["Character", "\u0063"]]}, + +{"description": "Valid numeric entity character U+0064", +"input": "d", +"output": [["Character", "\u0064"]]}, + +{"description": "Valid numeric entity character U+0065", +"input": "e", +"output": [["Character", "\u0065"]]}, + +{"description": "Valid numeric entity character U+0066", +"input": "f", +"output": [["Character", "\u0066"]]}, + +{"description": "Valid numeric entity character U+0067", +"input": "g", +"output": [["Character", "\u0067"]]}, + +{"description": "Valid numeric entity character U+0068", +"input": "h", +"output": [["Character", "\u0068"]]}, + +{"description": "Valid numeric entity character U+0069", +"input": "i", +"output": [["Character", "\u0069"]]}, + +{"description": "Valid numeric entity character U+006A", +"input": "j", +"output": [["Character", "\u006A"]]}, + +{"description": "Valid numeric entity character U+006B", +"input": "k", +"output": [["Character", "\u006B"]]}, + +{"description": "Valid numeric entity character U+006C", +"input": "l", +"output": [["Character", "\u006C"]]}, + +{"description": "Valid numeric entity character U+006D", +"input": "m", +"output": [["Character", "\u006D"]]}, + +{"description": "Valid numeric entity character U+006E", +"input": "n", +"output": [["Character", "\u006E"]]}, + +{"description": "Valid numeric entity character U+006F", +"input": "o", +"output": [["Character", "\u006F"]]}, + +{"description": "Valid numeric entity character U+0070", +"input": "p", +"output": [["Character", "\u0070"]]}, + +{"description": "Valid numeric entity character U+0071", +"input": "q", +"output": [["Character", "\u0071"]]}, + +{"description": "Valid numeric entity character U+0072", +"input": "r", +"output": [["Character", "\u0072"]]}, + +{"description": "Valid numeric entity character U+0073", +"input": "s", +"output": [["Character", "\u0073"]]}, + +{"description": "Valid numeric entity character U+0074", +"input": "t", +"output": [["Character", "\u0074"]]}, + +{"description": "Valid numeric entity character U+0075", +"input": "u", +"output": [["Character", "\u0075"]]}, + +{"description": "Valid numeric entity character U+0076", +"input": "v", +"output": [["Character", "\u0076"]]}, + +{"description": "Valid numeric entity character U+0077", +"input": "w", +"output": [["Character", "\u0077"]]}, + +{"description": "Valid numeric entity character U+0078", +"input": "x", +"output": [["Character", "\u0078"]]}, + +{"description": "Valid numeric entity character U+0079", +"input": "y", +"output": [["Character", "\u0079"]]}, + +{"description": "Valid numeric entity character U+007A", +"input": "z", +"output": [["Character", "\u007A"]]}, + +{"description": "Valid numeric entity character U+007B", +"input": "{", +"output": [["Character", "\u007B"]]}, + +{"description": "Valid numeric entity character U+007C", +"input": "|", +"output": [["Character", "\u007C"]]}, + +{"description": "Valid numeric entity character U+007D", +"input": "}", +"output": [["Character", "\u007D"]]}, + +{"description": "Valid numeric entity character U+007E", +"input": "~", +"output": [["Character", "\u007E"]]}, + +{"description": "Valid numeric entity character U+00A0", +"input": " ", +"output": [["Character", "\u00A0"]]}, + +{"description": "Valid numeric entity character U+00A1", +"input": "¡", +"output": [["Character", "\u00A1"]]}, + +{"description": "Valid numeric entity character U+00A2", +"input": "¢", +"output": [["Character", "\u00A2"]]}, + +{"description": "Valid numeric entity character U+00A3", +"input": "£", +"output": [["Character", "\u00A3"]]}, + +{"description": "Valid numeric entity character U+00A4", +"input": "¤", +"output": [["Character", "\u00A4"]]}, + +{"description": "Valid numeric entity character U+00A5", +"input": "¥", +"output": [["Character", "\u00A5"]]}, + +{"description": "Valid numeric entity character U+00A6", +"input": "¦", +"output": [["Character", "\u00A6"]]}, + +{"description": "Valid numeric entity character U+00A7", +"input": "§", +"output": [["Character", "\u00A7"]]}, + +{"description": "Valid numeric entity character U+00A8", +"input": "¨", +"output": [["Character", "\u00A8"]]}, + +{"description": "Valid numeric entity character U+00A9", +"input": "©", +"output": [["Character", "\u00A9"]]}, + +{"description": "Valid numeric entity character U+00AA", +"input": "ª", +"output": [["Character", "\u00AA"]]}, + +{"description": "Valid numeric entity character U+00AB", +"input": "«", +"output": [["Character", "\u00AB"]]}, + +{"description": "Valid numeric entity character U+00AC", +"input": "¬", +"output": [["Character", "\u00AC"]]}, + +{"description": "Valid numeric entity character U+00AD", +"input": "­", +"output": [["Character", "\u00AD"]]}, + +{"description": "Valid numeric entity character U+00AE", +"input": "®", +"output": [["Character", "\u00AE"]]}, + +{"description": "Valid numeric entity character U+00AF", +"input": "¯", +"output": [["Character", "\u00AF"]]}, + +{"description": "Valid numeric entity character U+00B0", +"input": "°", +"output": [["Character", "\u00B0"]]}, + +{"description": "Valid numeric entity character U+00B1", +"input": "±", +"output": [["Character", "\u00B1"]]}, + +{"description": "Valid numeric entity character U+00B2", +"input": "²", +"output": [["Character", "\u00B2"]]}, + +{"description": "Valid numeric entity character U+00B3", +"input": "³", +"output": [["Character", "\u00B3"]]}, + +{"description": "Valid numeric entity character U+00B4", +"input": "´", +"output": [["Character", "\u00B4"]]}, + +{"description": "Valid numeric entity character U+00B5", +"input": "µ", +"output": [["Character", "\u00B5"]]}, + +{"description": "Valid numeric entity character U+00B6", +"input": "¶", +"output": [["Character", "\u00B6"]]}, + +{"description": "Valid numeric entity character U+00B7", +"input": "·", +"output": [["Character", "\u00B7"]]}, + +{"description": "Valid numeric entity character U+00B8", +"input": "¸", +"output": [["Character", "\u00B8"]]}, + +{"description": "Valid numeric entity character U+00B9", +"input": "¹", +"output": [["Character", "\u00B9"]]}, + +{"description": "Valid numeric entity character U+00BA", +"input": "º", +"output": [["Character", "\u00BA"]]}, + +{"description": "Valid numeric entity character U+00BB", +"input": "»", +"output": [["Character", "\u00BB"]]}, + +{"description": "Valid numeric entity character U+00BC", +"input": "¼", +"output": [["Character", "\u00BC"]]}, + +{"description": "Valid numeric entity character U+00BD", +"input": "½", +"output": [["Character", "\u00BD"]]}, + +{"description": "Valid numeric entity character U+00BE", +"input": "¾", +"output": [["Character", "\u00BE"]]}, + +{"description": "Valid numeric entity character U+00BF", +"input": "¿", +"output": [["Character", "\u00BF"]]}, + +{"description": "Valid numeric entity character U+00C0", +"input": "À", +"output": [["Character", "\u00C0"]]}, + +{"description": "Valid numeric entity character U+00C1", +"input": "Á", +"output": [["Character", "\u00C1"]]}, + +{"description": "Valid numeric entity character U+00C2", +"input": "Â", +"output": [["Character", "\u00C2"]]}, + +{"description": "Valid numeric entity character U+00C3", +"input": "Ã", +"output": [["Character", "\u00C3"]]}, + +{"description": "Valid numeric entity character U+00C4", +"input": "Ä", +"output": [["Character", "\u00C4"]]}, + +{"description": "Valid numeric entity character U+00C5", +"input": "Å", +"output": [["Character", "\u00C5"]]}, + +{"description": "Valid numeric entity character U+00C6", +"input": "Æ", +"output": [["Character", "\u00C6"]]}, + +{"description": "Valid numeric entity character U+00C7", +"input": "Ç", +"output": [["Character", "\u00C7"]]}, + +{"description": "Valid numeric entity character U+00C8", +"input": "È", +"output": [["Character", "\u00C8"]]}, + +{"description": "Valid numeric entity character U+00C9", +"input": "É", +"output": [["Character", "\u00C9"]]}, + +{"description": "Valid numeric entity character U+00CA", +"input": "Ê", +"output": [["Character", "\u00CA"]]}, + +{"description": "Valid numeric entity character U+00CB", +"input": "Ë", +"output": [["Character", "\u00CB"]]}, + +{"description": "Valid numeric entity character U+00CC", +"input": "Ì", +"output": [["Character", "\u00CC"]]}, + +{"description": "Valid numeric entity character U+00CD", +"input": "Í", +"output": [["Character", "\u00CD"]]}, + +{"description": "Valid numeric entity character U+00CE", +"input": "Î", +"output": [["Character", "\u00CE"]]}, + +{"description": "Valid numeric entity character U+00CF", +"input": "Ï", +"output": [["Character", "\u00CF"]]}, + +{"description": "Valid numeric entity character U+00D0", +"input": "Ð", +"output": [["Character", "\u00D0"]]}, + +{"description": "Valid numeric entity character U+00D1", +"input": "Ñ", +"output": [["Character", "\u00D1"]]}, + +{"description": "Valid numeric entity character U+00D2", +"input": "Ò", +"output": [["Character", "\u00D2"]]}, + +{"description": "Valid numeric entity character U+00D3", +"input": "Ó", +"output": [["Character", "\u00D3"]]}, + +{"description": "Valid numeric entity character U+00D4", +"input": "Ô", +"output": [["Character", "\u00D4"]]}, + +{"description": "Valid numeric entity character U+00D5", +"input": "Õ", +"output": [["Character", "\u00D5"]]}, + +{"description": "Valid numeric entity character U+00D6", +"input": "Ö", +"output": [["Character", "\u00D6"]]}, + +{"description": "Valid numeric entity character U+00D7", +"input": "×", +"output": [["Character", "\u00D7"]]}, + +{"description": "Valid numeric entity character U+00D8", +"input": "Ø", +"output": [["Character", "\u00D8"]]}, + +{"description": "Valid numeric entity character U+00D9", +"input": "Ù", +"output": [["Character", "\u00D9"]]}, + +{"description": "Valid numeric entity character U+00DA", +"input": "Ú", +"output": [["Character", "\u00DA"]]}, + +{"description": "Valid numeric entity character U+00DB", +"input": "Û", +"output": [["Character", "\u00DB"]]}, + +{"description": "Valid numeric entity character U+00DC", +"input": "Ü", +"output": [["Character", "\u00DC"]]}, + +{"description": "Valid numeric entity character U+00DD", +"input": "Ý", +"output": [["Character", "\u00DD"]]}, + +{"description": "Valid numeric entity character U+00DE", +"input": "Þ", +"output": [["Character", "\u00DE"]]}, + +{"description": "Valid numeric entity character U+00DF", +"input": "ß", +"output": [["Character", "\u00DF"]]}, + +{"description": "Valid numeric entity character U+00E0", +"input": "à", +"output": [["Character", "\u00E0"]]}, + +{"description": "Valid numeric entity character U+00E1", +"input": "á", +"output": [["Character", "\u00E1"]]}, + +{"description": "Valid numeric entity character U+00E2", +"input": "â", +"output": [["Character", "\u00E2"]]}, + +{"description": "Valid numeric entity character U+00E3", +"input": "ã", +"output": [["Character", "\u00E3"]]}, + +{"description": "Valid numeric entity character U+00E4", +"input": "ä", +"output": [["Character", "\u00E4"]]}, + +{"description": "Valid numeric entity character U+00E5", +"input": "å", +"output": [["Character", "\u00E5"]]}, + +{"description": "Valid numeric entity character U+00E6", +"input": "æ", +"output": [["Character", "\u00E6"]]}, + +{"description": "Valid numeric entity character U+00E7", +"input": "ç", +"output": [["Character", "\u00E7"]]}, + +{"description": "Valid numeric entity character U+00E8", +"input": "è", +"output": [["Character", "\u00E8"]]}, + +{"description": "Valid numeric entity character U+00E9", +"input": "é", +"output": [["Character", "\u00E9"]]}, + +{"description": "Valid numeric entity character U+00EA", +"input": "ê", +"output": [["Character", "\u00EA"]]}, + +{"description": "Valid numeric entity character U+00EB", +"input": "ë", +"output": [["Character", "\u00EB"]]}, + +{"description": "Valid numeric entity character U+00EC", +"input": "ì", +"output": [["Character", "\u00EC"]]}, + +{"description": "Valid numeric entity character U+00ED", +"input": "í", +"output": [["Character", "\u00ED"]]}, + +{"description": "Valid numeric entity character U+00EE", +"input": "î", +"output": [["Character", "\u00EE"]]}, + +{"description": "Valid numeric entity character U+00EF", +"input": "ï", +"output": [["Character", "\u00EF"]]}, + +{"description": "Valid numeric entity character U+00F0", +"input": "ð", +"output": [["Character", "\u00F0"]]}, + +{"description": "Valid numeric entity character U+00F1", +"input": "ñ", +"output": [["Character", "\u00F1"]]}, + +{"description": "Valid numeric entity character U+00F2", +"input": "ò", +"output": [["Character", "\u00F2"]]}, + +{"description": "Valid numeric entity character U+00F3", +"input": "ó", +"output": [["Character", "\u00F3"]]}, + +{"description": "Valid numeric entity character U+00F4", +"input": "ô", +"output": [["Character", "\u00F4"]]}, + +{"description": "Valid numeric entity character U+00F5", +"input": "õ", +"output": [["Character", "\u00F5"]]}, + +{"description": "Valid numeric entity character U+00F6", +"input": "ö", +"output": [["Character", "\u00F6"]]}, + +{"description": "Valid numeric entity character U+00F7", +"input": "÷", +"output": [["Character", "\u00F7"]]}, + +{"description": "Valid numeric entity character U+00F8", +"input": "ø", +"output": [["Character", "\u00F8"]]}, + +{"description": "Valid numeric entity character U+00F9", +"input": "ù", +"output": [["Character", "\u00F9"]]}, + +{"description": "Valid numeric entity character U+00FA", +"input": "ú", +"output": [["Character", "\u00FA"]]}, + +{"description": "Valid numeric entity character U+00FB", +"input": "û", +"output": [["Character", "\u00FB"]]}, + +{"description": "Valid numeric entity character U+00FC", +"input": "ü", +"output": [["Character", "\u00FC"]]}, + +{"description": "Valid numeric entity character U+00FD", +"input": "ý", +"output": [["Character", "\u00FD"]]}, + +{"description": "Valid numeric entity character U+00FE", +"input": "þ", +"output": [["Character", "\u00FE"]]}, + +{"description": "Valid numeric entity character U+00FF", +"input": "ÿ", +"output": [["Character", "\u00FF"]]}, + +{"description": "Valid numeric entity character U+D7FF", +"input": "퟿", +"output": [["Character", "\uD7FF"]]}, + +{"description": "Valid numeric entity character U+E000", +"input": "", +"output": [["Character", "\uE000"]]}, + +{"description": "Valid numeric entity character U+FDCF", +"input": "﷏", +"output": [["Character", "\uFDCF"]]}, + +{"description": "Valid numeric entity character U+FDF0", +"input": "ﷰ", +"output": [["Character", "\uFDF0"]]}, + +{"description": "Valid numeric entity character U+FFFD", +"input": "�", +"output": [["Character", "\uFFFD"]]}, + +{"description": "Valid numeric entity character U+10000", +"input": "𐀀", +"output": [["Character", "\uD800\uDC00"]]}, + +{"description": "Valid numeric entity character U+1FFFD", +"input": "🿽", +"output": [["Character", "\uD83F\uDFFD"]]}, + +{"description": "Valid numeric entity character U+20000", +"input": "𠀀", +"output": [["Character", "\uD840\uDC00"]]}, + +{"description": "Valid numeric entity character U+2FFFD", +"input": "𯿽", +"output": [["Character", "\uD87F\uDFFD"]]}, + +{"description": "Valid numeric entity character U+30000", +"input": "𰀀", +"output": [["Character", "\uD880\uDC00"]]}, + +{"description": "Valid numeric entity character U+3FFFD", +"input": "𿿽", +"output": [["Character", "\uD8BF\uDFFD"]]}, + +{"description": "Valid numeric entity character U+40000", +"input": "񀀀", +"output": [["Character", "\uD8C0\uDC00"]]}, + +{"description": "Valid numeric entity character U+4FFFD", +"input": "񏿽", +"output": [["Character", "\uD8FF\uDFFD"]]}, + +{"description": "Valid numeric entity character U+50000", +"input": "񐀀", +"output": [["Character", "\uD900\uDC00"]]}, + +{"description": "Valid numeric entity character U+5FFFD", +"input": "񟿽", +"output": [["Character", "\uD93F\uDFFD"]]}, + +{"description": "Valid numeric entity character U+60000", +"input": "񠀀", +"output": [["Character", "\uD940\uDC00"]]}, + +{"description": "Valid numeric entity character U+6FFFD", +"input": "񯿽", +"output": [["Character", "\uD97F\uDFFD"]]}, + +{"description": "Valid numeric entity character U+70000", +"input": "񰀀", +"output": [["Character", "\uD980\uDC00"]]}, + +{"description": "Valid numeric entity character U+7FFFD", +"input": "񿿽", +"output": [["Character", "\uD9BF\uDFFD"]]}, + +{"description": "Valid numeric entity character U+80000", +"input": "򀀀", +"output": [["Character", "\uD9C0\uDC00"]]}, + +{"description": "Valid numeric entity character U+8FFFD", +"input": "򏿽", +"output": [["Character", "\uD9FF\uDFFD"]]}, + +{"description": "Valid numeric entity character U+90000", +"input": "򐀀", +"output": [["Character", "\uDA00\uDC00"]]}, + +{"description": "Valid numeric entity character U+9FFFD", +"input": "򟿽", +"output": [["Character", "\uDA3F\uDFFD"]]}, + +{"description": "Valid numeric entity character U+A0000", +"input": "򠀀", +"output": [["Character", "\uDA40\uDC00"]]}, + +{"description": "Valid numeric entity character U+AFFFD", +"input": "򯿽", +"output": [["Character", "\uDA7F\uDFFD"]]}, + +{"description": "Valid numeric entity character U+B0000", +"input": "򰀀", +"output": [["Character", "\uDA80\uDC00"]]}, + +{"description": "Valid numeric entity character U+BFFFD", +"input": "򿿽", +"output": [["Character", "\uDABF\uDFFD"]]}, + +{"description": "Valid numeric entity character U+C0000", +"input": "󀀀", +"output": [["Character", "\uDAC0\uDC00"]]}, + +{"description": "Valid numeric entity character U+CFFFD", +"input": "󏿽", +"output": [["Character", "\uDAFF\uDFFD"]]}, + +{"description": "Valid numeric entity character U+D0000", +"input": "󐀀", +"output": [["Character", "\uDB00\uDC00"]]}, + +{"description": "Valid numeric entity character U+DFFFD", +"input": "󟿽", +"output": [["Character", "\uDB3F\uDFFD"]]}, + +{"description": "Valid numeric entity character U+E0000", +"input": "󠀀", +"output": [["Character", "\uDB40\uDC00"]]}, + +{"description": "Valid numeric entity character U+EFFFD", +"input": "󯿽", +"output": [["Character", "\uDB7F\uDFFD"]]}, + +{"description": "Valid numeric entity character U+F0000", +"input": "󰀀", +"output": [["Character", "\uDB80\uDC00"]]}, + +{"description": "Valid numeric entity character U+FFFFD", +"input": "󿿽", +"output": [["Character", "\uDBBF\uDFFD"]]}, + +{"description": "Valid numeric entity character U+100000", +"input": "􀀀", +"output": [["Character", "\uDBC0\uDC00"]]}, + +{"description": "Valid numeric entity character U+10FFFD", +"input": "􏿽", +"output": [["Character", "\uDBFF\uDFFD"]]} + +]} diff --git a/test/data/tokeniser2/test1.test b/test/data/tokeniser2/test1.test index 46ad7ca..9431863 100644 --- a/test/data/tokeniser2/test1.test +++ b/test/data/tokeniser2/test1.test @@ -6,11 +6,15 @@ {"description":"Correct Doctype uppercase", "input":"", -"output":[["DOCTYPE", "HTML", null, null, true]]}, +"output":[["DOCTYPE", "html", null, null, true]]}, {"description":"Correct Doctype mixed case", "input":"", -"output":[["DOCTYPE", "HtMl", null, null, true]]}, +"output":[["DOCTYPE", "html", null, null, true]]}, + +{"description":"Correct Doctype case with EOF", +"input":"", @@ -171,6 +175,10 @@ {"description":"Entity in attribute without semicolon", "input":"", -"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]} +"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]}, + +{"description":"Unquoted attribute ending in ampersand", + "input":"", -"output":[["StartTag", "h", {"a":"", "b":""}]]} +"output":[["StartTag", "h", {"a":"", "b":""}]]}, -]} +{"description":"Double-quote after attribute name", +"input":"", +"output":["ParseError", ["StartTag", "h", {"a":"", "\"":""}]]}, +{"description":"Single-quote after attribute name", +"input":"", +"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]} +]} diff --git a/test/data/tokeniser2/test3.test b/test/data/tokeniser2/test3.test index 516442d..593fc93 100644 --- a/test/data/tokeniser2/test3.test +++ b/test/data/tokeniser2/test3.test @@ -1,40 +1,568 @@ {"tests": [ +{"description":"", +"input":"", +"output":[]}, + +{"description":"\\u0000", +"input":"\u0000", +"output":["ParseError", ["Character", "\uFFFD"]]}, + +{"description":"\\u0009", +"input":"\u0009", +"output":[["Character", "\u0009"]]}, + +{"description":"\\u000A", +"input":"\u000A", +"output":[["Character", "\u000A"]]}, + +{"description":"\\u000B", +"input":"\u000B", +"output":["ParseError", ["Character", "\u000B"]]}, + +{"description":"\\u000C", +"input":"\u000C", +"output":[["Character", "\u000C"]]}, + +{"description":" ", +"input":" ", +"output":[["Character", " "]]}, + +{"description":"!", +"input":"!", +"output":[["Character", "!"]]}, + +{"description":"\"", +"input":"\"", +"output":[["Character", "\""]]}, + +{"description":"%", +"input":"%", +"output":[["Character", "%"]]}, + +{"description":"&", +"input":"&", +"output":[["Character", "&"]]}, + +{"description":"'", +"input":"'", +"output":[["Character", "'"]]}, + +{"description":",", +"input":",", +"output":[["Character", ","]]}, + +{"description":"-", +"input":"-", +"output":[["Character", "-"]]}, + +{"description":".", +"input":".", +"output":[["Character", "."]]}, + +{"description":"/", +"input":"/", +"output":[["Character", "/"]]}, + +{"description":"0", +"input":"0", +"output":[["Character", "0"]]}, + +{"description":"1", +"input":"1", +"output":[["Character", "1"]]}, + +{"description":"9", +"input":"9", +"output":[["Character", "9"]]}, + +{"description":";", +"input":";", +"output":[["Character", ";"]]}, + {"description":"<", "input":"<", "output":["ParseError", ["Character", "<"]]}, -{"description":"<>", -"input":"<>", -"output":["ParseError", ["Character", "<>"]]}, +{"description":"<\\u0000", +"input":"<\u0000", +"output":["ParseError", "ParseError", ["Character", "<\uFFFD"]]}, + +{"description":"<\\u0009", +"input":"<\u0009", +"output":["ParseError", ["Character", "<\u0009"]]}, + +{"description":"<\\u000A", +"input":"<\u000A", +"output":["ParseError", ["Character", "<\u000A"]]}, + +{"description":"<\\u000B", +"input":"<\u000B", +"output":["ParseError", "ParseError", ["Character", "<\u000B"]]}, + +{"description":"<\\u000C", +"input":"<\u000C", +"output":["ParseError", ["Character", "<\u000C"]]}, + +{"description":"< ", +"input":"< ", +"output":["ParseError", ["Character", "< "]]}, {"description":"", -"input":"", -"output":["ParseError", ["Comment", ""]]}, +{"description":"", -"input":"", -"output":["ParseError", ["Comment", ""]]}, +{"description":"", -"input":"", +{"description":"", -"input":"", -"output":[["Comment", ""]]}, +{"description":"", +"input":"", +"output":[["Comment", ""]]}, + +{"description":"", +"input":"", +"output":["ParseError", ["Comment", ""]]}, + {"description":"", +"input":"", +"output":["ParseError", ["Comment", ""]]}, -{"description":"", -"input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]}, +{"description":"