diff options
author | Andrew Sidwell <andy@entai.co.uk> | 2008-07-31 15:47:01 +0000 |
---|---|---|
committer | Andrew Sidwell <andy@entai.co.uk> | 2008-07-31 15:47:01 +0000 |
commit | 682ae68041597e9327ccf1b09cfa6a2b016679fc (patch) | |
tree | c5df29932ffade1bccab364eca4ae3efa671f70b /src | |
parent | 4394a6787388d8837ff89eb29878b04cdc6a93a4 (diff) | |
download | libhubbub-682ae68041597e9327ccf1b09cfa6a2b016679fc.tar.gz libhubbub-682ae68041597e9327ccf1b09cfa6a2b016679fc.tar.bz2 |
Handle CRs correctly everywhere.
svn path=/trunk/hubbub/; revision=4844
Diffstat (limited to 'src')
-rw-r--r-- | src/tokeniser/tokeniser.c | 126 |
1 files changed, 123 insertions, 3 deletions
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index d6a061c..c96863a 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -1515,6 +1515,22 @@ bool hubbub_tokeniser_handle_attribute_value_dq(hubbub_tokeniser *tokeniser) COLLECT_NOBUF(tokeniser->context.chars, len); COLLECT_CHAR(ctag->attributes[ctag->n_attributes - 1].value, u_fffd, sizeof(u_fffd)); + } else if (c == '\r') { + cptr = parserutils_inputstream_peek( + tokeniser->input, + tokeniser->context.chars.len + len, + &len); + + if (cptr == PARSERUTILS_INPUTSTREAM_OOD) { + return false; + } else if (cptr == PARSERUTILS_INPUTSTREAM_EOF || + CHAR(cptr) != '\n') { + COLLECT_CHAR(ctag->attributes[ + ctag->n_attributes - 1].value, + &lf, sizeof(lf)); + } + + COLLECT_NOBUF(tokeniser->context.chars, len); } else { COLLECT_NOBUF(tokeniser->context.chars, len); COLLECT_MS(ctag->attributes[ctag->n_attributes - 1].value, @@ -1557,6 +1573,22 @@ bool hubbub_tokeniser_handle_attribute_value_sq(hubbub_tokeniser *tokeniser) COLLECT_NOBUF(tokeniser->context.chars, len); COLLECT_CHAR(ctag->attributes[ctag->n_attributes - 1].value, u_fffd, sizeof(u_fffd)); + } else if (c == '\r') { + cptr = parserutils_inputstream_peek( + tokeniser->input, + tokeniser->context.chars.len + len, + &len); + + if (cptr == PARSERUTILS_INPUTSTREAM_OOD) { + return false; + } else if (cptr == PARSERUTILS_INPUTSTREAM_EOF || + CHAR(cptr) != '\n') { + COLLECT_CHAR(ctag->attributes[ + ctag->n_attributes - 1].value, + &lf, sizeof(lf)); + } + + COLLECT_NOBUF(tokeniser->context.chars, len); } else { COLLECT_NOBUF(tokeniser->context.chars, len); COLLECT_MS(ctag->attributes[ctag->n_attributes - 1].value, @@ -1767,6 +1799,22 @@ bool hubbub_tokeniser_handle_bogus_comment(hubbub_tokeniser *tokeniser) parserutils_buffer_append(tokeniser->buffer, u_fffd, sizeof(u_fffd)); comment->len += sizeof(u_fffd); + } else if (c == '\r') { + cptr = parserutils_inputstream_peek( + tokeniser->input, + tokeniser->context.chars.len + len, + 
&len); + + if (cptr == PARSERUTILS_INPUTSTREAM_OOD) { + return false; + } else if (cptr == PARSERUTILS_INPUTSTREAM_EOF || + CHAR(cptr) != '\n') { + parserutils_buffer_append(tokeniser->buffer, + &lf, sizeof(lf)); + comment->len += sizeof(lf); + } + + COLLECT_NOBUF(tokeniser->context.chars, len); } else { parserutils_buffer_append(tokeniser->buffer, (uint8_t *)cptr, len); @@ -1912,7 +1960,7 @@ bool hubbub_tokeniser_handle_comment(hubbub_tokeniser *tokeniser) } else if (c == '\r') { cptr = parserutils_inputstream_peek( tokeniser->input, - tokeniser->context.chars.len + 1, + tokeniser->context.chars.len + len, &len); if (cptr == PARSERUTILS_INPUTSTREAM_OOD) { return false; @@ -2234,9 +2282,21 @@ bool hubbub_tokeniser_handle_doctype_public_dq(hubbub_tokeniser *tokeniser) tokeniser->state = STATE_DATA; } else if (c == '\0') { if (cdoc->public_id.len == 0) { - START_BUF(cdoc->name, u_fffd, sizeof(u_fffd)); + START_BUF(cdoc->public_id, u_fffd, sizeof(u_fffd)); } else { - COLLECT_CHAR(cdoc->name, u_fffd, sizeof(u_fffd)); + COLLECT_CHAR(cdoc->public_id, u_fffd, sizeof(u_fffd)); + } + } else if (c == '\r') { + cptr = parserutils_inputstream_peek( + tokeniser->input, + tokeniser->context.chars.len + len, + &len); + + if (cptr == PARSERUTILS_INPUTSTREAM_OOD) { + return false; + } else if (cptr == PARSERUTILS_INPUTSTREAM_EOF || + CHAR(cptr) != '\n') { + COLLECT_CHAR(cdoc->public_id, &lf, sizeof(lf)); } } else { COLLECT_MS(cdoc->public_id, cptr, len); @@ -2279,6 +2339,18 @@ bool hubbub_tokeniser_handle_doctype_public_sq(hubbub_tokeniser *tokeniser) COLLECT_CHAR(cdoc->public_id, u_fffd, sizeof(u_fffd)); } + } else if (c == '\r') { + cptr = parserutils_inputstream_peek( + tokeniser->input, + tokeniser->context.chars.len + len, + &len); + + if (cptr == PARSERUTILS_INPUTSTREAM_OOD) { + return false; + } else if (cptr == PARSERUTILS_INPUTSTREAM_EOF || + CHAR(cptr) != '\n') { + COLLECT_CHAR(cdoc->public_id, &lf, sizeof(lf)); + } } else { COLLECT_MS(cdoc->public_id, cptr, len); } @@ 
-2444,6 +2516,18 @@ bool hubbub_tokeniser_handle_doctype_system_dq(hubbub_tokeniser *tokeniser) COLLECT_CHAR(cdoc->system_id, u_fffd, sizeof(u_fffd)); } + } else if (c == '\r') { + cptr = parserutils_inputstream_peek( + tokeniser->input, + tokeniser->context.chars.len + len, + &len); + + if (cptr == PARSERUTILS_INPUTSTREAM_OOD) { + return false; + } else if (cptr == PARSERUTILS_INPUTSTREAM_EOF || + CHAR(cptr) != '\n') { + COLLECT_CHAR(cdoc->system_id, &lf, sizeof(lf)); + } } else { COLLECT_MS(cdoc->system_id, cptr, len); } @@ -2484,6 +2568,18 @@ bool hubbub_tokeniser_handle_doctype_system_sq(hubbub_tokeniser *tokeniser) COLLECT_CHAR(cdoc->system_id, u_fffd, sizeof(u_fffd)); } + } else if (c == '\r') { + cptr = parserutils_inputstream_peek( + tokeniser->input, + tokeniser->context.chars.len + len, + &len); + + if (cptr == PARSERUTILS_INPUTSTREAM_OOD) { + return false; + } else if (cptr == PARSERUTILS_INPUTSTREAM_EOF || + CHAR(cptr) != '\n') { + COLLECT_CHAR(cdoc->system_id, &lf, sizeof(lf)); + } } else { COLLECT_MS(cdoc->system_id, cptr, len); } @@ -2641,6 +2737,30 @@ bool hubbub_tokeniser_handle_cdata_block(hubbub_tokeniser *tokeniser) parserutils_inputstream_advance(tokeniser->input, len); tokeniser->context.match_cdata.end = 0; + } else if (c == '\r') { + cptr = parserutils_inputstream_peek( + tokeniser->input, + tokeniser->context.chars.len + len, + &len); + + if (cptr == PARSERUTILS_INPUTSTREAM_OOD) { + break; + } + + if (tokeniser->context.chars.len > 0) { + /* Emit any pending characters */ + emit_current_chars(tokeniser); + } + + c = CHAR(cptr); + if (c != '\n') { + /* Emit newline */ + emit_character_token(tokeniser, &lf_str); + } + + /* Advance over */ + parserutils_inputstream_advance(tokeniser->input, len); + tokeniser->context.match_cdata.end = 0; } else { COLLECT_MS_NOBUF(tokeniser->context.chars, cptr, len); tokeniser->context.match_cdata.end = 0; |