From 9dd96deaf640232f359a001c56914e40104689d3 Mon Sep 17 00:00:00 2001 From: Andrew Sidwell Date: Tue, 17 Jun 2008 03:22:18 +0000 Subject: Fix up the comment states some more. svn path=/trunk/hubbub/; revision=4370 --- src/tokeniser/tokeniser.c | 70 ++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 31 deletions(-) (limited to 'src/tokeniser') diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index edfa26e..20eea4e 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -4,7 +4,6 @@ * http://www.opensource.org/licenses/mit-license.php * Copyright 2007 John-Mark Bell */ - #include #include @@ -1812,6 +1811,9 @@ bool hubbub_tokeniser_handle_match_comment(hubbub_tokeniser *tokeniser) if (c == HUBBUB_INPUTSTREAM_OOD) return false; + tokeniser->context.current_comment.data.off = 0; + tokeniser->context.current_comment.len = 0; + if (c == '-') { tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT_START; hubbub_inputstream_advance(tokeniser->input); @@ -1819,9 +1821,6 @@ bool hubbub_tokeniser_handle_match_comment(hubbub_tokeniser *tokeniser) /* Rewind to the first '-' */ hubbub_inputstream_rewind(tokeniser->input, 1); - tokeniser->context.current_comment.data.off = 0; - tokeniser->context.current_comment.len = 0; - tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_COMMENT; } @@ -1836,10 +1835,6 @@ bool hubbub_tokeniser_handle_comment_start(hubbub_tokeniser *tokeniser) if (c == HUBBUB_INPUTSTREAM_OOD) return false; - tokeniser->context.current_comment.data.off = 0; - tokeniser->context.current_comment.len = 0; - - if (c == '-') { tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT_START_DASH; hubbub_inputstream_advance(tokeniser->input); @@ -1870,12 +1865,12 @@ bool hubbub_tokeniser_handle_comment_start(hubbub_tokeniser *tokeniser) pos = hubbub_inputstream_cur_pos(tokeniser->input, &len); - tokeniser->context.current_comment.data.off = pos; - tokeniser->context.current_comment.len = len; - - hubbub_inputstream_advance(tokeniser->input); + if (tokeniser->context.current_comment.len == 0) + tokeniser->context.current_comment.data.off = pos; + tokeniser->context.current_comment.len += len; tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT; + hubbub_inputstream_advance(tokeniser->input); } return true; @@ -1924,7 +1919,9 @@ bool hubbub_tokeniser_handle_comment_start_dash(hubbub_tokeniser *tokeniser) pos = hubbub_inputstream_cur_pos(tokeniser->input, &len); - tokeniser->context.current_comment.data.off = pos; + if (tokeniser->context.current_comment.len == 0) + tokeniser->context.current_comment.data.off = pos; + tokeniser->context.current_comment.len += len; hubbub_inputstream_advance(tokeniser->input); @@ -1995,21 +1992,24 @@ bool hubbub_tokeniser_handle_comment_end_dash(hubbub_tokeniser *tokeniser) uint32_t pos; size_t len; + /* In order to get to this state, the previous character must + * be '-'. This means we can safely rewind and add to the + * comment buffer. */ + + hubbub_inputstream_rewind(tokeniser->input, 1); + pos = hubbub_inputstream_cur_pos(tokeniser->input, &len); - if (tokeniser->context.current_comment.len == 0) { + if (tokeniser->context.current_comment.len == 0) tokeniser->context.current_comment.data.off = pos; - } else { - /* Need to do this to get length of '-' */ - len += pos - - tokeniser->context.current_comment.data.off; - } + tokeniser->context.current_comment.len += len; + hubbub_inputstream_advance(tokeniser->input); - tokeniser->context.current_comment.len = len; + pos = hubbub_inputstream_cur_pos(tokeniser->input, &len); + tokeniser->context.current_comment.len += len; + hubbub_inputstream_advance(tokeniser->input); tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT; - - hubbub_inputstream_advance(tokeniser->input); } return true; @@ -2066,21 +2066,29 @@ bool hubbub_tokeniser_handle_comment_end(hubbub_tokeniser *tokeniser) uint32_t pos; size_t len; + /* In order to have got here, the previous two characters + * must be '--', so rewind two characters */ + hubbub_inputstream_rewind(tokeniser->input, 2); + + /* Add first '-' */ pos = hubbub_inputstream_cur_pos(tokeniser->input, &len); - if (tokeniser->context.current_comment.len == 0) { + if (tokeniser->context.current_comment.len == 0) tokeniser->context.current_comment.data.off = pos; - } else { - /* Need to do this to get length of '--' */ - len += pos - - tokeniser->context.current_comment.data.off; - } - - tokeniser->context.current_comment.len = len; + tokeniser->context.current_comment.len += len; + hubbub_inputstream_advance(tokeniser->input); - tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT; + /* Add second '-' */ + pos = hubbub_inputstream_cur_pos(tokeniser->input, &len); + tokeniser->context.current_comment.len += len; + hubbub_inputstream_advance(tokeniser->input); + /* Add input character */ + pos = hubbub_inputstream_cur_pos(tokeniser->input, &len); + tokeniser->context.current_comment.len += len; hubbub_inputstream_advance(tokeniser->input); + + tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT; } return true; -- cgit v1.2.3