From 9dd96deaf640232f359a001c56914e40104689d3 Mon Sep 17 00:00:00 2001
From: Andrew Sidwell <andy@entai.co.uk>
Date: Tue, 17 Jun 2008 03:22:18 +0000
Subject: Fix up the comment states some more.

svn path=/trunk/hubbub/; revision=4370
---
 src/tokeniser/tokeniser.c | 70 ++++++++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 31 deletions(-)

(limited to 'src/tokeniser')

diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index edfa26e..20eea4e 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -4,7 +4,6 @@
  *                http://www.opensource.org/licenses/mit-license.php
  * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
  */
-
 #include <stdbool.h>
 #include <string.h>
 
@@ -1812,6 +1811,9 @@ bool hubbub_tokeniser_handle_match_comment(hubbub_tokeniser *tokeniser)
 	if (c == HUBBUB_INPUTSTREAM_OOD)
 		return false;
 
+	tokeniser->context.current_comment.data.off = 0;
+	tokeniser->context.current_comment.len = 0;
+
 	if (c == '-') {
 		tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT_START;
 		hubbub_inputstream_advance(tokeniser->input);
@@ -1819,9 +1821,6 @@ bool hubbub_tokeniser_handle_match_comment(hubbub_tokeniser *tokeniser)
 		/* Rewind to the first '-' */
 		hubbub_inputstream_rewind(tokeniser->input, 1);
 
-		tokeniser->context.current_comment.data.off = 0;
-		tokeniser->context.current_comment.len = 0;
-
 		tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_COMMENT;
 	}
 
@@ -1836,10 +1835,6 @@ bool hubbub_tokeniser_handle_comment_start(hubbub_tokeniser *tokeniser)
 	if (c == HUBBUB_INPUTSTREAM_OOD)
 		return false;
 
-	tokeniser->context.current_comment.data.off = 0;
-	tokeniser->context.current_comment.len = 0;
-
-
 	if (c == '-') {
 		tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT_START_DASH;
 		hubbub_inputstream_advance(tokeniser->input);
@@ -1870,12 +1865,12 @@ bool hubbub_tokeniser_handle_comment_start(hubbub_tokeniser *tokeniser)
 
 		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
 
-		tokeniser->context.current_comment.data.off = pos;
-		tokeniser->context.current_comment.len = len;
-
-		hubbub_inputstream_advance(tokeniser->input);
+		if (tokeniser->context.current_comment.len == 0)
+			tokeniser->context.current_comment.data.off = pos;
+		tokeniser->context.current_comment.len += len;
 
 		tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT;
+		hubbub_inputstream_advance(tokeniser->input);
 	}
 
 	return true;
@@ -1924,7 +1919,9 @@ bool hubbub_tokeniser_handle_comment_start_dash(hubbub_tokeniser *tokeniser)
 
 		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
 
-		tokeniser->context.current_comment.data.off = pos;
+		if (tokeniser->context.current_comment.len == 0)
+			tokeniser->context.current_comment.data.off = pos;
+
 		tokeniser->context.current_comment.len += len;
 		hubbub_inputstream_advance(tokeniser->input);
 
@@ -1995,21 +1992,24 @@ bool hubbub_tokeniser_handle_comment_end_dash(hubbub_tokeniser *tokeniser)
 		uint32_t pos;
 		size_t len;
 
+		/* In order to get to this state, the previous character must
+		 * be '-'.  This means we can safely rewind and add to the
+		 * comment buffer. */
+
+		hubbub_inputstream_rewind(tokeniser->input, 1);
+
 		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
 
-		if (tokeniser->context.current_comment.len == 0) {
+		if (tokeniser->context.current_comment.len == 0)
 			tokeniser->context.current_comment.data.off = pos;
-		} else {
-			/* Need to do this to get length of '-' */
-			len += pos -
-				tokeniser->context.current_comment.data.off;
-		}
+		tokeniser->context.current_comment.len += len;
+		hubbub_inputstream_advance(tokeniser->input);
 
-		tokeniser->context.current_comment.len = len;
+		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		tokeniser->context.current_comment.len += len;
+		hubbub_inputstream_advance(tokeniser->input);
 
 		tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT;
-
-		hubbub_inputstream_advance(tokeniser->input);
 	}
 
 	return true;
@@ -2066,21 +2066,29 @@ bool hubbub_tokeniser_handle_comment_end(hubbub_tokeniser *tokeniser)
 		uint32_t pos;
 		size_t len;
 
+		/* In order to have got here, the previous two characters
+		 * must be '--', so rewind two characters */
+		hubbub_inputstream_rewind(tokeniser->input, 2);
+
+		/* Add first '-' */
 		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
 
-		if (tokeniser->context.current_comment.len == 0) {
+		if (tokeniser->context.current_comment.len == 0)
 			tokeniser->context.current_comment.data.off = pos;
-		} else {
-			/* Need to do this to get length of '--' */
-			len += pos -
-				tokeniser->context.current_comment.data.off;
-		}
-
-		tokeniser->context.current_comment.len = len;
+		tokeniser->context.current_comment.len += len;
+		hubbub_inputstream_advance(tokeniser->input);
 
-		tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT;
+		/* Add second '-' */
+		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		tokeniser->context.current_comment.len += len;
+		hubbub_inputstream_advance(tokeniser->input);
 
+		/* Add input character */
+		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		tokeniser->context.current_comment.len += len;
 		hubbub_inputstream_advance(tokeniser->input);
+
+		tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT;
 	}
 
 	return true;
-- 
cgit v1.2.3