From 2ff7b51e2c437f4aed94211b3c1de33da8ad5e87 Mon Sep 17 00:00:00 2001
From: Andrew Sidwell
Date: Mon, 16 Jun 2008 06:06:48 +0000
Subject: - Move away from using inputstream_push_back() and instead use
 _rewind() for both clarity and efficiency.
 - Fix a bug where the CDATA-matching code was looking at the
 doctype-matching count.

svn path=/trunk/hubbub/; revision=4359
---
 src/tokeniser/tokeniser.c | 67 ++++++++++++++++++-----------------------------
 1 file changed, 25 insertions(+), 42 deletions(-)

diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index d71a80d..3a0a0d6 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -1817,7 +1817,8 @@ bool hubbub_tokeniser_handle_match_comment(hubbub_tokeniser *tokeniser)
 		tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT_START;
 		hubbub_inputstream_advance(tokeniser->input);
 	} else {
-		hubbub_inputstream_push_back(tokeniser->input, '-');
+		/* Rewind to the first '-' */
+		hubbub_inputstream_rewind(tokeniser->input, 1);
 
 		tokeniser->context.current_comment.data.off = 0;
 		tokeniser->context.current_comment.len = 0;
@@ -2122,14 +2123,9 @@ bool hubbub_tokeniser_handle_match_doctype(hubbub_tokeniser *tokeniser)
 		tokeniser->state = HUBBUB_TOKENISER_STATE_DOCTYPE;
 		hubbub_inputstream_advance(tokeniser->input);
 	} else {
-		switch (tokeniser->context.match_doctype.count) {
-		case 6: hubbub_inputstream_push_back(tokeniser->input, 'P');
-		case 5: hubbub_inputstream_push_back(tokeniser->input, 'Y');
-		case 4: hubbub_inputstream_push_back(tokeniser->input, 'T');
-		case 3: hubbub_inputstream_push_back(tokeniser->input, 'C');
-		case 2: hubbub_inputstream_push_back(tokeniser->input, 'O');
-		case 1: hubbub_inputstream_push_back(tokeniser->input, 'D');
-		}
+		/* Rewind as many characters as have been matched */
+		hubbub_inputstream_rewind(tokeniser->input,
+				tokeniser->context.match_doctype.count);
 
 		tokeniser->context.current_comment.data.off = 0;
 		tokeniser->context.current_comment.len = 0;
@@ -2340,13 +2336,9 @@ bool hubbub_tokeniser_handle_match_public(hubbub_tokeniser *tokeniser)
 		tokeniser->state = HUBBUB_TOKENISER_STATE_BEFORE_DOCTYPE_PUBLIC;
 		hubbub_inputstream_advance(tokeniser->input);
 	} else {
-		switch (tokeniser->context.match_doctype.count) {
-		case 5: hubbub_inputstream_push_back(tokeniser->input, 'I');
-		case 4: hubbub_inputstream_push_back(tokeniser->input, 'L');
-		case 3: hubbub_inputstream_push_back(tokeniser->input, 'B');
-		case 2: hubbub_inputstream_push_back(tokeniser->input, 'U');
-		case 1: hubbub_inputstream_push_back(tokeniser->input, 'P');
-		}
+		/* Rewind as many characters as have been matched */
+		hubbub_inputstream_rewind(tokeniser->input,
+				tokeniser->context.match_doctype.count);
 
 		tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_DOCTYPE;
 	}
@@ -2587,13 +2579,9 @@ bool hubbub_tokeniser_handle_match_system(hubbub_tokeniser *tokeniser)
 		tokeniser->state = HUBBUB_TOKENISER_STATE_BEFORE_DOCTYPE_SYSTEM;
 		hubbub_inputstream_advance(tokeniser->input);
 	} else {
-		switch (tokeniser->context.match_doctype.count) {
-		case 5: hubbub_inputstream_push_back(tokeniser->input, 'E');
-		case 4: hubbub_inputstream_push_back(tokeniser->input, 'T');
-		case 3: hubbub_inputstream_push_back(tokeniser->input, 'S');
-		case 2: hubbub_inputstream_push_back(tokeniser->input, 'Y');
-		case 1: hubbub_inputstream_push_back(tokeniser->input, 'S');
-		}
+		/* Rewind as many characters as have been matched */
+		hubbub_inputstream_rewind(tokeniser->input,
+				tokeniser->context.match_doctype.count);
 
 		tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_DOCTYPE;
 	}
@@ -2840,36 +2828,31 @@ bool hubbub_tokeniser_handle_match_cdata(hubbub_tokeniser *tokeniser)
 	if (c == HUBBUB_INPUTSTREAM_OOD)
 		return false;
 
-	if (tokeniser->context.match_doctype.count == 1 && c == 'C') {
-		tokeniser->context.match_doctype.count++;
+	if (tokeniser->context.match_cdata.count == 1 && c == 'C') {
+		tokeniser->context.match_cdata.count++;
 		hubbub_inputstream_advance(tokeniser->input);
-	} else if (tokeniser->context.match_doctype.count == 2 && c == 'D') {
-		tokeniser->context.match_doctype.count++;
+	} else if (tokeniser->context.match_cdata.count == 2 && c == 'D') {
+		tokeniser->context.match_cdata.count++;
 		hubbub_inputstream_advance(tokeniser->input);
-	} else if (tokeniser->context.match_doctype.count == 3 && c == 'A') {
-		tokeniser->context.match_doctype.count++;
+	} else if (tokeniser->context.match_cdata.count == 3 && c == 'A') {
+		tokeniser->context.match_cdata.count++;
 		hubbub_inputstream_advance(tokeniser->input);
-	} else if (tokeniser->context.match_doctype.count == 4 && c == 'T') {
-		tokeniser->context.match_doctype.count++;
+	} else if (tokeniser->context.match_cdata.count == 4 && c == 'T') {
+		tokeniser->context.match_cdata.count++;
 		hubbub_inputstream_advance(tokeniser->input);
-	} else if (tokeniser->context.match_doctype.count == 5 && c == 'A') {
-		tokeniser->context.match_doctype.count++;
+	} else if (tokeniser->context.match_cdata.count == 5 && c == 'A') {
+		tokeniser->context.match_cdata.count++;
 		hubbub_inputstream_advance(tokeniser->input);
-	} else if (tokeniser->context.match_doctype.count == 6 && c == '[') {
+	} else if (tokeniser->context.match_cdata.count == 6 && c == '[') {
 		tokeniser->context.current_chars.data.off = 0;
 		tokeniser->context.current_chars.len = 0;
 
 		tokeniser->state = HUBBUB_TOKENISER_STATE_CDATA_BLOCK;
 		hubbub_inputstream_advance(tokeniser->input);
 	} else {
-		switch (tokeniser->context.match_doctype.count) {
-		case 6: hubbub_inputstream_push_back(tokeniser->input, 'A');
-		case 5: hubbub_inputstream_push_back(tokeniser->input, 'T');
-		case 4: hubbub_inputstream_push_back(tokeniser->input, 'A');
-		case 3: hubbub_inputstream_push_back(tokeniser->input, 'D');
-		case 2: hubbub_inputstream_push_back(tokeniser->input, 'C');
-		case 1: hubbub_inputstream_push_back(tokeniser->input, '[');
-		}
+		/* Rewind as many characters as we matched */
+		hubbub_inputstream_rewind(tokeniser->input,
+				tokeniser->context.match_cdata.count);
 
 		tokeniser->context.current_comment.data.off = 0;
 		tokeniser->context.current_comment.len = 0;
-- 
cgit v1.2.3
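
The pattern this patch converges on: as a keyword ("DOCTYPE", "PUBLIC",
"SYSTEM", "[CDATA[") is matched character by character, a count is kept, and
on a mismatch the stream is rewound by that count in one
hubbub_inputstream_rewind() call, rather than pushing each matched character
back individually, in reverse order, through a fall-through switch. Below is
a minimal, self-contained C sketch of that control flow. The "struct stream"
type and its helpers are hypothetical stand-ins for hubbub's inputstream API
(which also handles out-of-data suspension and resumption, omitted here), so
read it as an illustration of the technique rather than the library's
implementation.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for hubbub's input stream: a buffer and a cursor. */
struct stream {
	const char *buf;
	size_t len;
	size_t pos;
};

/* Rewind the cursor by n characters (mirrors the role of
 * hubbub_inputstream_rewind() in the patch). */
static void stream_rewind(struct stream *s, size_t n)
{
	s->pos = (n > s->pos) ? 0 : s->pos - n;
}

/* Try to match a keyword at the cursor. On success, the cursor is left
 * after the keyword; on failure, it is rewound by the number of characters
 * that matched, in a single call, instead of one push-back per matched
 * character. */
static bool match_keyword(struct stream *s, const char *keyword)
{
	size_t count = 0;

	while (keyword[count] != '\0') {
		if (s->pos >= s->len || s->buf[s->pos] != keyword[count]) {
			/* Rewind as many characters as have been matched */
			stream_rewind(s, count);
			return false;
		}
		count++;
		s->pos++;
	}

	return true;
}

int main(void)
{
	struct stream s = { "DOCTYPE html", strlen("DOCTYPE html"), 0 };

	/* Full match: cursor ends up after the keyword. */
	printf("DOCTYPE matched: %d, pos now %zu\n",
			match_keyword(&s, "DOCTYPE"), s.pos);

	/* Mismatch: cursor is rewound to where matching started. */
	s.pos = 0;
	printf("[CDATA[ matched: %d, pos back at %zu\n",
			match_keyword(&s, "[CDATA["), s.pos);

	return 0;
}

Besides being clearer, the single rewind replaces up to six push-back calls,
each keyword spelled out a second time in reverse, which is the efficiency
gain the commit message refers to; and rewriting the CDATA matcher exposed
the bug it also fixes, where that code read and incremented
match_doctype.count instead of its own match_cdata.count.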