From 137238e40c47e8e828d85d6d1cc0ffb9a6b3fe38 Mon Sep 17 00:00:00 2001
From: Andrew Sidwell
Date: Sun, 3 Aug 2008 15:47:08 +0000
Subject: Remove tokeniser->to_buf, SWITCH(), and COLLECT_CHAR(), none of which
 are now necessary. Should provide a small speedup.

svn path=/trunk/hubbub/; revision=4873
---
 src/tokeniser/tokeniser.c | 80 +++++++++++++++--------------------------------
 1 file changed, 26 insertions(+), 54 deletions(-)

diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 3a461d1..6a9396d 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -108,8 +108,6 @@ typedef struct hubbub_tokeniser_context {
 	 * emitted */
 	size_t last_start_tag_len;
 
-	bool to_buf;
-
 	struct {
 		uint32_t count;
 		bool match;
@@ -574,22 +572,13 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
 				cptr, (lengt)); \
 		(str).ptr = data; \
 		(str).len = (lengt); \
-		tokeniser->context.to_buf = true; \
 	} while (0)
 
 #define COLLECT(str, cptr, length) \
 	do { \
 		assert(str.len != 0); \
-		if (tokeniser->context.to_buf == true) { \
-			parserutils_buffer_append(tokeniser->buffer, \
-					(uint8_t *) cptr, (length)); \
-		} \
-		(str).len += (length); \
-	} while (0)
-
-#define COLLECT_NOBUF(str, length) \
-	do { \
-		assert(str.len != 0); \
+		parserutils_buffer_append(tokeniser->buffer, \
+				(uint8_t *) cptr, (length)); \
 		(str).len += (length); \
 	} while (0)
 
@@ -602,35 +591,19 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
 		} \
 	} while (0)
 
-#define COLLECT_MS_NOBUF(str, cptr, length) \
+#define COLLECT_NOBUF(str, length) \
 	do { \
+		assert(str.len != 0); \
 		(str).len += (length); \
 	} while (0)
 
-#define FINISH(str) \
-	tokeniser->context.to_buf = false
-
-#define SWITCH(str) \
-	do { \
-		uint8_t *data = tokeniser->buffer->data + \
-				tokeniser->buffer->length; \
-		parserutils_buffer_append( \
-				tokeniser->buffer, \
-				(str).ptr, (str).len); \
-		(str).ptr = data; \
-		tokeniser->context.to_buf = true; \
-	} while (0)
-
-#define COLLECT_CHAR(str, cptr, length) \
+#define COLLECT_MS_NOBUF(str, cptr, length) \
 	do { \
-		assert(str.len != 0); \
-		if (tokeniser->context.to_buf == false) { \
-			SWITCH(str); \
-		} \
-		parserutils_buffer_append(tokeniser->buffer, cptr, (length)); \
-		str.len += (length); \
+		(str).len += (length); \
 	} while (0)
 
+#define FINISH(str) \
+	/* no-op */
 
 
 /**
@@ -1211,13 +1184,13 @@ bool hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
 		emit_current_tag(tokeniser);
 		tokeniser->state = STATE_DATA;
 	} else if (c == '\0') {
-		COLLECT_CHAR(ctag->name, u_fffd, sizeof(u_fffd));
+		COLLECT(ctag->name, u_fffd, sizeof(u_fffd));
 	} else if (c == '/') {
 		FINISH(ctag->name);
 		tokeniser->state = STATE_SELF_CLOSING_START_TAG;
 	} else if ('A' <= c && c <= 'Z') {
 		uint8_t lc = (c + 0x20);
-		COLLECT_CHAR(ctag->name, &lc, len);
+		COLLECT(ctag->name, &lc, len);
 	} else {
 		COLLECT(ctag->name, cptr, len);
 	}
@@ -1328,11 +1301,11 @@ bool hubbub_tokeniser_handle_attribute_name(hubbub_tokeniser *tokeniser)
 		FINISH(ctag->attributes[ctag->n_attributes - 1].name);
 		tokeniser->state = STATE_SELF_CLOSING_START_TAG;
 	} else if (c == '\0') {
-		COLLECT_CHAR(ctag->attributes[ctag->n_attributes - 1].name,
+		COLLECT(ctag->attributes[ctag->n_attributes - 1].name,
 				u_fffd, sizeof(u_fffd));
 	} else if ('A' <= c && c <= 'Z') {
 		uint8_t lc = (c + 0x20);
-		COLLECT_CHAR(ctag->attributes[ctag->n_attributes - 1].name,
+		COLLECT(ctag->attributes[ctag->n_attributes - 1].name,
 				&lc, len);
 	} else {
 		COLLECT(ctag->attributes[ctag->n_attributes - 1].name,
@@ -1492,7 +1465,7 @@ bool hubbub_tokeniser_handle_attribute_value_dq(hubbub_tokeniser *tokeniser)
 		/* Don't eat the '&'; it'll be handled by entity consumption */
 	} else if (c == '\0') {
 		COLLECT_NOBUF(tokeniser->context.chars, len);
-		COLLECT_CHAR(ctag->attributes[ctag->n_attributes - 1].value,
+		COLLECT(ctag->attributes[ctag->n_attributes - 1].value,
 				u_fffd, sizeof(u_fffd));
 	} else if (c == '\r') {
 		cptr = parserutils_inputstream_peek(
@@ -1504,7 +1477,7 @@ bool hubbub_tokeniser_handle_attribute_value_dq(hubbub_tokeniser *tokeniser)
 			return false;
 		} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF ||
 				CHAR(cptr) != '\n') {
-			COLLECT_CHAR(ctag->attributes[
+			COLLECT(ctag->attributes[
 					ctag->n_attributes - 1].value,
 					&lf, sizeof(lf));
 		}
@@ -1550,7 +1523,7 @@ bool hubbub_tokeniser_handle_attribute_value_sq(hubbub_tokeniser *tokeniser)
 		/* Don't eat the '&'; it'll be handled by entity consumption */
 	} else if (c == '\0') {
 		COLLECT_NOBUF(tokeniser->context.chars, len);
-		COLLECT_CHAR(ctag->attributes[ctag->n_attributes - 1].value,
+		COLLECT(ctag->attributes[ctag->n_attributes - 1].value,
 				u_fffd, sizeof(u_fffd));
 	} else if (c == '\r') {
 		cptr = parserutils_inputstream_peek(
@@ -1562,7 +1535,7 @@ bool hubbub_tokeniser_handle_attribute_value_sq(hubbub_tokeniser *tokeniser)
 			return false;
 		} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF ||
 				CHAR(cptr) != '\n') {
-			COLLECT_CHAR(ctag->attributes[
+			COLLECT(ctag->attributes[
 					ctag->n_attributes - 1].value,
 					&lf, sizeof(lf));
 		}
@@ -1613,7 +1586,7 @@ bool hubbub_tokeniser_handle_attribute_value_uq(hubbub_tokeniser *tokeniser)
 		tokeniser->state = STATE_DATA;
 	} else if (c == '\0') {
 		COLLECT_NOBUF(tokeniser->context.chars, len);
-		COLLECT_CHAR(ctag->attributes[ctag->n_attributes - 1].value,
+		COLLECT(ctag->attributes[ctag->n_attributes - 1].value,
 				u_fffd, sizeof(u_fffd));
 	} else {
 		if (c == '"' || c == '\'' || c == '=') {
@@ -1657,7 +1630,6 @@ bool hubbub_tokeniser_handle_character_reference_in_attribute_value(
 			START_BUF(attr->value,
 					utf8, sizeof(utf8) - len);
 		} else {
-			SWITCH(attr->value);
 			COLLECT(attr->value, utf8, sizeof(utf8) - len);
 		}
 	} else {
@@ -2109,7 +2081,7 @@ bool hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser)
 		emit_current_doctype(tokeniser, false);
 		tokeniser->state = STATE_DATA;
 	} else if (c == '\0') {
-		COLLECT_CHAR(cdoc->name, u_fffd, sizeof(u_fffd));
+		COLLECT(cdoc->name, u_fffd, sizeof(u_fffd));
 	} else {
 		COLLECT(cdoc->name, cptr, len);
 	}
@@ -2263,7 +2235,7 @@ bool hubbub_tokeniser_handle_doctype_public_dq(hubbub_tokeniser *tokeniser)
 		if (cdoc->public_id.len == 0) {
 			START_BUF(cdoc->public_id, u_fffd, sizeof(u_fffd));
 		} else {
-			COLLECT_CHAR(cdoc->public_id, u_fffd, sizeof(u_fffd));
+			COLLECT(cdoc->public_id, u_fffd, sizeof(u_fffd));
 		}
 	} else if (c == '\r') {
 		cptr = parserutils_inputstream_peek(
@@ -2275,7 +2247,7 @@ bool hubbub_tokeniser_handle_doctype_public_dq(hubbub_tokeniser *tokeniser)
 			return false;
 		} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF ||
 				CHAR(cptr) != '\n') {
-			COLLECT_CHAR(cdoc->public_id, &lf, sizeof(lf));
+			COLLECT(cdoc->public_id, &lf, sizeof(lf));
 		}
 	} else {
 		COLLECT_MS(cdoc->public_id, cptr, len);
@@ -2315,7 +2287,7 @@ bool hubbub_tokeniser_handle_doctype_public_sq(hubbub_tokeniser *tokeniser)
 			START_BUF(cdoc->public_id,
 					u_fffd, sizeof(u_fffd));
 		} else {
-			COLLECT_CHAR(cdoc->public_id,
+			COLLECT(cdoc->public_id,
 					u_fffd, sizeof(u_fffd));
 		}
 	} else if (c == '\r') {
@@ -2328,7 +2300,7 @@ bool hubbub_tokeniser_handle_doctype_public_sq(hubbub_tokeniser *tokeniser)
 			return false;
 		} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF ||
 				CHAR(cptr) != '\n') {
-			COLLECT_CHAR(cdoc->public_id, &lf, sizeof(lf));
+			COLLECT(cdoc->public_id, &lf, sizeof(lf));
 		}
 	} else {
 		COLLECT_MS(cdoc->public_id, cptr, len);
@@ -2492,7 +2464,7 @@ bool hubbub_tokeniser_handle_doctype_system_dq(hubbub_tokeniser *tokeniser)
 		if (cdoc->public_id.len == 0) {
 			START_BUF(cdoc->system_id, u_fffd, sizeof(u_fffd));
 		} else {
-			COLLECT_CHAR(cdoc->system_id,
+			COLLECT(cdoc->system_id,
 					u_fffd, sizeof(u_fffd));
 		}
 	} else if (c == '\r') {
@@ -2505,7 +2477,7 @@ bool hubbub_tokeniser_handle_doctype_system_dq(hubbub_tokeniser *tokeniser)
 			return false;
 		} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF ||
 				CHAR(cptr) != '\n') {
-			COLLECT_CHAR(cdoc->system_id, &lf, sizeof(lf));
+			COLLECT(cdoc->system_id, &lf, sizeof(lf));
 		}
 	} else {
 		COLLECT_MS(cdoc->system_id, cptr, len);
@@ -2544,7 +2516,7 @@ bool hubbub_tokeniser_handle_doctype_system_sq(hubbub_tokeniser *tokeniser)
 		if (cdoc->public_id.len == 0) {
 			START_BUF(cdoc->system_id, u_fffd, sizeof(u_fffd));
 		} else {
-			COLLECT_CHAR(cdoc->system_id,
+			COLLECT(cdoc->system_id,
 					u_fffd, sizeof(u_fffd));
 		}
 	} else if (c == '\r') {
@@ -2557,7 +2529,7 @@ bool hubbub_tokeniser_handle_doctype_system_sq(hubbub_tokeniser *tokeniser)
 			return false;
 		} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF ||
 				CHAR(cptr) != '\n') {
-			COLLECT_CHAR(cdoc->system_id, &lf, sizeof(lf));
+			COLLECT(cdoc->system_id, &lf, sizeof(lf));
 		}
 	} else {
 		COLLECT_MS(cdoc->system_id, cptr, len);
-- 
cgit v1.2.3
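For anyone reading the patch without the surrounding file: after this change, every string built with START_BUF()/COLLECT() is backed by the tokeniser's side buffer from its first byte, so appending a fragment (including substituted bytes such as a lowercased letter or U+FFFD for NUL) is one unconditional copy, while the zero-copy paths keep using the length-only COLLECT_NOBUF()/COLLECT_MS_NOBUF() macros. That appears to be why the to_buf flag, the SWITCH() migration, and the branching COLLECT_CHAR() all become dead code. The sketch below models just that idea; it is illustrative only. The names (side_buffer, hstring, start_buf, collect), the fixed-capacity buffer, and the offset-based string are assumptions made for a self-contained example; hubbub's real macros wrap parserutils_buffer_append() and keep a pointer into a growable buffer.

/*
 * Minimal sketch of the post-change collection scheme.  All names here
 * are invented for illustration and are not hubbub's API.
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define BUF_CAP 4096

typedef struct {
	unsigned char data[BUF_CAP];	/* fixed capacity; the real buffer grows */
	size_t length;
} side_buffer;

typedef struct {
	size_t off;	/* start of this string within the side buffer */
	size_t len;
} hstring;

static side_buffer buf;

/* START_BUF analogue: copy the first fragment in and note where it lives. */
static void start_buf(hstring *s, const unsigned char *p, size_t n)
{
	assert(buf.length + n <= BUF_CAP);
	memcpy(buf.data + buf.length, p, n);
	s->off = buf.length;
	s->len = n;
	buf.length += n;
}

/*
 * COLLECT analogue: the string is buffer-backed from the outset, so
 * appending is one unconditional copy.  This also covers the old
 * COLLECT_CHAR case of injecting bytes that never appeared in the
 * input -- no to_buf test, no SWITCH().  Appending at the end of the
 * buffer is valid because, as in the tokeniser, only one string is
 * collected at a time, so its bytes are always the newest in the buffer.
 */
static void collect(hstring *s, const unsigned char *p, size_t n)
{
	assert(s->len != 0);
	assert(buf.length + n <= BUF_CAP);
	memcpy(buf.data + buf.length, p, n);
	s->len += n;
	buf.length += n;
}

int main(void)
{
	static const unsigned char u_fffd[3] = { 0xEF, 0xBF, 0xBD };
	hstring name;

	start_buf(&name, (const unsigned char *) "di", 2);	/* input bytes */
	collect(&name, (const unsigned char *) "v", 1);		/* lowercased 'V' */
	collect(&name, u_fffd, sizeof(u_fffd));			/* substituted NUL */

	printf("%.*s (%zu bytes)\n", (int) name.len,
			(const char *) (buf.data + name.off), name.len);
	return 0;
}

Compiled with, say, cc -std=c99 sketch.c, this prints the collected 6-byte name ("div" plus the three UTF-8 bytes of U+FFFD), with no mode flag consulted anywhere -- the branch-free append is the source of the small speedup the commit message mentions.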