6 files changed, 41 insertions, 27 deletions
diff --git a/docs/Treebuilder b/docs/Treebuilder
index 9c7dce4..d8924f5 100644
--- a/docs/Treebuilder
+++ b/docs/Treebuilder
@@ -187,4 +187,16 @@ Callback behaviour
 
   This function must set the quirks mode flag of the document to "mode".
 
-
+  | int hubbub_tree_encoding_change(void *ctx,
+  |                                 const char *name);
+  
+  This function is called when the treebuilder sees a meta tag that specifies
+  a charset. [1]  The client is responsible for deciding whether the encoding
+  currently being used to process the document should actually change; if so,
+  this function must return 1 (any other value means "no change").  In that
+  case, the parser instance will return the error code HUBBUB_ENCODINGCHANGE
+  when it returns from parsing the chunk that triggered the encoding change.
+  The parser instance should then be destroyed and a new one created with the
+  new encoding specified.
+  
+  [1] http://www.whatwg.org/specs/web-apps/current-work/#in-head
diff --git a/include/hubbub/types.h b/include/hubbub/types.h
index 42d1460..07ef2ab 100644
--- a/include/hubbub/types.h
+++ b/include/hubbub/types.h
@@ -15,11 +15,10 @@
  * A client-dictated charset will override all others.
  * A document-specified charset will override autodetection or the default */
 typedef enum hubbub_charset_source {
-	HUBBUB_CHARSET_UNKNOWN          = 0,	/**< Unknown */
-	HUBBUB_CHARSET_DEFAULT          = 1,	/**< Default setting */
-	HUBBUB_CHARSET_DETECTED         = 2,	/**< Autodetected */
-	HUBBUB_CHARSET_DOCUMENT         = 3,	/**< Defined in document */
-	HUBBUB_CHARSET_DICTATED         = 4,	/**< Dictated by client */
+	HUBBUB_CHARSET_UNKNOWN		= 0,	/**< Unknown */
+	HUBBUB_CHARSET_TENTATIVE	= 1,	/**< Charset may be changed
+						 * with further data */
+	HUBBUB_CHARSET_CONFIDENT	= 2,	/**< Charset definite */
 } hubbub_charset_source;
 
 /**
diff --git a/src/charset/detect.c b/src/charset/detect.c
index 7d3459f..f3f2e4f 100644
--- a/src/charset/detect.c
+++ b/src/charset/detect.c
@@ -49,19 +49,18 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
 	if (data == NULL || mibenum == NULL || source == NULL)
 		return PARSERUTILS_BADPARM;
 
-	/* 1 */
+	/* 1. */
 
 	/* If the source is dictated, there's nothing for us to do */
-	if (*source == HUBBUB_CHARSET_DICTATED) {
-		/* confidence = certain; */
+	if (*source == HUBBUB_CHARSET_CONFIDENT) {
 		return PARSERUTILS_OK;
 	}
 
-	/* 2 */
+	/* 2. */
 
 	/** \todo We probably want to wait for ~512 bytes of data / 500ms here */
 
-	/* 3 */
+	/* 3. */
 
 	/* We need at least 3 bytes of data */
 	if (len < 3)
@@ -71,13 +70,12 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
 	charset = hubbub_charset_read_bom(data, len);
 	if (charset != 0) {
 		*mibenum = charset;
-		*source = HUBBUB_CHARSET_DOCUMENT;
-		/* confidence = certain; */
+		*source = HUBBUB_CHARSET_CONFIDENT;
 
 		return PARSERUTILS_OK;
 	}
 
-	/* 4 */
+	/* 4. */
 
 	/* No BOM was found, so we must look for a meta charset within
 	 * the document itself. */
@@ -111,8 +109,7 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
 					"UTF-32BE", SLEN("UTF-32BE"))) {
 
 			*mibenum = charset;
-			*source = HUBBUB_CHARSET_DOCUMENT;
-			/* confidence = tentative; */
+			*source = HUBBUB_CHARSET_TENTATIVE;
 
 			return PARSERUTILS_OK;
 		}
@@ -126,7 +123,7 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
 	/* We failed to autodetect a charset, so use the default fallback */
 default_encoding:
 
-	/* 7 */
+	/* 7. */
 
 	charset = parserutils_charset_mibenum_from_name("Windows-1252",
 			SLEN("Windows-1252"));
@@ -135,7 +132,7 @@ default_encoding:
 				SLEN("ISO-8859-1"));
 
 	*mibenum = charset;
-	*source = HUBBUB_CHARSET_DEFAULT;
+	*source = HUBBUB_CHARSET_TENTATIVE;
 
 	return PARSERUTILS_OK;
 }
diff --git a/src/parser.c b/src/parser.c
index 7f187a6..e43a309 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -46,7 +46,7 @@ hubbub_parser *hubbub_parser_create(const char *enc,
 		return NULL;
 
 	parser->stream = parserutils_inputstream_create(enc,
-		enc != NULL ? HUBBUB_CHARSET_DICTATED : HUBBUB_CHARSET_UNKNOWN,
+		enc != NULL ? HUBBUB_CHARSET_CONFIDENT : HUBBUB_CHARSET_UNKNOWN,
 		hubbub_charset_extract, alloc, pw);
 	if (parser->stream == NULL) {
 		alloc(parser, 0, pw);
@@ -105,7 +105,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
 		hubbub_parser_opttype type,
 		hubbub_parser_optparams *params)
 {
-	hubbub_error result = HUBBUB_OK;;
+	hubbub_error result = HUBBUB_OK;
 
 	if (parser == NULL || params == NULL)
 		return HUBBUB_BADPARM;
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 26fc1fb..0bf72ef 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -558,7 +558,7 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
 		}
 	}
 
-	return HUBBUB_OK;
+	return (cont == HUBBUB_OOD) ? HUBBUB_OK : cont;
 }
 
 
diff --git a/src/treebuilder/in_head.c b/src/treebuilder/in_head.c
index 88fcff5..897610b 100644
--- a/src/treebuilder/in_head.c
+++ b/src/treebuilder/in_head.c
@@ -64,20 +64,26 @@ static hubbub_error process_meta_in_head(hubbub_treebuilder *treebuilder,
 		if (treebuilder->tree_handler->encoding_change) {
 			const char *name = parserutils_charset_mibenum_to_name(
 					charset_enc);
-			treebuilder->tree_handler->encoding_change(
+
+			/* 1 indicates the encoding should actually change */
+			if (treebuilder->tree_handler->encoding_change(
 					treebuilder->tree_handler->ctx,
-					name);
+					name) == 1) {
+				return HUBBUB_ENCODINGCHANGE;
+			}
 		}
-		return HUBBUB_ENCODINGCHANGE;
 	} else if (content_type_enc != 0) {
 		if (treebuilder->tree_handler->encoding_change) {
 			const char *name = parserutils_charset_mibenum_to_name(
 					content_type_enc);
-			treebuilder->tree_handler->encoding_change(
+
+			/* 1 indicates the encoding should actually change */
+			if (treebuilder->tree_handler->encoding_change(
 					treebuilder->tree_handler->ctx,
-					name);
+					name) == 1) {
+				return HUBBUB_ENCODINGCHANGE;
+			}
 		}
-		return HUBBUB_ENCODINGCHANGE;
 	}
 
 	return HUBBUB_OK;