From 6d3950b141b1cdb1f29e929c8c8156562c7ccbc7 Mon Sep 17 00:00:00 2001
From: John Mark Bell <jmb@netsurf-browser.org>
Date: Thu, 31 Jul 2008 14:43:32 +0000
Subject: Merged revisions 4631-4838 via svnmerge from
 svn://source.netsurf-browser.org/branches/takkaria/hubbub-parserutils

........
  r4631 | takkaria | 2008-07-13 12:54:30 +0100 (Sun, 13 Jul 2008) | 2 lines

  Initial hatchet job moving to libparserutils (search and replace and a bit of cleaning up).  This doesn't compile.
........
  r4632 | takkaria | 2008-07-13 15:28:52 +0100 (Sun, 13 Jul 2008) | 2 lines

  libparserutilize everything up to the "before attribute name" state.  (Not compiling)
........
  r4633 | takkaria | 2008-07-13 15:32:14 +0100 (Sun, 13 Jul 2008) | 2 lines

  Replace all uses of "current_{comment|chars}" with just "chars".
........
  r4634 | takkaria | 2008-07-13 16:12:06 +0100 (Sun, 13 Jul 2008) | 2 lines

  Fix lots of compile errors, lpuise "before attribute name" state.
........
  r4636 | takkaria | 2008-07-13 17:23:17 +0100 (Sun, 13 Jul 2008) | 2 lines

  Finish lpuising the tag states, apart from character references.
........
  r4637 | takkaria | 2008-07-13 19:58:52 +0100 (Sun, 13 Jul 2008) | 2 lines

  lpuise the comment states.
........
  r4638 | takkaria | 2008-07-13 20:04:31 +0100 (Sun, 13 Jul 2008) | 2 lines

  Switch to setting hubbub_string::len to 0 instead of hubbub_string::ptr to NULL to indicate an empty buffer, as it was previously.
........
  r4639 | takkaria | 2008-07-13 21:02:11 +0100 (Sun, 13 Jul 2008) | 2 lines

  "lpu up" about half of the DOCTYPE handling stages.
........
  r4640 | takkaria | 2008-07-13 21:23:00 +0100 (Sun, 13 Jul 2008) | 2 lines

  Finish off LPUing the doctype modes.
........
  r4641 | takkaria | 2008-07-13 21:37:33 +0100 (Sun, 13 Jul 2008) | 2 lines

  The tokeniser uses lpu apart from the entity matcher, now.
........
  r4643 | takkaria | 2008-07-14 01:20:36 +0100 (Mon, 14 Jul 2008) | 2 lines

  Fix up the character reference matching stuff--still not properly dealt with, but compiles further.
........
  r4644 | takkaria | 2008-07-14 01:24:49 +0100 (Mon, 14 Jul 2008) | 2 lines

  Get the tokeniser compiling in its LPU'd form.
........
  r4645 | takkaria | 2008-07-14 01:26:34 +0100 (Mon, 14 Jul 2008) | 2 lines

  Remember to advance the stream position after emitting tokens.
........
  r4646 | takkaria | 2008-07-14 01:34:36 +0100 (Mon, 14 Jul 2008) | 2 lines

  Nuke the src/input directory and start work on the treebuilder.
........
  r4647 | takkaria | 2008-07-14 01:56:27 +0100 (Mon, 14 Jul 2008) | 2 lines

  Get hubbub building in its LPU'd form.
........
  r4648 | takkaria | 2008-07-14 02:41:03 +0100 (Mon, 14 Jul 2008) | 2 lines

  Get the tokeniser2 testrunner working.
........
  r4649 | takkaria | 2008-07-14 02:48:55 +0100 (Mon, 14 Jul 2008) | 2 lines

  Fix test LDFLAGS so things link properly.
........
  r4650 | takkaria | 2008-07-14 16:25:51 +0100 (Mon, 14 Jul 2008) | 2 lines

  Get testcases compiling, remove ones now covered by libparserutils.
........
  r4651 | takkaria | 2008-07-14 16:37:09 +0100 (Mon, 14 Jul 2008) | 2 lines

  Remove more tests covered by libpu.
........
  r4652 | takkaria | 2008-07-14 17:53:18 +0100 (Mon, 14 Jul 2008) | 2 lines

  Fix up the tokeniser a bit.
........
  r4653 | takkaria | 2008-07-14 19:02:15 +0100 (Mon, 14 Jul 2008) | 3 lines

   - Remove the buffer_handler stuff from hubbub
   - Add the basics of a buffer for attribute values and text.
........
  r4654 | takkaria | 2008-07-14 20:00:45 +0100 (Mon, 14 Jul 2008) | 2 lines

  Get character references working in attribute values, start trying to make them work in character tokens.
........
  r4656 | takkaria | 2008-07-14 23:28:52 +0100 (Mon, 14 Jul 2008) | 2 lines

  Get entities working a bit better.
........
  r4657 | takkaria | 2008-07-14 23:37:16 +0100 (Mon, 14 Jul 2008) | 2 lines

  Get entities working properly.  (!)
........
  r4658 | takkaria | 2008-07-14 23:56:10 +0100 (Mon, 14 Jul 2008) | 2 lines

  Make doctypes work a bit better.
........
  r4659 | takkaria | 2008-07-15 00:18:49 +0100 (Tue, 15 Jul 2008) | 2 lines

  Get DOCTYPEs working.
........
  r4660 | takkaria | 2008-07-15 00:26:36 +0100 (Tue, 15 Jul 2008) | 2 lines

  Fix CDATA sections.
........
  r4661 | takkaria | 2008-07-15 01:01:16 +0100 (Tue, 15 Jul 2008) | 2 lines

  Get comments working again.
........
  r4662 | takkaria | 2008-07-15 01:14:19 +0100 (Tue, 15 Jul 2008) | 2 lines

  Fix EOF in "after attribute name" state.
........
  r4664 | takkaria | 2008-07-15 01:30:27 +0100 (Tue, 15 Jul 2008) | 2 lines

  Put the tests in better order, remove one now superseded with libpu.
........
  r4665 | takkaria | 2008-07-15 01:46:29 +0100 (Tue, 15 Jul 2008) | 2 lines

  Remove a lot of now-redundant clearings of the current stream offset.
........
  r4667 | jmb | 2008-07-15 11:56:54 +0100 (Tue, 15 Jul 2008) | 2 lines

  Completely purge charset stuff from hubbub. Parserutils handles this now.
........
  r4677 | takkaria | 2008-07-15 21:03:42 +0100 (Tue, 15 Jul 2008) | 2 lines

  Get more tests passing, handle NUL bytes in data state.
........
  r4694 | takkaria | 2008-07-18 17:55:44 +0100 (Fri, 18 Jul 2008) | 3 lines

   - Handle CRs correctly in some token states.
   - Handle NULs correctly in the CDATA state.
........
  r4706 | takkaria | 2008-07-19 14:58:48 +0100 (Sat, 19 Jul 2008) | 2 lines

  Improve the tokeniser2 output a bit.
........
  r4721 | takkaria | 2008-07-21 20:57:29 +0100 (Mon, 21 Jul 2008) | 2 lines

  Get a better framework in place to allow switching to using a buffer mid-collect.  This fails a couple of testcases and doesn't implement proper CR or NUL support yet.
........
  r4725 | takkaria | 2008-07-23 17:20:07 +0100 (Wed, 23 Jul 2008) | 2 lines

  Make comment tokens in tokeniser2 display both expected and actual output.
........
  r4726 | takkaria | 2008-07-23 19:10:23 +0100 (Wed, 23 Jul 2008) | 4 lines

   - Add FINISH() macro which stops using buffered character collection.
   - Make the encoding U+FFFD in UTF-8 a global variable, for sanity
   - Make the bogus comment state deal with NULs correctly.
........
  r4730 | takkaria | 2008-07-24 00:35:16 +0100 (Thu, 24 Jul 2008) | 2 lines

  Try to get NUL bytes handled as the spec says.
........
  r4731 | takkaria | 2008-07-24 00:40:59 +0100 (Thu, 24 Jul 2008) | 2 lines

  Get CRs working in the data state.
........
  r4732 | takkaria | 2008-07-24 00:47:45 +0100 (Thu, 24 Jul 2008) | 2 lines

  Set force-quirks correctly when failing to match PUBLIC or SYSTEM in DOCTYPEs.
........
  r4773 | takkaria | 2008-07-28 15:34:41 +0100 (Mon, 28 Jul 2008) | 2 lines

  Fix up the tokeniser, finally.
........
  r4801 | takkaria | 2008-07-29 15:59:31 +0100 (Tue, 29 Jul 2008) | 2 lines

  Refactor macros a bit.
........
  r4802 | takkaria | 2008-07-29 16:04:17 +0100 (Tue, 29 Jul 2008) | 2 lines

  Do s/HUBBUB_TOKENISER_STATE_/STATE_/, for shorter line lengths.
........
  r4805 | takkaria | 2008-07-29 16:58:37 +0100 (Tue, 29 Jul 2008) | 4 lines

  Start cleaning up the hubbub tokeniser;
   - refactor to use new inline emit_character_token() and emit_current_tag() functions; makes code clearer
   - check EOF before using the CHAR() macro, so eventually it can be removed.
........
  r4806 | takkaria | 2008-07-29 17:45:36 +0100 (Tue, 29 Jul 2008) | 2 lines

  More cleanup like the previous commit.
........
  r4807 | takkaria | 2008-07-29 19:48:44 +0100 (Tue, 29 Jul 2008) | 2 lines

  Rewrite comment-handling code to be just the one function, whilst updating it to handle CRs and NULs properly.  (All comments now always use the buffer.)
........
  r4820 | takkaria | 2008-07-30 14:14:49 +0100 (Wed, 30 Jul 2008) | 2 lines

  Finish off the first sweep of cleaning up and refactoring the tokeniser.
........
  r4821 | takkaria | 2008-07-30 15:12:22 +0100 (Wed, 30 Jul 2008) | 2 lines

  Add copyright statement.
........
  r4822 | takkaria | 2008-07-30 17:23:01 +0100 (Wed, 30 Jul 2008) | 2 lines

  Apply changes made to tokeniser2 to tokeniser3.
........
  r4829 | takkaria | 2008-07-31 01:59:07 +0100 (Thu, 31 Jul 2008) | 4 lines

   - Make the tokeniser save everything into the buffer, at least for now.
   - Fix logic errors introduced in refactoring
   - Avoid emitting more tokens than we have to (e.g. instead of emitting "<>" and switching back to the data state, just switch back to the data state and let it take care of it)
........
  r4830 | takkaria | 2008-07-31 02:03:08 +0100 (Thu, 31 Jul 2008) | 2 lines

  Small treebuilder <isindex> fix.
........
  r4831 | takkaria | 2008-07-31 02:32:29 +0100 (Thu, 31 Jul 2008) | 2 lines

  Stop holding on to pointers to character data across treebuilder calls.
........
  r4832 | takkaria | 2008-07-31 02:45:09 +0100 (Thu, 31 Jul 2008) | 18 lines

  Merge revisions 4620-4831 from trunk hubbub to libinputstream hubbub, modulo one change to test/Makefile which makes the linker choke when linking tests.

  	------------------------------------------------------------------------
  	r4666 | jmb | 2008-07-15 11:52:13 +0100 (Tue, 15 Jul 2008) | 3 lines

  	Make tree2 perform reference counting.
  	Fix bits of the treebuilder to perform reference counting correctly in the face of *result not pointing to the same object as the node passed in to the treebuilder client callbacks.
  	------------------------------------------------------------------------
  	r4668 | jmb | 2008-07-15 12:37:30 +0100 (Tue, 15 Jul 2008) | 2 lines

  	Fully document treebuilder callbacks.
  	------------------------------------------------------------------------
  	r4675 | takkaria | 2008-07-15 21:01:03 +0100 (Tue, 15 Jul 2008) | 2 lines

  	Fix memory leak in tokeniser2.
  	------------------------------------------------------------------------
........
  r4834 | jmb | 2008-07-31 09:57:51 +0100 (Thu, 31 Jul 2008) | 2 lines

  Fix infinite loop in charset detector
........
  r4835 | jmb | 2008-07-31 13:01:24 +0100 (Thu, 31 Jul 2008) | 2 lines

  Actually store namespaces on formatting list. Otherwise we read uninitialised memory. Add some semblance of filling allocations with junk to myrealloc().
........
  r4836 | jmb | 2008-07-31 13:06:07 +0100 (Thu, 31 Jul 2008) | 2 lines

  Lose debug again
........
  r4837 | jmb | 2008-07-31 15:09:19 +0100 (Thu, 31 Jul 2008) | 2 lines

  Lose obsolete testdata (this is now part of lpu)
........

svn path=/trunk/hubbub/; revision=4839
---
 src/charset/codec.c | 188 ----------------------------------------------------
 1 file changed, 188 deletions(-)
 delete mode 100644 src/charset/codec.c

(limited to 'src/charset/codec.c')

diff --git a/src/charset/codec.c b/src/charset/codec.c
deleted file mode 100644
index 727d600..0000000
--- a/src/charset/codec.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * This file is part of Hubbub.
- * Licensed under the MIT License,
- *                http://www.opensource.org/licenses/mit-license.php
- * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
- */
-
-#include <string.h>
-
-#include "charset/aliases.h"
-
-#include "codec_impl.h"
-
-extern hubbub_charsethandler hubbub_iconv_codec_handler;
-extern hubbub_charsethandler hubbub_utf8_codec_handler;
-extern hubbub_charsethandler hubbub_utf16_codec_handler;
-
-static hubbub_charsethandler *handler_table[] = {
-	&hubbub_utf8_codec_handler,
-	&hubbub_utf16_codec_handler,
-	&hubbub_iconv_codec_handler,
-	NULL,
-};
-
-/**
- * Create a charset codec
- *
- * \param charset  Target charset
- * \param alloc    Memory (de)allocation function
- * \param pw       Pointer to client-specific private data (may be NULL)
- * \return Pointer to codec instance, or NULL on failure
- */
-hubbub_charsetcodec *hubbub_charsetcodec_create(const char *charset,
-		hubbub_alloc alloc, void *pw)
-{
-	hubbub_charsetcodec *codec;
-	hubbub_charsethandler **handler;
-	const hubbub_aliases_canon * canon;
-
-	if (charset == NULL || alloc == NULL)
-		return NULL;
-
-	/* Canonicalise charset name. */
-	canon = hubbub_alias_canonicalise(charset, strlen(charset));
-	if (canon == NULL)
-		return NULL;
-
-	/* Search for handler class */
-	for (handler = handler_table; *handler != NULL; handler++) {
-		if ((*handler)->handles_charset(canon->name))
-			break;
-	}
-
-	/* None found */
-	if ((*handler) == NULL)
-		return NULL;
-
-	/* Instantiate class */
-	codec = (*handler)->create(canon->name, alloc, pw);
-	if (codec == NULL)
-		return NULL;
-
-	/* and initialise it */
-	codec->mibenum = canon->mib_enum;
-
-	codec->filter = NULL;
-	codec->filter_pw = NULL;
-
-	codec->errormode = HUBBUB_CHARSETCODEC_ERROR_LOOSE;
-
-	codec->alloc = alloc;
-	codec->alloc_pw = pw;
-
-	return codec;
-}
-
-/**
- * Destroy a charset codec
- *
- * \param codec  The codec to destroy
- */
-void hubbub_charsetcodec_destroy(hubbub_charsetcodec *codec)
-{
-	if (codec == NULL)
-		return;
-
-	codec->handler.destroy(codec);
-
-	codec->alloc(codec, 0, codec->alloc_pw);
-}
-
-/**
- * Configure a charset codec
- *
- * \param codec   The codec to configure
- * \parem type    The codec option type to configure
- * \param params  Option-specific parameters
- * \return HUBBUB_OK on success, appropriate error otherwise
- */
-hubbub_error hubbub_charsetcodec_setopt(hubbub_charsetcodec *codec,
-		hubbub_charsetcodec_opttype type,
-		hubbub_charsetcodec_optparams *params)
-{
-	if (codec == NULL || params == NULL)
-		return HUBBUB_BADPARM;
-
-	switch (type) {
-	case HUBBUB_CHARSETCODEC_FILTER_FUNC:
-		codec->filter = params->filter_func.filter;
-		codec->filter_pw = params->filter_func.pw;
-		break;
-
-	case HUBBUB_CHARSETCODEC_ERROR_MODE:
-		codec->errormode = params->error_mode.mode;
-		break;
-	}
-
-	return HUBBUB_OK;
-}
-
-/**
- * Encode a chunk of UCS4 data into a codec's charset
- *
- * \param codec      The codec to use
- * \param source     Pointer to pointer to source data
- * \param sourcelen  Pointer to length (in bytes) of source data
- * \param dest       Pointer to pointer to output buffer
- * \param destlen    Pointer to length (in bytes) of output buffer
- * \return HUBBUB_OK on success, appropriate error otherwise.
- *
- * source, sourcelen, dest and destlen will be updated appropriately on exit
- */
-hubbub_error hubbub_charsetcodec_encode(hubbub_charsetcodec *codec,
-		const uint8_t **source, size_t *sourcelen,
-		uint8_t **dest, size_t *destlen)
-{
-	if (codec == NULL || source == NULL || *source == NULL ||
-			sourcelen == NULL || dest == NULL || *dest == NULL ||
-			destlen == NULL)
-		return HUBBUB_BADPARM;
-
-	return codec->handler.encode(codec, source, sourcelen, dest, destlen);
-}
-
-/**
- * Decode a chunk of data in a codec's charset into UCS4
- *
- * \param codec      The codec to use
- * \param source     Pointer to pointer to source data
- * \param sourcelen  Pointer to length (in bytes) of source data
- * \param dest       Pointer to pointer to output buffer
- * \param destlen    Pointer to length (in bytes) of output buffer
- * \return HUBBUB_OK on success, appropriate error otherwise.
- *
- * source, sourcelen, dest and destlen will be updated appropriately on exit
- *
- * Call this with a source length of 0 to flush any buffers.
- */
-hubbub_error hubbub_charsetcodec_decode(hubbub_charsetcodec *codec,
-		const uint8_t **source, size_t *sourcelen,
-		uint8_t **dest, size_t *destlen)
-{
-	if (codec == NULL || source == NULL || *source == NULL ||
-			sourcelen == NULL || dest == NULL || *dest == NULL ||
-			destlen == NULL)
-		return HUBBUB_BADPARM;
-
-	return codec->handler.decode(codec, source, sourcelen, dest, destlen);
-}
-
-/**
- * Clear a charset codec's encoding state
- *
- * \param codec  The codec to reset
- * \return HUBBUB_OK on success, appropriate error otherwise
- */
-hubbub_error hubbub_charsetcodec_reset(hubbub_charsetcodec *codec)
-{
-	if (codec == NULL)
-		return HUBBUB_BADPARM;
-
-	/* Reset filter */
-	if (codec->filter)
-		codec->filter(HUBBUB_CHARSETCODEC_NULL, NULL, NULL, NULL);
-
-	return codec->handler.reset(codec);
-}
-
-- 
cgit v1.2.3