From 2ef742b2bbe323e50001bece2116734ec2b01ee0 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Mon, 7 Apr 2008 01:56:17 +0000 Subject: Fix recalculation of used document buffer length after resizing when the last attempt to write into the buffer failed mid-way through a multibyte sequence. Add regression test for this. Include regression tests in the testsuite index. svn path=/trunk/hubbub/; revision=4075 --- test/INDEX | 5 +++ test/Makefile | 2 +- test/regression/stream-nomem.c | 88 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 test/regression/stream-nomem.c (limited to 'test') diff --git a/test/INDEX b/test/INDEX index fc45511..73b8039 100644 --- a/test/INDEX +++ b/test/INDEX @@ -15,3 +15,8 @@ parser-utf16 Public parser API (utf-16 internally) html tokeniser HTML tokeniser html tokeniser2 HTML tokeniser (again) tokeniser2 tree Treebuilding API html + +# Regression tests +regression/cscodec-segv Segfault in charset codecs +regression/filter-segv Segfault in input filtering +regression/stream-nomem Inputstream buffer expansion diff --git a/test/Makefile b/test/Makefile index 6df42d7..675b043 100644 --- a/test/Makefile +++ b/test/Makefile @@ -34,7 +34,7 @@ DEBUG = OBJS = aliases cscodec csdetect dict entities filter hubbub \ inputstream parser parser-utf16 tokeniser tokeniser2 \ tree -OBJS += regression/cscodec-segv regression/filter-segv +OBJS += regression/cscodec-segv regression/filter-segv regression/stream-nomem .PHONY: clean debug export release setup test diff --git a/test/regression/stream-nomem.c b/test/regression/stream-nomem.c new file mode 100644 index 0000000..7233ac7 --- /dev/null +++ b/test/regression/stream-nomem.c @@ -0,0 +1,88 @@ +#include <stdio.h> +#include <stdlib.h> + +#include <hubbub/hubbub.h> + +#include "utils/utils.h" + +#include "input/inputstream.h" + +#include "testutils.h" + +static void *myrealloc(void *ptr, size_t len, void *pw) +{ + UNUSED(pw); + + return realloc(ptr, len); +} + +int main(int argc, char 
**argv) +{ + hubbub_inputstream *stream; + + /* This is specially calculated so that the inputstream is forced to + * reallocate (it assumes that the inputstream's buffer chunk size + * is 4k) */ +#define BUFFER_SIZE (4096 + 4) + uint8_t input_buffer[BUFFER_SIZE]; + uint8_t *buffer; + size_t buflen; + uint32_t c; + + if (argc != 2) { + printf("Usage: %s <aliases_file>\n", argv[0]); + return 1; + } + + /* Populate the buffer with something sane */ + memset(input_buffer, 'a', BUFFER_SIZE); + /* Now, set up our test data */ + input_buffer[BUFFER_SIZE - 1] = '5'; + input_buffer[BUFFER_SIZE - 2] = '4'; + input_buffer[BUFFER_SIZE - 3] = '\xbd'; + input_buffer[BUFFER_SIZE - 4] = '\xbf'; + /* This byte will occupy the 4095th byte in the buffer and + * thus cause the entirety of U+FFFD to be buffered until after + * the buffer has been enlarged */ + input_buffer[BUFFER_SIZE - 5] = '\xef'; + input_buffer[BUFFER_SIZE - 6] = '3'; + input_buffer[BUFFER_SIZE - 7] = '2'; + input_buffer[BUFFER_SIZE - 8] = '1'; + + assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK); + + stream = hubbub_inputstream_create("UTF-8", "UTF-8", myrealloc, NULL); + assert(stream != NULL); + + assert(hubbub_inputstream_append(stream, input_buffer, BUFFER_SIZE) == + HUBBUB_OK); + + assert(hubbub_inputstream_append(stream, NULL, 0) == HUBBUB_OK); + + while ((c = hubbub_inputstream_peek(stream)) != HUBBUB_INPUTSTREAM_EOF) + hubbub_inputstream_advance(stream); + + assert(hubbub_inputstream_claim_buffer(stream, &buffer, &buflen) == + HUBBUB_OK); + + assert(buflen == BUFFER_SIZE); + + printf("Buffer: '%.*s'\n", 8, buffer + (BUFFER_SIZE - 8)); + + assert( buffer[BUFFER_SIZE - 6] == '3' && + buffer[BUFFER_SIZE - 5] == (uint8_t) '\xef' && + buffer[BUFFER_SIZE - 4] == (uint8_t) '\xbf' && + buffer[BUFFER_SIZE - 3] == (uint8_t) '\xbd' && + buffer[BUFFER_SIZE - 2] == '4'); + + free(buffer); + + hubbub_inputstream_destroy(stream); + + assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK); + + printf("PASS\n"); + + 
return 0; +} + -- cgit v1.2.3