From dc43c01c1562f6bef60e5ee6725e25c9b0c77a31 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Tue, 6 Jan 2009 10:11:41 +0000 Subject: Change API of inputstream_peek(_slow) to return errors. Joy. svn path=/trunk/libparserutils/; revision=5965 --- include/parserutils/errors.h | 1 + include/parserutils/input/inputstream.h | 58 +++++++++++++++------------------ src/input/inputstream.c | 44 +++++++++++++++---------- src/utils/errors.c | 5 +++ test/inputstream.c | 10 +++--- test/regression/stream-nomem.c | 6 ++-- 6 files changed, 67 insertions(+), 57 deletions(-) diff --git a/include/parserutils/errors.h b/include/parserutils/errors.h index 632d334..74c7f34 100644 --- a/include/parserutils/errors.h +++ b/include/parserutils/errors.h @@ -19,6 +19,7 @@ typedef enum parserutils_error { PARSERUTILS_FILENOTFOUND = 4, PARSERUTILS_NEEDDATA = 5, PARSERUTILS_BADENCODING = 6, + PARSERUTILS_EOF = 7 } parserutils_error; /* Convert a parserutils error value to a string */ diff --git a/include/parserutils/input/inputstream.h b/include/parserutils/input/inputstream.h index dac1ab7..9c0be7d 100644 --- a/include/parserutils/input/inputstream.h +++ b/include/parserutils/input/inputstream.h @@ -40,11 +40,6 @@ typedef struct parserutils_inputstream bool had_eof; /**< Whether EOF has been reached */ } parserutils_inputstream; -/* EOF pseudo-character */ -#define PARSERUTILS_INPUTSTREAM_EOF (0xFFFFFFFFU) -/* Out-of-data indicator */ -#define PARSERUTILS_INPUTSTREAM_OOD (0xFFFFFFFEU) - /* Create an input stream */ parserutils_error parserutils_inputstream_create(const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, @@ -64,16 +59,23 @@ parserutils_error parserutils_inputstream_insert( const uint8_t *data, size_t len); /* Slow form of css_inputstream_peek. */ -uintptr_t parserutils_inputstream_peek_slow(parserutils_inputstream *stream, - size_t offset, size_t *length); +parserutils_error parserutils_inputstream_peek_slow( + parserutils_inputstream *stream, + size_t offset, const uint8_t **ptr, size_t *length); /* Look at the character in the stream that starts at * offset bytes from the cursor * * \param stream Stream to look in * \param offset Byte offset of start of character + * \param ptr Pointer to location to receive pointer to character data * \param length Pointer to location to receive character length (in bytes) - * \return Pointer to character data, or EOF or OOD. + * \return PARSERUTILS_OK on success, + * _NEEDDATA on reaching the end of available input, + * _EOF on reaching the end of all input, + * _BADENCODING if the input cannot be decoded, + * _NOMEM on memory exhaustion, + * _BADPARM if bad parameters are passed. * * Once the character pointed to by the result of this call has been advanced * past (i.e. parserutils_inputstream_advance has caused the stream cursor to @@ -81,16 +83,17 @@ uintptr_t parserutils_inputstream_peek_slow(parserutils_inputstream *stream, * the data pointed to. Thus, any attempt to dereference the pointer after * advancing past the data it points to is a bug. */ -static inline uintptr_t parserutils_inputstream_peek( - parserutils_inputstream *stream, size_t offset, size_t *length) +static inline parserutils_error parserutils_inputstream_peek( + parserutils_inputstream *stream, size_t offset, + const uint8_t **ptr, size_t *length) { parserutils_error error = PARSERUTILS_OK; const parserutils_buffer *utf8; const uint8_t *utf8_data; size_t len, off, utf8_len; - if (stream == NULL) - return PARSERUTILS_INPUTSTREAM_OOD; + if (stream == NULL || ptr == NULL || length == NULL) + return PARSERUTILS_BADPARM; #ifndef NDEBUG #ifdef VERBOSE_INPUTSTREAM @@ -113,35 +116,28 @@ static inline uintptr_t parserutils_inputstream_peek( if (IS_ASCII(utf8_data[off])) { /* Early exit for ASCII case */ (*length) = 1; - return (uintptr_t) (utf8_data + off); + (*ptr) = (utf8_data + off); + return PARSERUTILS_OK; } else { error = parserutils_charset_utf8_char_byte_length( utf8_data + off, &len); - if (error != PARSERUTILS_OK && - error != PARSERUTILS_NEEDDATA) - return PARSERUTILS_INPUTSTREAM_OOD; + if (error == PARSERUTILS_OK) { + (*length) = len; + (*ptr) = (utf8_data + off); + return PARSERUTILS_OK; + } else if (error != PARSERUTILS_NEEDDATA) { + return error; + } } } #undef IS_ASCII - if (off == utf8_len || error == PARSERUTILS_NEEDDATA) { - uintptr_t data = parserutils_inputstream_peek_slow(stream, - offset, length); -#if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM) - fprintf(stdout, "clen: %lu\n", *length); -#endif - return data; - } - -#if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM) - fprintf(stdout, "clen: %lu\n", len); -#endif - - *length = len; + if (off != utf8_len && error != PARSERUTILS_NEEDDATA) + abort(); - return (uintptr_t) (utf8_data + off); + return parserutils_inputstream_peek_slow(stream, offset, ptr, length); } /** diff --git a/src/input/inputstream.c b/src/input/inputstream.c index 1a9be2a..f678e66 100644 --- a/src/input/inputstream.c +++ b/src/input/inputstream.c @@ -216,8 +216,14 @@ parserutils_error parserutils_inputstream_insert( * * \param stream Stream to look in * \param offset Byte offset of start of character + * \param ptr Pointer to location to receive pointer to character data * \param length Pointer to location to receive character length (in bytes) - * \return Pointer to character data, or EOF or OOD. + * \return PARSERUTILS_OK on success, + * _NEEDDATA on reaching the end of available input, + * _EOF on reaching the end of all input, + * _BADENCODING if the input cannot be decoded, + * _NOMEM on memory exhaustion, + * _BADPARM if bad parameters are passed. * * Once the character pointed to by the result of this call has been advanced * past (i.e. parserutils_inputstream_advance has caused the stream cursor to @@ -225,32 +231,33 @@ parserutils_error parserutils_inputstream_insert( * the data pointed to. Thus, any attempt to dereference the pointer after * advancing past the data it points to is a bug. */ -uintptr_t parserutils_inputstream_peek_slow(parserutils_inputstream *stream, - size_t offset, size_t *length) +parserutils_error parserutils_inputstream_peek_slow( + parserutils_inputstream *stream, + size_t offset, const uint8_t **ptr, size_t *length) { parserutils_inputstream_private *s = (parserutils_inputstream_private *) stream; parserutils_error error = PARSERUTILS_OK; size_t len; - if (stream == NULL) - return PARSERUTILS_INPUTSTREAM_OOD; + if (stream == NULL || ptr == NULL || length == NULL) + return PARSERUTILS_BADPARM; /* There's insufficient data in the buffer, so read some more */ if (s->raw->length == 0) { /* No more data to be had */ - return s->public.had_eof ? PARSERUTILS_INPUTSTREAM_EOF - : PARSERUTILS_INPUTSTREAM_OOD; + return s->public.had_eof ? PARSERUTILS_EOF + : PARSERUTILS_NEEDDATA; } /* Refill utf8 buffer from raw buffer */ error = parserutils_inputstream_refill_buffer(s); - /* We're currently converting all errors to OOD. Is this what we want? - * For example, the first time we fill the utf8 buffer, we could - * discover that we don't support the encoding of the raw data. */ - if (error != PARSERUTILS_OK || - s->public.cursor + offset == s->public.utf8->length) - return PARSERUTILS_INPUTSTREAM_OOD; + if (error != PARSERUTILS_OK) + return error; + + /* Refill may have succeeded, but not actually produced any new data */ + if (s->public.cursor + offset == s->public.utf8->length) + return PARSERUTILS_NEEDDATA; /* Now try the read */ if (IS_ASCII(s->public.utf8->data[s->public.cursor + offset])) { @@ -261,17 +268,18 @@ uintptr_t parserutils_inputstream_peek_slow(parserutils_inputstream *stream, &len); if (error != PARSERUTILS_OK && error != PARSERUTILS_NEEDDATA) - return PARSERUTILS_INPUTSTREAM_OOD; + return error; if (error == PARSERUTILS_NEEDDATA) { - return s->public.had_eof ? PARSERUTILS_INPUTSTREAM_EOF - : PARSERUTILS_INPUTSTREAM_OOD; + return s->public.had_eof ? PARSERUTILS_EOF + : PARSERUTILS_NEEDDATA; } } - *length = len; + (*length) = len; + (*ptr) = (s->public.utf8->data + s->public.cursor + offset); - return (uintptr_t) (s->public.utf8->data + s->public.cursor + offset); + return PARSERUTILS_OK; } #undef IS_ASCII diff --git a/src/utils/errors.c b/src/utils/errors.c index 93e098d..248ae8c 100644 --- a/src/utils/errors.c +++ b/src/utils/errors.c @@ -41,6 +41,9 @@ const char *parserutils_error_to_string(parserutils_error error) case PARSERUTILS_BADENCODING: result = "Unsupported encoding"; break; + case PARSERUTILS_EOF: + result = "EOF"; + break; } return result; @@ -69,6 +72,8 @@ parserutils_error parserutils_error_from_string(const char *str, size_t len) return PARSERUTILS_NEEDDATA; } else if (strncmp(str, "PARSERUTILS_BADENCODING", len) == 0) { return PARSERUTILS_BADENCODING; + } else if (strncmp(str, "PARSERUTILS_EOF", len) == 0) { + return PARSERUTILS_EOF; } return PARSERUTILS_OK; diff --git a/test/inputstream.c b/test/inputstream.c index 33b163b..426208d 100644 --- a/test/inputstream.c +++ b/test/inputstream.c @@ -28,7 +28,7 @@ int main(int argc, char **argv) size_t len, origlen; #define CHUNK_SIZE (4096) uint8_t buf[CHUNK_SIZE]; - uintptr_t c; + const uint8_t *c; size_t clen; if (argc != 3) { @@ -61,8 +61,8 @@ int main(int argc, char **argv) len -= CHUNK_SIZE; - while ((c = parserutils_inputstream_peek(stream, 0, &clen)) != - PARSERUTILS_INPUTSTREAM_OOD) { + while (parserutils_inputstream_peek(stream, 0, &c, &clen) != + PARSERUTILS_NEEDDATA) { parserutils_inputstream_advance(stream, clen); } } @@ -85,8 +85,8 @@ int main(int argc, char **argv) assert(parserutils_inputstream_append(stream, NULL, 0) == PARSERUTILS_OK); - while ((c = parserutils_inputstream_peek(stream, 0, &clen)) != - PARSERUTILS_INPUTSTREAM_EOF) { + while (parserutils_inputstream_peek(stream, 0, &c, &clen) != + PARSERUTILS_EOF) { parserutils_inputstream_advance(stream, clen); } diff --git a/test/regression/stream-nomem.c b/test/regression/stream-nomem.c index 004a807..fc8d514 100644 --- a/test/regression/stream-nomem.c +++ b/test/regression/stream-nomem.c @@ -26,7 +26,7 @@ int main(int argc, char **argv) uint8_t input_buffer[BUFFER_SIZE]; // uint8_t *buffer; // size_t buflen; - uintptr_t c; + const uint8_t *c; size_t clen; if (argc != 2) { @@ -61,8 +61,8 @@ int main(int argc, char **argv) assert(parserutils_inputstream_append(stream, NULL, 0) == PARSERUTILS_OK); - while ((c = parserutils_inputstream_peek(stream, 0, &clen)) != - PARSERUTILS_INPUTSTREAM_EOF) + while (parserutils_inputstream_peek(stream, 0, &c, &clen) != + PARSERUTILS_EOF) parserutils_inputstream_advance(stream, clen); /* -- cgit v1.2.3