summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  include/parserutils/errors.h             1
-rw-r--r--  include/parserutils/input/inputstream.h  58
-rw-r--r--  src/input/inputstream.c                  44
-rw-r--r--  src/utils/errors.c                       5
-rw-r--r--  test/inputstream.c                       10
-rw-r--r--  test/regression/stream-nomem.c           6
6 files changed, 67 insertions, 57 deletions
diff --git a/include/parserutils/errors.h b/include/parserutils/errors.h
index 632d334..74c7f34 100644
--- a/include/parserutils/errors.h
+++ b/include/parserutils/errors.h
@@ -19,6 +19,7 @@ typedef enum parserutils_error {
PARSERUTILS_FILENOTFOUND = 4,
PARSERUTILS_NEEDDATA = 5,
PARSERUTILS_BADENCODING = 6,
+ PARSERUTILS_EOF = 7
} parserutils_error;
/* Convert a parserutils error value to a string */
diff --git a/include/parserutils/input/inputstream.h b/include/parserutils/input/inputstream.h
index dac1ab7..9c0be7d 100644
--- a/include/parserutils/input/inputstream.h
+++ b/include/parserutils/input/inputstream.h
@@ -40,11 +40,6 @@ typedef struct parserutils_inputstream
bool had_eof; /**< Whether EOF has been reached */
} parserutils_inputstream;
-/* EOF pseudo-character */
-#define PARSERUTILS_INPUTSTREAM_EOF (0xFFFFFFFFU)
-/* Out-of-data indicator */
-#define PARSERUTILS_INPUTSTREAM_OOD (0xFFFFFFFEU)
-
/* Create an input stream */
parserutils_error parserutils_inputstream_create(const char *enc,
uint32_t encsrc, parserutils_charset_detect_func csdetect,
@@ -64,16 +59,23 @@ parserutils_error parserutils_inputstream_insert(
const uint8_t *data, size_t len);
/* Slow form of css_inputstream_peek. */
-uintptr_t parserutils_inputstream_peek_slow(parserutils_inputstream *stream,
- size_t offset, size_t *length);
+parserutils_error parserutils_inputstream_peek_slow(
+ parserutils_inputstream *stream,
+ size_t offset, const uint8_t **ptr, size_t *length);
/* Look at the character in the stream that starts at
* offset bytes from the cursor
*
* \param stream Stream to look in
* \param offset Byte offset of start of character
+ * \param ptr Pointer to location to receive pointer to character data
* \param length Pointer to location to receive character length (in bytes)
- * \return Pointer to character data, or EOF or OOD.
+ * \return PARSERUTILS_OK on success,
+ * _NEEDDATA on reaching the end of available input,
+ * _EOF on reaching the end of all input,
+ * _BADENCODING if the input cannot be decoded,
+ * _NOMEM on memory exhaustion,
+ * _BADPARM if bad parameters are passed.
*
* Once the character pointed to by the result of this call has been advanced
* past (i.e. parserutils_inputstream_advance has caused the stream cursor to
@@ -81,16 +83,17 @@ uintptr_t parserutils_inputstream_peek_slow(parserutils_inputstream *stream,
* the data pointed to. Thus, any attempt to dereference the pointer after
* advancing past the data it points to is a bug.
*/
-static inline uintptr_t parserutils_inputstream_peek(
- parserutils_inputstream *stream, size_t offset, size_t *length)
+static inline parserutils_error parserutils_inputstream_peek(
+ parserutils_inputstream *stream, size_t offset,
+ const uint8_t **ptr, size_t *length)
{
parserutils_error error = PARSERUTILS_OK;
const parserutils_buffer *utf8;
const uint8_t *utf8_data;
size_t len, off, utf8_len;
- if (stream == NULL)
- return PARSERUTILS_INPUTSTREAM_OOD;
+ if (stream == NULL || ptr == NULL || length == NULL)
+ return PARSERUTILS_BADPARM;
#ifndef NDEBUG
#ifdef VERBOSE_INPUTSTREAM
@@ -113,35 +116,28 @@ static inline uintptr_t parserutils_inputstream_peek(
if (IS_ASCII(utf8_data[off])) {
/* Early exit for ASCII case */
(*length) = 1;
- return (uintptr_t) (utf8_data + off);
+ (*ptr) = (utf8_data + off);
+ return PARSERUTILS_OK;
} else {
error = parserutils_charset_utf8_char_byte_length(
utf8_data + off, &len);
- if (error != PARSERUTILS_OK &&
- error != PARSERUTILS_NEEDDATA)
- return PARSERUTILS_INPUTSTREAM_OOD;
+ if (error == PARSERUTILS_OK) {
+ (*length) = len;
+ (*ptr) = (utf8_data + off);
+ return PARSERUTILS_OK;
+ } else if (error != PARSERUTILS_NEEDDATA) {
+ return error;
+ }
}
}
#undef IS_ASCII
- if (off == utf8_len || error == PARSERUTILS_NEEDDATA) {
- uintptr_t data = parserutils_inputstream_peek_slow(stream,
- offset, length);
-#if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM)
- fprintf(stdout, "clen: %lu\n", *length);
-#endif
- return data;
- }
-
-#if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM)
- fprintf(stdout, "clen: %lu\n", len);
-#endif
-
- *length = len;
+ if (off != utf8_len && error != PARSERUTILS_NEEDDATA)
+ abort();
- return (uintptr_t) (utf8_data + off);
+ return parserutils_inputstream_peek_slow(stream, offset, ptr, length);
}
/**
diff --git a/src/input/inputstream.c b/src/input/inputstream.c
index 1a9be2a..f678e66 100644
--- a/src/input/inputstream.c
+++ b/src/input/inputstream.c
@@ -216,8 +216,14 @@ parserutils_error parserutils_inputstream_insert(
*
* \param stream Stream to look in
* \param offset Byte offset of start of character
+ * \param ptr Pointer to location to receive pointer to character data
* \param length Pointer to location to receive character length (in bytes)
- * \return Pointer to character data, or EOF or OOD.
+ * \return PARSERUTILS_OK on success,
+ * _NEEDDATA on reaching the end of available input,
+ * _EOF on reaching the end of all input,
+ * _BADENCODING if the input cannot be decoded,
+ * _NOMEM on memory exhaustion,
+ * _BADPARM if bad parameters are passed.
*
* Once the character pointed to by the result of this call has been advanced
* past (i.e. parserutils_inputstream_advance has caused the stream cursor to
@@ -225,32 +231,33 @@ parserutils_error parserutils_inputstream_insert(
* the data pointed to. Thus, any attempt to dereference the pointer after
* advancing past the data it points to is a bug.
*/
-uintptr_t parserutils_inputstream_peek_slow(parserutils_inputstream *stream,
- size_t offset, size_t *length)
+parserutils_error parserutils_inputstream_peek_slow(
+ parserutils_inputstream *stream,
+ size_t offset, const uint8_t **ptr, size_t *length)
{
parserutils_inputstream_private *s =
(parserutils_inputstream_private *) stream;
parserutils_error error = PARSERUTILS_OK;
size_t len;
- if (stream == NULL)
- return PARSERUTILS_INPUTSTREAM_OOD;
+ if (stream == NULL || ptr == NULL || length == NULL)
+ return PARSERUTILS_BADPARM;
/* There's insufficient data in the buffer, so read some more */
if (s->raw->length == 0) {
/* No more data to be had */
- return s->public.had_eof ? PARSERUTILS_INPUTSTREAM_EOF
- : PARSERUTILS_INPUTSTREAM_OOD;
+ return s->public.had_eof ? PARSERUTILS_EOF
+ : PARSERUTILS_NEEDDATA;
}
/* Refill utf8 buffer from raw buffer */
error = parserutils_inputstream_refill_buffer(s);
- /* We're currently converting all errors to OOD. Is this what we want?
- * For example, the first time we fill the utf8 buffer, we could
- * discover that we don't support the encoding of the raw data. */
- if (error != PARSERUTILS_OK ||
- s->public.cursor + offset == s->public.utf8->length)
- return PARSERUTILS_INPUTSTREAM_OOD;
+ if (error != PARSERUTILS_OK)
+ return error;
+
+ /* Refill may have succeeded, but not actually produced any new data */
+ if (s->public.cursor + offset == s->public.utf8->length)
+ return PARSERUTILS_NEEDDATA;
/* Now try the read */
if (IS_ASCII(s->public.utf8->data[s->public.cursor + offset])) {
@@ -261,17 +268,18 @@ uintptr_t parserutils_inputstream_peek_slow(parserutils_inputstream *stream,
&len);
if (error != PARSERUTILS_OK && error != PARSERUTILS_NEEDDATA)
- return PARSERUTILS_INPUTSTREAM_OOD;
+ return error;
if (error == PARSERUTILS_NEEDDATA) {
- return s->public.had_eof ? PARSERUTILS_INPUTSTREAM_EOF
- : PARSERUTILS_INPUTSTREAM_OOD;
+ return s->public.had_eof ? PARSERUTILS_EOF
+ : PARSERUTILS_NEEDDATA;
}
}
- *length = len;
+ (*length) = len;
+ (*ptr) = (s->public.utf8->data + s->public.cursor + offset);
- return (uintptr_t) (s->public.utf8->data + s->public.cursor + offset);
+ return PARSERUTILS_OK;
}
#undef IS_ASCII
diff --git a/src/utils/errors.c b/src/utils/errors.c
index 93e098d..248ae8c 100644
--- a/src/utils/errors.c
+++ b/src/utils/errors.c
@@ -41,6 +41,9 @@ const char *parserutils_error_to_string(parserutils_error error)
case PARSERUTILS_BADENCODING:
result = "Unsupported encoding";
break;
+ case PARSERUTILS_EOF:
+ result = "EOF";
+ break;
}
return result;
@@ -69,6 +72,8 @@ parserutils_error parserutils_error_from_string(const char *str, size_t len)
return PARSERUTILS_NEEDDATA;
} else if (strncmp(str, "PARSERUTILS_BADENCODING", len) == 0) {
return PARSERUTILS_BADENCODING;
+ } else if (strncmp(str, "PARSERUTILS_EOF", len) == 0) {
+ return PARSERUTILS_EOF;
}
return PARSERUTILS_OK;
diff --git a/test/inputstream.c b/test/inputstream.c
index 33b163b..426208d 100644
--- a/test/inputstream.c
+++ b/test/inputstream.c
@@ -28,7 +28,7 @@ int main(int argc, char **argv)
size_t len, origlen;
#define CHUNK_SIZE (4096)
uint8_t buf[CHUNK_SIZE];
- uintptr_t c;
+ const uint8_t *c;
size_t clen;
if (argc != 3) {
@@ -61,8 +61,8 @@ int main(int argc, char **argv)
len -= CHUNK_SIZE;
- while ((c = parserutils_inputstream_peek(stream, 0, &clen)) !=
- PARSERUTILS_INPUTSTREAM_OOD) {
+ while (parserutils_inputstream_peek(stream, 0, &c, &clen) !=
+ PARSERUTILS_NEEDDATA) {
parserutils_inputstream_advance(stream, clen);
}
}
@@ -85,8 +85,8 @@ int main(int argc, char **argv)
assert(parserutils_inputstream_append(stream, NULL, 0) ==
PARSERUTILS_OK);
- while ((c = parserutils_inputstream_peek(stream, 0, &clen)) !=
- PARSERUTILS_INPUTSTREAM_EOF) {
+ while (parserutils_inputstream_peek(stream, 0, &c, &clen) !=
+ PARSERUTILS_EOF) {
parserutils_inputstream_advance(stream, clen);
}
diff --git a/test/regression/stream-nomem.c b/test/regression/stream-nomem.c
index 004a807..fc8d514 100644
--- a/test/regression/stream-nomem.c
+++ b/test/regression/stream-nomem.c
@@ -26,7 +26,7 @@ int main(int argc, char **argv)
uint8_t input_buffer[BUFFER_SIZE];
// uint8_t *buffer;
// size_t buflen;
- uintptr_t c;
+ const uint8_t *c;
size_t clen;
if (argc != 2) {
@@ -61,8 +61,8 @@ int main(int argc, char **argv)
assert(parserutils_inputstream_append(stream, NULL, 0) ==
PARSERUTILS_OK);
- while ((c = parserutils_inputstream_peek(stream, 0, &clen)) !=
- PARSERUTILS_INPUTSTREAM_EOF)
+ while (parserutils_inputstream_peek(stream, 0, &c, &clen) !=
+ PARSERUTILS_EOF)
parserutils_inputstream_advance(stream, clen);
/*