summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/utils/Makefile3
-rw-r--r--src/utils/chunkarray.c213
-rw-r--r--src/utils/chunkarray.h26
-rw-r--r--src/utils/hash.c338
-rw-r--r--src/utils/stack.c3
5 files changed, 581 insertions, 2 deletions
diff --git a/src/utils/Makefile b/src/utils/Makefile
index c786a09..f32ef72 100644
--- a/src/utils/Makefile
+++ b/src/utils/Makefile
@@ -35,7 +35,8 @@ d := $(DIR)
CFLAGS := $(CFLAGS) -I$(d)
# Sources
-SRCS_$(d) := buffer.c dict.c errors.c rbtree.c stack.c vector.c
+SRCS_$(d) := chunkarray.c buffer.c dict.c errors.c hash.c rbtree.c stack.c \
+ vector.c
# Append to sources for component
SOURCES += $(addprefix $(d), $(SRCS_$(d)))
diff --git a/src/utils/chunkarray.c b/src/utils/chunkarray.c
new file mode 100644
index 0000000..308f537
--- /dev/null
+++ b/src/utils/chunkarray.c
@@ -0,0 +1,213 @@
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "utils/chunkarray.h"
+
+typedef struct chunk chunk;
+
+struct chunk {
+#define CHUNK_SIZE (4096)
+ chunk *next;
+
+ uint32_t used;
+
+ uint8_t data[CHUNK_SIZE];
+};
+
+struct parserutils_chunkarray {
+ chunk *used_chunks;
+
+ chunk *free_chunks;
+
+ parserutils_alloc alloc;
+ void *pw;
+};
+
+/**
+ * Create a chunked array
+ *
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data
+ * \param array Pointer to location to receive array instance
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_chunkarray_create(parserutils_alloc alloc,
+ void *pw, parserutils_chunkarray **array)
+{
+ parserutils_chunkarray *c;
+
+ if (alloc == NULL || array == NULL)
+ return PARSERUTILS_BADPARM;
+
+ c = alloc(0, sizeof(parserutils_chunkarray), pw);
+ if (c == NULL)
+ return PARSERUTILS_NOMEM;
+
+ c->used_chunks = NULL;
+ c->free_chunks = NULL;
+
+ c->alloc = alloc;
+ c->pw = pw;
+
+ *array = c;
+
+ return PARSERUTILS_OK;
+}
+
+/**
+ * Destroy a chunked array
+ *
+ * \param array The array to destroy
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_chunkarray_destroy(parserutils_chunkarray *array)
+{
+ chunk *c, *d;
+
+ if (array == NULL)
+ return PARSERUTILS_BADPARM;
+
+ for (c = array->used_chunks; c != NULL; c = d) {
+ d = c->next;
+ array->alloc(c, 0, array->pw);
+ }
+
+ for (c = array->free_chunks; c != NULL; c = d) {
+ d = c->next;
+ array->alloc(c, 0, array->pw);
+ }
+
+ array->alloc(array, 0, array->pw);
+
+ return PARSERUTILS_OK;
+}
+
+/**
+ * Insert an item into an array
+ *
+ * \param array The array to insert into
+ * \param data Pointer to data to insert
+ * \param len Length, in bytes, of data
+ * \param inserted Pointer to location to receive pointer to inserted data
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_chunkarray_insert(parserutils_chunkarray *array,
+ const void *data, size_t len, const void **inserted)
+{
+ if (array == NULL || data == NULL || inserted == NULL)
+ return PARSERUTILS_BADPARM;
+
+ /* If we're trying to insert data larger than CHUNK_SIZE, then it
+ * gets its own chunk. */
+ if (len > CHUNK_SIZE) {
+ chunk *c = array->alloc(0, sizeof(chunk) + len - CHUNK_SIZE,
+ array->pw);
+ if (c == NULL)
+ return PARSERUTILS_NOMEM;
+
+ /* Populate chunk */
+ (*inserted) = c->data;
+ memcpy(c->data, data, len);
+ c->used = len;
+
+ /* And put it in the used list */
+ c->next = array->used_chunks;
+ array->used_chunks = c;
+ } else {
+ /* Scan the free list until we find a chunk with enough space */
+ chunk *c, *prev;
+
+ for (prev = NULL, c = array->free_chunks; c != NULL;
+ prev = c, c = c->next) {
+ if (CHUNK_SIZE - c->used >= len)
+ break;
+ }
+
+ if (c == NULL) {
+ /* None big enough, create a new one */
+ c = array->alloc(0, sizeof(chunk), array->pw);
+ if (c == NULL)
+ return PARSERUTILS_NOMEM;
+
+ c->used = 0;
+
+ /* Insert it at the head of the free list */
+ c->next = array->free_chunks;
+ array->free_chunks = c;
+
+ /* And ensure that prev is kept in sync */
+ prev = NULL;
+ }
+
+ /* Populate chunk */
+ (*inserted) = c->data + c->used;
+ memcpy(c->data + c->used, data, len);
+ c->used += len;
+
+ /* If we've now filled the chunk, move it to the used list */
+ if (c->used == CHUNK_SIZE) {
+ if (prev != NULL)
+ prev->next = c->next;
+ else
+ array->free_chunks = c->next;
+
+ c->next = array->used_chunks;
+ array->used_chunks = c;
+ }
+ }
+
+ return PARSERUTILS_OK;
+}
+
+/******************************************************************************
+ * Private functions *
+ ******************************************************************************/
+
+extern void parserutils_chunkarray_dump(parserutils_chunkarray *array);
+
+/**
+ * Dump details of a chunked array to stdout
+ *
+ * \param array The array to dump
+ */
+void parserutils_chunkarray_dump(parserutils_chunkarray *array)
+{
+ uint32_t n = 0;
+ size_t count = 0;
+ size_t total = sizeof(parserutils_chunkarray);
+
+ for (chunk *c = array->used_chunks; c != NULL; c = c->next) {
+ n++;
+ count += c->used;
+ if (c->used == CHUNK_SIZE)
+ total += sizeof(chunk);
+ else
+ total += sizeof(chunk) + c->used - CHUNK_SIZE;
+ }
+
+ printf("%u full blocks: %zu bytes\n", n, count);
+
+ n = 0;
+ count = 0;
+
+ for (chunk *c = array->free_chunks; c != NULL; c = c->next) {
+ n++;
+ count += c->used;
+ total += sizeof(chunk);
+ }
+
+ printf("%u partial blocks: %zu bytes (of %u => %f%%)\n", n, count,
+ n * CHUNK_SIZE,
+ (count * 100) / ((float) n * CHUNK_SIZE));
+
+ printf("Total: %zu (%lu) (%lu)\n", total, sizeof(chunk),
+ sizeof(parserutils_chunkarray));
+}
+
diff --git a/src/utils/chunkarray.h b/src/utils/chunkarray.h
new file mode 100644
index 0000000..4ee98cd
--- /dev/null
+++ b/src/utils/chunkarray.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef parserutils_utils_chunkarray_h_
+#define parserutils_utils_chunkarray_h_
+
+#include <parserutils/errors.h>
+#include <parserutils/functypes.h>
+
+struct parserutils_chunkarray;
+typedef struct parserutils_chunkarray parserutils_chunkarray;
+
+parserutils_error parserutils_chunkarray_create(parserutils_alloc alloc,
+ void *pw, parserutils_chunkarray **array);
+parserutils_error parserutils_chunkarray_destroy(parserutils_chunkarray *array);
+
+parserutils_error parserutils_chunkarray_insert(parserutils_chunkarray *array,
+ const void *data, size_t len,
+ const void **inserted);
+
+#endif
+
diff --git a/src/utils/hash.c b/src/utils/hash.c
new file mode 100644
index 0000000..1694d72
--- /dev/null
+++ b/src/utils/hash.c
@@ -0,0 +1,338 @@
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <parserutils/utils/hash.h>
+
+#include "utils/chunkarray.h"
+
+typedef struct slot_table slot_table;
+
+struct parserutils_hash {
+ slot_table *slots;
+
+ parserutils_chunkarray *data;
+ parserutils_chunkarray *entries;
+
+ parserutils_alloc alloc;
+ void *pw;
+};
+
+struct slot_table {
+#define DEFAULT_SLOTS (1<<6)
+ size_t n_slots;
+ size_t n_used;
+
+ const parserutils_hash_entry *slots[];
+};
+
+static inline uint32_t _hash(const uint8_t *data, size_t len);
+static inline int _cmp(const uint8_t *a, size_t alen,
+ const uint8_t *b, size_t blen);
+static inline void grow_slots(parserutils_hash *hash);
+
+/**
+ * Create a hash
+ *
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data
+ * \param hash Pointer to location to receive result
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_hash_create(parserutils_alloc alloc, void *pw,
+ parserutils_hash **hash)
+{
+ parserutils_hash *h;
+ parserutils_error error;
+
+ if (alloc == NULL || hash == NULL)
+ return PARSERUTILS_BADPARM;
+
+ h = alloc(0, sizeof(parserutils_hash), pw);
+ if (h == NULL)
+ return PARSERUTILS_NOMEM;
+
+ h->slots = alloc(0, sizeof(slot_table) +
+ DEFAULT_SLOTS * sizeof(parserutils_hash_entry *),
+ pw);
+ if (h->slots == NULL) {
+ alloc(h, 0, pw);
+ return PARSERUTILS_NOMEM;
+ }
+
+ memset(h->slots, 0, sizeof(slot_table) +
+ DEFAULT_SLOTS * sizeof(parserutils_hash_entry *));
+ h->slots->n_slots = DEFAULT_SLOTS;
+
+ error = parserutils_chunkarray_create(alloc, pw, &h->data);
+ if (error != PARSERUTILS_OK) {
+ alloc(h->slots, 0, pw);
+ alloc(h, 0, pw);
+ return error;
+ }
+
+ error = parserutils_chunkarray_create(alloc, pw, &h->entries);
+ if (error != PARSERUTILS_OK) {
+ alloc(h->data, 0, pw);
+ alloc(h->slots, 0, pw);
+ alloc(h, 0, pw);
+ return error;
+ }
+
+ h->alloc = alloc;
+ h->pw = pw;
+
+ *hash = h;
+
+ return PARSERUTILS_OK;
+}
+
+/**
+ * Destroy a hash
+ *
+ * \param hash The hash to destroy
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_hash_destroy(parserutils_hash *hash)
+{
+ if (hash == NULL)
+ return PARSERUTILS_BADPARM;
+
+ parserutils_chunkarray_destroy(hash->data);
+
+ parserutils_chunkarray_destroy(hash->entries);
+
+ hash->alloc(hash->slots, 0, hash->pw);
+
+ hash->alloc(hash, 0, hash->pw);
+
+ return PARSERUTILS_OK;
+}
+
+/**
+ * Insert an item into a hash
+ *
+ * \param hash The hash to insert into
+ * \param data Pointer to data
+ * \param len Length, in bytes, of data
+ * \param inserted Pointer to location to receive pointer to data in hash
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_hash_insert(parserutils_hash *hash,
+ const uint8_t *data, size_t len,
+ const parserutils_hash_entry **inserted)
+{
+ uint32_t index;
+ slot_table *slots;
+ const void *idata, *ientry;
+ parserutils_hash_entry entry;
+ parserutils_error error;
+
+ if (hash == NULL || data == NULL || inserted == NULL)
+ return PARSERUTILS_BADPARM;
+
+ slots = hash->slots;
+
+ /* Find index */
+ index = _hash(data, len) % slots->n_slots;
+
+ /* Use linear probing to resolve collisions */
+ while (slots->slots[index] != NULL) {
+ /* If this data is already in the hash, return it */
+ if (_cmp(data, len, slots->slots[index]->data,
+ slots->slots[index]->len) == 0) {
+ (*inserted) = slots->slots[index];
+ return PARSERUTILS_OK;
+ }
+
+ index = (index + 1) % slots->n_slots;
+ }
+
+ /* The data isn't in the hash. Index identifies the slot to use */
+ error = parserutils_chunkarray_insert(hash->data, data, len, &idata);
+ if (error != PARSERUTILS_OK)
+ return error;
+
+ entry.len = len;
+ entry.data = idata;
+
+ error = parserutils_chunkarray_insert(hash->entries,
+ &entry, sizeof(parserutils_hash_entry), &ientry);
+ if (error != PARSERUTILS_OK) {
+ /* We effectively leak the inserted data, as chunkarray
+ * doesn't support deletion. */
+ return error;
+ }
+
+ (*inserted) = slots->slots[index] = ientry;
+
+ /* If the table is 75% full, then increase its size */
+ if (++slots->n_used >= (slots->n_slots >> 1) + (slots->n_slots >> 2))
+ grow_slots(hash);
+
+ return PARSERUTILS_OK;
+}
+
+/******************************************************************************
+ * Private functions *
+ ******************************************************************************/
+
+/**
+ * Hsieh hash function
+ *
+ * \param data Pointer to data to hash
+ * \param len Length, in bytes, of data
+ * \return hash value
+ */
+uint32_t _hash(const uint8_t *data, size_t len)
+{
+ uint32_t hash = len, tmp;
+ int rem = len & 3;
+
+#define read16(d) ((((uint32_t)((d)[1])) << 8) | ((uint32_t)((d)[0])))
+
+ for (len = len / 4; len > 0; len--) {
+ hash += read16(data);
+ tmp = (read16(data + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 4;
+ hash += hash >> 11;
+ }
+
+ switch (rem) {
+ case 3:
+ hash += read16(data);
+ hash ^= hash << 16;
+ hash ^= data[2] << 18;
+ hash += hash >> 11;
+ break;
+ case 2:
+ hash += read16(data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1:
+ hash += data[0];
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+#undef read16
+
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
+
+/**
+ * Comparator for hash entries
+ *
+ * \param a First item to consider
+ * \param alen Length, in bytes, of a
+ * \param b Second item to consider
+ * \param blen Length, in bytes, of b
+ * \return <0 iff a<b, ==0 iff a=b, >0 iff a>b
+ */
+int _cmp(const uint8_t *a, size_t alen, const uint8_t *b, size_t blen)
+{
+ int result;
+
+ /* Check for identity first */
+ if (a == b)
+ return 0;
+
+ /* Sort first by length, then by data equality */
+ if ((result = alen - blen) == 0)
+ result = memcmp(a, b, alen);
+
+ return result;
+}
+
+/**
+ * Increase the size of the slot table
+ *
+ * \param hash The hash to process
+ */
+void grow_slots(parserutils_hash *hash)
+{
+ slot_table *s;
+ size_t new_size;
+
+ if (hash == NULL)
+ return;
+
+ new_size = hash->slots->n_slots << 1;
+
+ /* Create new slot table */
+ s = hash->alloc(0, sizeof(slot_table) +
+ new_size * sizeof(parserutils_hash_entry *),
+ hash->pw);
+ if (s == NULL)
+ return;
+
+ memset(s, 0, sizeof(slot_table) +
+ new_size * sizeof(parserutils_hash_entry *));
+ s->n_slots = new_size;
+
+ /* Now, populate it with the contents of the current one */
+ for (uint32_t i = 0; i < hash->slots->n_slots; i++) {
+ const parserutils_hash_entry *e = hash->slots->slots[i];
+
+ if (e == NULL)
+ continue;
+
+ /* Find new index */
+ uint32_t index = _hash(e->data, e->len) % s->n_slots;
+
+ /* Use linear probing to resolve collisions */
+ while (s->slots[index] != NULL)
+ index = (index + 1) % s->n_slots;
+
+ /* Insert into new slot table */
+ s->slots[index] = e;
+ s->n_used++;
+ }
+
+ /* Destroy current table, and replace it with the new one */
+ hash->alloc(hash->slots, 0, hash->pw);
+ hash->slots = s;
+
+ return;
+}
+
+extern void parserutils_chunkarray_dump(parserutils_chunkarray *array);
+extern void parserutils_hash_dump(parserutils_hash *hash);
+
+/**
+ * Dump details of a hash to stdout
+ *
+ * \param hash The hash to dump
+ */
+void parserutils_hash_dump(parserutils_hash *hash)
+{
+ printf("%zu slots used (of %zu => %f%%)\n", hash->slots->n_used,
+ hash->slots->n_slots,
+ hash->slots->n_used * 100 / (float) hash->slots->n_slots);
+
+ printf("Data:\n");
+ parserutils_chunkarray_dump(hash->data);
+
+ printf("Entries:\n");
+ parserutils_chunkarray_dump(hash->entries);
+
+ printf("Hash structures: %zu\n",
+ sizeof(parserutils_hash) + sizeof(slot_table) +
+ hash->slots->n_slots * sizeof(parserutils_hash_entry *));
+}
+
diff --git a/src/utils/stack.c b/src/utils/stack.c
index a157597..725a23a 100644
--- a/src/utils/stack.c
+++ b/src/utils/stack.c
@@ -92,7 +92,8 @@ parserutils_error parserutils_stack_destroy(parserutils_stack *stack)
* \param item The item to push
* \return PARSERUTILS_OK on success, appropriate error otherwise
*/
-parserutils_error parserutils_stack_push(parserutils_stack *stack, void *item)
+parserutils_error parserutils_stack_push(parserutils_stack *stack,
+ const void *item)
{
int32_t slot;