summaryrefslogtreecommitdiff
path: root/src/utils/dict.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/utils/dict.c')
-rw-r--r--src/utils/dict.c275
1 files changed, 275 insertions, 0 deletions
diff --git a/src/utils/dict.c b/src/utils/dict.c
new file mode 100644
index 0000000..a9d5dd8
--- /dev/null
+++ b/src/utils/dict.c
@@ -0,0 +1,275 @@
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <parserutils/utils/dict.h>
+
+#include "utils/rbtree.h"
+#include "utils/utils.h"
+
+/**
+ * A dictionary entry
+ */
+struct parserutils_dict_entry
+{
+ size_t len; /**< Length of data, in bytes */
+ uint8_t *data; /**< Entry data */
+};
+
+/**
+ * Dictionary object
+ */
+struct parserutils_dict
+{
+#define TABLE_SIZE (79)
+ parserutils_rbtree *table[TABLE_SIZE]; /**< Hashtable of entries */
+
+ parserutils_alloc alloc; /**< Memory (de)allocation function */
+ void *pw; /**< Client-specific data */
+};
+
+static inline uint32_t dict_hash(const uint8_t *data, size_t len);
+static int dict_cmp(const void *a, const void *b);
+static void dict_del(void *key, void *value, void *pw);
+
+/**
+ * Create a dictionary
+ *
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data
+ * \return Pointer to dictionary instance, or NULL on memory exhaustion
+ */
+parserutils_dict *parserutils_dict_create(parserutils_alloc alloc, void *pw)
+{
+ parserutils_dict *dict;
+
+ if (alloc == NULL)
+ return NULL;
+
+ dict = alloc(NULL, sizeof(parserutils_dict), pw);
+ if (dict == NULL)
+ return NULL;
+
+ memset(dict->table, 0, TABLE_SIZE * sizeof(parserutils_rbtree *));
+
+ dict->alloc = alloc;
+ dict->pw = pw;
+
+ return dict;
+}
+
+/**
+ * Destroy a dictionary
+ *
+ * \param dict The dictionary instance to destroy
+ */
+void parserutils_dict_destroy(parserutils_dict *dict)
+{
+ for (int i = 0; i < TABLE_SIZE; i++) {
+ parserutils_rbtree_destroy(dict->table[i], dict_del, dict);
+ }
+
+ dict->alloc(dict, 0, dict->pw);
+}
+
+/**
+ * Insert an item into a dictionary
+ *
+ * \param dict The dictionary to insert into
+ * \param data Pointer to data
+ * \param len Length, in bytes, of data
+ * \param result Pointer to location to receive pointer to data in dictionary
+ */
+parserutils_error parserutils_dict_insert(parserutils_dict *dict,
+ const uint8_t *data, size_t len,
+ const parserutils_dict_entry **result)
+{
+ parserutils_error error;
+ parserutils_dict_entry *entry = NULL;
+ void *old_value;
+ uint32_t index;
+
+ if (dict == NULL || data == NULL || len == 0)
+ return PARSERUTILS_BADPARM;
+
+ index = dict_hash(data, len) % TABLE_SIZE;
+
+ if (dict->table[index] != NULL) {
+ parserutils_dict_entry search = { len, (uint8_t *) data };
+
+ error = parserutils_rbtree_find(dict->table[index],
+ (void *) &search, (void *) &entry);
+ if (error != PARSERUTILS_OK)
+ return error;
+
+ if (entry != NULL) {
+ *result = entry;
+ return PARSERUTILS_OK;
+ }
+ } else {
+ dict->table[index] = parserutils_rbtree_create(dict_cmp,
+ dict->alloc, dict->pw);
+ if (dict->table[index] == NULL)
+ return PARSERUTILS_NOMEM;
+ }
+
+ entry = dict->alloc(NULL, sizeof(parserutils_dict_entry) + len,
+ dict->pw);
+ if (entry == NULL)
+ return PARSERUTILS_NOMEM;
+
+ /* Do-it-yourself variable-sized data member (simplifies the
+ * manufacture of an entry to search for, above) */
+ memcpy(((uint8_t *) entry) + sizeof(parserutils_dict_entry), data, len);
+ entry->data = ((uint8_t *) entry) + sizeof(parserutils_dict_entry);
+ entry->len = len;
+
+ error = parserutils_rbtree_insert(dict->table[index], (void *) entry,
+ (void *) entry, &old_value);
+ if (error != PARSERUTILS_OK) {
+ dict->alloc(entry, 0, dict->pw);
+ return error;
+ }
+ assert(old_value == NULL);
+
+ *result = entry;
+
+ return PARSERUTILS_OK;
+}
+
+/**
+ * Hsieh hash function
+ *
+ * \param data Pointer to data to hash
+ * \param len Length, in bytes, of data
+ * \return hash value
+ */
+uint32_t dict_hash(const uint8_t *data, size_t len)
+{
+ uint32_t hash = len, tmp;
+ int rem = len & 3;
+
+#define read16(d) ((((uint32_t)((d)[1])) << 8) | ((uint32_t)((d)[0])))
+
+ for (len = len / 4; len > 0; len--) {
+ hash += read16(data);
+ tmp = (read16(data + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 4;
+ hash += hash >> 11;
+ }
+
+ switch (rem) {
+ case 3:
+ hash += read16(data);
+ hash ^= hash << 16;
+ hash ^= data[2] << 18;
+ hash += hash >> 11;
+ break;
+ case 2:
+ hash += read16(data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1:
+ hash += data[0];
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+#undef read16
+
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
+
+/**
+ * Comparison function for dictionary entries
+ *
+ * \param a First entry to consider
+ * \param b Second entry to consider
+ * \return <0 iff a<b, ==0 iff a=b, >0 iff a>b
+ */
+int dict_cmp(const void *a, const void *b)
+{
+ const parserutils_dict_entry *aa = (const parserutils_dict_entry *) a;
+ const parserutils_dict_entry *bb = (const parserutils_dict_entry *) b;
+ size_t min_len = min(aa->len, bb->len);
+ int result = 0;
+
+ if ((result = memcmp(aa->data, bb->data, min_len)) == 0) {
+ result = aa->len - bb->len;
+ }
+
+ return result;
+}
+
+/**
+ * Destructor for dictionary entries
+ *
+ * \param key Key
+ * \param value Value
+ * \param pw Dictionary instance
+ */
+void dict_del(void *key, void *value, void *pw)
+{
+ parserutils_dict *dict = (parserutils_dict *) pw;
+
+ UNUSED(value);
+
+ dict->alloc(key, 0, dict->pw);
+}
+
+#ifndef NDEBUG
+#include <stdio.h>
+
+extern void parserutils_dict_dump(parserutils_dict *dict);
+
+/**
+ * Print out a dictionary entry
+ *
+ * \param key The key
+ * \param value The value
+ * \param depth Depth in tree
+ */
+static void dict_print(const void *key, const void *value, int depth)
+{
+ const parserutils_dict_entry *e = (const parserutils_dict_entry *) key;
+
+ UNUSED(value);
+
+ while (depth-- > 0)
+ putchar(' ');
+
+ printf("'%.*s'\n", e->len, e->data);
+}
+
+/**
+ * Print out a dictionary
+ *
+ * \param dict The dictionary to print
+ */
+void parserutils_dict_dump(parserutils_dict *dict)
+{
+ if (dict == NULL)
+ return;
+
+ for (int i = 0; i < TABLE_SIZE; i++) {
+ printf("%d:\n", i);
+ parserutils_rbtree_dump(dict->table[i], dict_print);
+ }
+}
+#endif
+