summary refs log tree commit diff
diff options
context:
space:
mode:
author Rob Kendrick (humdrum) <rob.kendrick@codethink.co.uk> 2013-05-17 11:40:43 +0100
committer Rob Kendrick (humdrum) <rob.kendrick@codethink.co.uk> 2013-05-17 11:40:43 +0100
commit 60e8f11850f338aef0742578f1e97f0db9c0548a (patch)
tree 6b708d5e20815fb0be4d8e52765b7d6504e1c0bf
parent 9f00abec0388ccbce07b67d5e2490e3e6c383456 (diff)
download netsurf-60e8f11850f338aef0742578f1e97f0db9c0548a.tar.gz
netsurf-60e8f11850f338aef0742578f1e97f0db9c0548a.tar.bz2
urldb maintains a bloom filter of URLs contained within and consults it when searching
-rw-r--r-- content/urldb.c 32
-rw-r--r-- utils/Makefile 4
2 files changed, 33 insertions, 3 deletions
diff --git a/content/urldb.c b/content/urldb.c
index e3cc1d73d..92065d0f3 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -107,6 +107,7 @@
#include "utils/filename.h"
#include "utils/url.h"
#include "utils/utils.h"
+#include "utils/bloom.h"
struct cookie_internal_data {
char *name; /**< Cookie name */
@@ -327,6 +328,8 @@ static int loaded_cookie_file_version;
#define MIN_URL_FILE_VERSION 106
#define URL_FILE_VERSION 106
+static struct bloom_filter *url_bloom;
+
/**
* Import an URL database from file, replacing any existing database
*
@@ -346,7 +349,10 @@ void urldb_load(const char *filename)
assert(filename);
- LOG(("Loading URL file"));
+ LOG(("Loading URL file %s", filename));
+
+ if (url_bloom == NULL)
+ url_bloom = bloom_create(16384);
fp = fopen(filename, "r");
if (!fp) {
@@ -446,6 +452,11 @@ void urldb_load(const char *filename)
(port ? ports : ""),
s);
+ if (url_bloom != NULL)
+ bloom_insert_str(url_bloom,
+ url,
+ strlen(url));
+
/* TODO: store URLs in pre-parsed state, and make
* a nsurl_load to generate the nsurl more
* swiftly.
@@ -782,6 +793,13 @@ bool urldb_add_url(nsurl *url)
unsigned int port_int;
assert(url);
+
+ if (url_bloom == NULL)
+ url_bloom = bloom_create(16384);
+
+ if (url_bloom != NULL)
+ bloom_insert_str(url_bloom, nsurl_access(url),
+ nsurl_length(url));
/* Copy and merge path/query strings */
if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
@@ -1857,6 +1875,14 @@ struct path_data *urldb_find_url(nsurl *url)
bool match;
assert(url);
+
+ if (url_bloom != NULL) {
+ if (bloom_search_str(url_bloom,
+ nsurl_access(url),
+ nsurl_length(url)) == false) {
+ return NULL;
+ }
+ }
scheme = nsurl_get_component(url, NSURL_SCHEME);
if (scheme == NULL)
@@ -3951,6 +3977,10 @@ void urldb_destroy(void)
b = a->next;
urldb_destroy_host_tree(a);
}
+
+ /* And the bloom filter */
+ if (url_bloom != NULL)
+ bloom_destroy(url_bloom);
}
/**
diff --git a/utils/Makefile b/utils/Makefile
index ed34e9557..071e4fec1 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -2,6 +2,6 @@
S_UTILS := base64.c corestrings.c filename.c filepath.c hashtable.c \
libdom.c locale.c log.c messages.c nsurl.c talloc.c url.c \
- utf8.c utils.c useragent.c
+ utf8.c utils.c useragent.c bloom.c
-S_UTILS := $(addprefix utils/,$(S_UTILS))
\ No newline at end of file
+S_UTILS := $(addprefix utils/,$(S_UTILS))