summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Kendrick (humdrum) <rob.kendrick@codethink.co.uk>2013-05-17 12:12:55 +0100
committerRob Kendrick (humdrum) <rob.kendrick@codethink.co.uk>2013-05-17 12:12:55 +0100
commitc204e5ff0cbdb71eb8fe05c31252dc1d0674d300 (patch)
tree3b01f3c43ab31d701a73a1365f987e051e259b05
parent60e8f11850f338aef0742578f1e97f0db9c0548a (diff)
downloadnetsurf-c204e5ff0cbdb71eb8fe05c31252dc1d0674d300.tar.gz
netsurf-c204e5ff0cbdb71eb8fe05c31252dc1d0674d300.tar.bz2
Add explanatory comment and make bloom size be a #define
-rw-r--r--content/urldb.c13
1 files changed, 11 insertions, 2 deletions
diff --git a/content/urldb.c b/content/urldb.c
index 92065d0f3..c52dc5b76 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -328,7 +328,16 @@ static int loaded_cookie_file_version;
#define MIN_URL_FILE_VERSION 106
#define URL_FILE_VERSION 106
+/* Bloom filter used for short-circuiting the false case of "is this
+ * URL in the database?". BLOOM_SIZE controls how large the filter is
+ * in bytes. Primitive experimentation shows that for a filter of X
+ * bytes filled with X items, searching for X items not in the filter
+ * has a 5% false-positive rate. We set it to 32kB, which should be
+ * enough for all but the largest databases, while not being shockingly
+ * wasteful on memory.
+ */
static struct bloom_filter *url_bloom;
+#define BLOOM_SIZE (1024 * 32)
/**
* Import an URL database from file, replacing any existing database
@@ -352,7 +361,7 @@ void urldb_load(const char *filename)
LOG(("Loading URL file %s", filename));
if (url_bloom == NULL)
- url_bloom = bloom_create(16384);
+ url_bloom = bloom_create(BLOOM_SIZE);
fp = fopen(filename, "r");
if (!fp) {
@@ -795,7 +804,7 @@ bool urldb_add_url(nsurl *url)
assert(url);
if (url_bloom == NULL)
- url_bloom = bloom_create(16384);
+ url_bloom = bloom_create(BLOOM_SIZE);
if (url_bloom != NULL)
bloom_insert_str(url_bloom, nsurl_access(url),