From 94b5c956766a3f1ee916f2bb946c3d9273c90ae1 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Fri, 10 Jul 2015 10:48:26 +0100 Subject: Add unit test for bloom filter Adds check based unit test for teh bloom filter implementation. This is based on Roberts original test code in utils/bloom.c and uses /usr/share/dict as a source of strings to check bloom creation, no false negatives and the false positive rate is below 15%. --- test/Makefile | 5 +- test/bloom.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ utils/bloom.c | 61 ++---------------- 3 files changed, 202 insertions(+), 59 deletions(-) create mode 100644 test/bloom.c diff --git a/test/Makefile b/test/Makefile index 9db5030b5..06f6943cc 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,7 +1,7 @@ # # NetSurf unit tests -TESTS := nsurl urldbtest nsoption #llcache +TESTS := nsurl urldbtest nsoption bloom #llcache # nsurl sources and flags nsurl_SRCS := utils/corestrings.c utils/nsurl.c utils/idna.c \ @@ -28,7 +28,8 @@ llcache_SRCS := content/fetch.c content/fetchers/curl.c \ nsoption_SRCS := utils/nsoption.c \ test/log.c test/nsoption.c - +bloom_SRCS := utils/bloom.c \ + test/bloom.c # Coverage builds need additional flags ifeq ($(MAKECMDGOALS),coverage) diff --git a/test/bloom.c b/test/bloom.c new file mode 100644 index 000000000..dd3220eac --- /dev/null +++ b/test/bloom.c @@ -0,0 +1,195 @@ +/* + * Copyright 2015 Vincent Sanders + * Copyright 2013 Rob Kendrick + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * \file + * Test bloom filter operations. + * + * Implementation taken from original test rig in bloom filter code + */ + +#include +#include +#include +#include +#include + +#include "utils/bloom.h" + +#define BLOOM_SIZE 8192 +#define FALSE_POSITIVE_RATE 15 /* acceptable false positive percentage rate */ + +static struct bloom_filter *dict_bloom; + +/* Fixtures */ + + +/** + * create dictionary bloom + * + * bloom constructed from the first BLOOM_SIZE entries of the + * dictionary + */ +static void dict_bloom_create(void) +{ + FILE *dictf; + char buf[BUFSIZ]; + int i; + + dictf = fopen("/usr/share/dict/words", "r"); + ck_assert(dictf != NULL); + + dict_bloom = bloom_create(BLOOM_SIZE); + ck_assert(dict_bloom != NULL); + + for (i = 0; i < BLOOM_SIZE; i++) { + fscanf(dictf, "%s", buf); + bloom_insert_str(dict_bloom, buf, strlen(buf)); + } + + fclose(dictf); +} + +static void dict_bloom_teardown(void) +{ + bloom_destroy(dict_bloom); +} + +/* Tests */ + +START_TEST(bloom_create_test) +{ + struct bloom_filter *b; + b = bloom_create(BLOOM_SIZE); + bloom_insert_str(b, "NetSurf", 7); + ck_assert(bloom_search_str(b, "NetSurf", 7)); + ck_assert(!bloom_search_str(b, "NotSurf", 7)); + bloom_destroy(b); +} +END_TEST + +START_TEST(bloom_match_test) +{ + FILE *dictf; + char buf[BUFSIZ]; + int i; + + dictf = fopen("/usr/share/dict/words", "r"); + ck_assert(dictf != NULL); + + for (i = 0; i < BLOOM_SIZE; i++) { + fscanf(dictf, "%s", buf); + ck_assert(bloom_search_str(dict_bloom, buf, strlen(buf))); + } + fclose(dictf); +} +END_TEST + +START_TEST(bloom_falsepositive_test) +{ + FILE *dictf; + char buf[BUFSIZ]; + int i; + int false_positives = 0; + + dictf = fopen("/usr/share/dict/words", "r"); + ck_assert(dictf != NULL); + + /* skip elements known presnent */ + for (i = 0; i < BLOOM_SIZE; i++) { + fscanf(dictf, "%s", buf); + } + + /* false positives are possible we are checking for low rate */ + for (i = 0; i < BLOOM_SIZE; i++) { + fscanf(dictf, "%s", buf); + if (bloom_search_str(dict_bloom, buf, strlen(buf)) == true) + false_positives++; + } + fclose(dictf); + + printf("false positive rate %d%%/%d%%\n", + (false_positives * 100)/BLOOM_SIZE, + FALSE_POSITIVE_RATE); + ck_assert(false_positives < ((BLOOM_SIZE * FALSE_POSITIVE_RATE) / 100)); +} +END_TEST + + +/* Suite */ + +Suite *bloom_suite(void) +{ + Suite *s; + TCase *tc_create; + TCase *tc_match; + TCase *tc_falsepositive; + + s = suite_create("Bloom filter"); + + /* Basic API creation */ + tc_create = tcase_create("Creation"); + + tcase_add_test(tc_create, bloom_create_test); + + suite_add_tcase(s, tc_create); + + + /* Matching entry tests */ + tc_match = tcase_create("Match"); + + tcase_add_checked_fixture(tc_match, + dict_bloom_create, + dict_bloom_teardown); + + tcase_add_test(tc_match, bloom_match_test); + + suite_add_tcase(s, tc_match); + + + /* Not matching tests */ + tc_falsepositive = tcase_create("False positive rate"); + + tcase_add_checked_fixture(tc_falsepositive, + dict_bloom_create, + dict_bloom_teardown); + + tcase_add_test(tc_falsepositive, bloom_falsepositive_test); + + suite_add_tcase(s, tc_falsepositive); + + return s; +} + +int main(int argc, char **argv) +{ + int number_failed; + Suite *s; + SRunner *sr; + + s = bloom_suite(); + + sr = srunner_create(s); + srunner_run_all(sr, CK_ENV); + + number_failed = srunner_ntests_failed(sr); + srunner_free(sr); + + return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/utils/bloom.c b/utils/bloom.c index df9e76e1c..e51ee63fe 100644 --- a/utils/bloom.c +++ b/utils/bloom.c @@ -16,8 +16,10 @@ * along with this program. If not, see . */ -/** \file - * Trivial bloom filter */ +/** + * \file + * Trivial bloom filter + */ #include #include "utils/bloom.h" @@ -107,58 +109,3 @@ uint32_t bloom_items(struct bloom_filter *b) return b->items; } -#ifdef TEST_RIG - -#include -#include -#include - -int main(int argc, char *arg[]) -{ - struct bloom_filter *b = bloom_create(8192); - FILE *dict = fopen("/usr/share/dict/words", "r"); - char buf[BUFSIZ]; - int false_positives = 0, total = 0; - - for (int i = 0; i < 8192; i++) { - fscanf(dict, "%s", buf); - printf("adding %s\n", buf); - bloom_insert_str(b, buf, strlen(buf)); - } - - printf("adding NetSurf\n"); - - bloom_insert_str(b, "NetSurf", 7); - printf("checking NetSurf (should be true)\n"); - assert(bloom_search_str(b, "NetSurf", 7)); - - fseek(dict, 0, SEEK_SET); - - for (int i = 0; i < 8192; i++) { - fscanf(dict, "%s", buf); - printf("checking %s (should be true)\n", buf); - assert(bloom_search_str(b, buf, strlen(buf))); - - total++; - } - - for (int i = 0; i < 8192; i++) { - fscanf(dict, "%s", buf); - printf("checking %s (should be false)\n", buf); - if (bloom_search_str(b, buf, strlen(buf)) == true) - false_positives++; - total++; - } - - printf("false positives: %d of %d, %f%%\n", - false_positives, total, - ((float)false_positives / total) * 100); - - fclose(dict); - bloom_destroy(b); - - return 0; -} - -#endif /* TEST_RIG */ - -- cgit v1.2.3