summaryrefslogtreecommitdiff
path: root/content/url_store.c
diff options
context:
space:
mode:
authorRichard Wilson <rjw@netsurf-browser.org>2005-04-27 00:01:17 +0000
committerRichard Wilson <rjw@netsurf-browser.org>2005-04-27 00:01:17 +0000
commit8a85a5796507f300df9e32fa8c934fd3520a19a4 (patch)
tree05b7537659e9ff20d209b71cec72b346df039dab /content/url_store.c
parent808298b2428deeeee8b09aeaa390887330c9abe6 (diff)
downloadnetsurf-8a85a5796507f300df9e32fa8c934fd3520a19a4.tar.gz
netsurf-8a85a5796507f300df9e32fa8c934fd3520a19a4.tar.bz2
[project @ 2005-04-27 00:01:17 by rjw]
Improve the speed of URL matching svn path=/import/netsurf/; revision=1686
Diffstat (limited to 'content/url_store.c')
-rw-r--r--content/url_store.c65
1 files changed, 39 insertions, 26 deletions
diff --git a/content/url_store.c b/content/url_store.c
index ae6501c0d..3c2f6022f 100644
--- a/content/url_store.c
+++ b/content/url_store.c
@@ -35,6 +35,7 @@ static struct hostname_data *url_store_hostnames = NULL;
static struct hostname_data *url_store_find_hostname(const char *url);
static struct hostname_data *url_store_match_hostname(const char *url,
struct hostname_data *previous);
+static char *url_store_match_scheme = NULL;
/**
@@ -261,7 +262,8 @@ static struct hostname_data *url_store_match_hostname(const char *url,
break;
}
/* special case: if hostname is not www then try it */
- if (www_test && ((current->hostname_length - 4) >= hostname_length) &&
+ if (www_test && ((current->hostname_length - 4) >=
+ hostname_length) &&
(!strncmp(current->hostname, "www.", 4)) &&
(!strncmp(hostname, current->hostname + 4,
hostname_length))) {
@@ -290,7 +292,6 @@ static struct hostname_data *url_store_match_hostname(const char *url,
char *url_store_match(const char *url, struct url_data **reference) {
struct hostname_data *hostname;
struct url_data *search = NULL;
- char *scheme;
int scheme_length;
int url_length;
url_func_result res;
@@ -300,23 +301,27 @@ char *url_store_match(const char *url, struct url_data **reference) {
if (!url_store_hostnames)
return NULL;
-
- /* find the first URL, not necessarily matching */
+
+ /* find the scheme and first URL, not necessarily matching */
if (!*reference) {
hostname = url_store_match_hostname(url, NULL);
if (!hostname)
return NULL;
+ if (url_store_match_scheme) {
+ free(url_store_match_scheme);
+ url_store_match_scheme = NULL;
+ }
+ res = url_scheme(url, &url_store_match_scheme);
+ if (res != URL_FUNC_OK)
+ return NULL;
} else {
search = *reference;
hostname = search->parent;
}
- res = url_scheme(url, &scheme);
- if (res != URL_FUNC_OK)
- return NULL;
- scheme_length = strlen(scheme);
+ scheme_length = strlen(url_store_match_scheme);
url_length = strlen(url);
- www_test = (!strcmp(scheme, "http") &&
+ www_test = (!strcmp(url_store_match_scheme, "http") &&
strncmp(url + 4 + 3, "www.", 4)); /* 'http' + '://' */
/* work through all our strings, ignoring the scheme and 'www.' */
@@ -333,29 +338,31 @@ char *url_store_match(const char *url, struct url_data **reference) {
hostname = url_store_match_hostname(url, hostname);
if (!hostname)
return NULL;
- } else if ((search->data.visits > 0) && (search->data.requests > 0)){
+ } else if ((search->data.visits > 0) &&
+ (search->data.requests > 0)){
/* straight match */
if ((search->data.url_length >= url_length) &&
- (!strncmp(search->data.url, url, url_length))) {
- free(scheme);
+ (!strncmp(search->data.url, url,
+ url_length))) {
*reference = search;
return search->data.url;
}
/* try with 'www.' inserted after the scheme */
- if (www_test && ((search->data.url_length - 4) >= url_length) &&
- (!strncmp(search->data.url, scheme, scheme_length)) &&
- (!strncmp(search->data.url + scheme_length + 3, "www.", 4)) &&
+ if (www_test && ((search->data.url_length - 4) >=
+ url_length) &&
+ (!strncmp(search->data.url,
+ url_store_match_scheme,
+ scheme_length)) &&
+ (!strncmp(search->data.url + scheme_length + 3,
+ "www.", 4)) &&
(!strncmp(search->data.url + scheme_length + 7,
url + scheme_length + 3,
url_length - scheme_length - 3))) {
- free(scheme);
*reference = search;
return search->data.url;
}
}
}
-
- free(scheme);
return NULL;
}
@@ -427,8 +434,8 @@ void url_store_load(const char *file) {
url->requests = atoi(s);
}
} else if (version == 101) {
- /* version 101 is as 100, but in hostname chunks, pre-sorted in reverse
- * alphabetical order */
+ /* version 101 is as 100, but in hostname chunks, pre-sorted
+ * in reverse alphabetical order */
while (fgets(s, MAXIMUM_URL_LENGTH, fp)) {
if (s[strlen(s) - 1] == '\n')
s[strlen(s) - 1] = '\0';
@@ -447,7 +454,8 @@ void url_store_load(const char *file) {
if (!result)
break;
result->data.url_length = strlen(s);
- result->data.url = malloc(result->data.url_length + 1);
+ result->data.url = malloc(result->
+ data.url_length + 1);
if (!result->data.url) {
free(result);
break;
@@ -491,12 +499,14 @@ void url_store_save(const char *file) {
/* file format version number */
fprintf(fp, "101\n");
- for (search = url_store_hostnames; search && search->next; search = search->next);
+ for (search = url_store_hostnames; search && search->next;
+ search = search->next);
for (; search; search = search->previous) {
url_count = 0;
for (url = search->url; url; url = url->next)
if ((url->data.requests > 0) &&
- (strlen(url->data.url) < MAXIMUM_URL_LENGTH))
+ (strlen(url->data.url) <
+ MAXIMUM_URL_LENGTH))
url_count++;
free(normal);
normal = url_store_match_string(search->hostname);
@@ -505,9 +515,12 @@ void url_store_save(const char *file) {
for (url = search->url; url->next; url = url->next);
for (; url; url = url->previous)
if ((url->data.requests > 0) &&
- (strlen(url->data.url) < MAXIMUM_URL_LENGTH))
- fprintf(fp, "%s\n%i\n%i\n", url->data.url,
- url->data.visits, url->data.requests);
+ (strlen(url->data.url) <
+ MAXIMUM_URL_LENGTH))
+ fprintf(fp, "%s\n%i\n%i\n",
+ url->data.url,
+ url->data.visits,
+ url->data.requests);
}
}
fclose(fp);