summaryrefslogtreecommitdiff
path: root/content/urldb.c
diff options
context:
space:
mode:
Diffstat (limited to 'content/urldb.c')
-rw-r--r--content/urldb.c1443
1 files changed, 891 insertions, 552 deletions
diff --git a/content/urldb.c b/content/urldb.c
index b6eaf630b..4bdb10e66 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -32,14 +32,14 @@
*
* This provides something looking like:
*
- * root (a sentinel)
- * |
- * -------------------------------------------------
- * | | | | | | |
+ * root (a sentinel)
+ * |
+ * -------------------------------------------------
+ * | | | | | | |
* com edu gov 127.0.0.1 net org uk TLDs
- * | | | | | |
+ * | | | | | |
* google ... ... ... ... co 2LDs
- * | |
+ * | |
* www bbc Hosts/Subdomains
* |
* www ...
@@ -62,11 +62,11 @@
*
* (sentinel)
* |
- * path
- * |
- * to
- * |
- * resource.html
+ * path
+ * |
+ * to
+ * |
+ * resource.html
*
* This represents the absolute path "/path/to/resource.html". The leaf node
* "resource.html" contains the last visited time of the resource.
@@ -108,6 +108,7 @@
#include "utils/time.h"
#include "utils/nsurl.h"
#include "utils/ascii.h"
+#include "utils/http.h"
#include "netsurf/bitmap.h"
#include "desktop/cookie_manager.h"
#include "desktop/gui_internal.h"
@@ -118,7 +119,7 @@
/**
* cookie entry.
*
- * \warn This *must* be kept in sync with the public interface in
+ * \warning This *must* be kept in sync with the public interface in
* netsurf/cookie_db.h
*/
struct cookie_internal_data {
@@ -143,28 +144,43 @@ struct cookie_internal_data {
};
-/* A protection space is defined as a tuple canonical_root_url and realm.
- * This structure lives as linked list element in a leaf host_part struct
- * so we need additional scheme and port to have a canonical_root_url. */
+
+/**
+ * A protection space
+ *
+ * This is defined as a tuple canonical_root_url and realm. This
+ * structure lives as linked list element in a leaf host_part struct
+ * so we need additional scheme and port to have a canonical_root_url.
+ */
struct prot_space_data {
- lwc_string *scheme; /**< URL scheme of canonical hostname of this
- * protection space. */
- unsigned int port; /**< Port number of canonical hostname of this
- * protection space. When 0, it means the
- * default port for given scheme, i.e. 80
- * (http), 443 (https). */
- char *realm; /**< Protection realm */
-
- char *auth; /**< Authentication details for this
- * protection space in form
- * username:password */
- struct prot_space_data *next; /**< Next sibling */
-};
+ /**
+ * URL scheme of canonical hostname of this protection space.
+ */
+ lwc_string *scheme;
+ /**
+ * Port number of canonical hostname of this protection
+ * space. When 0, it means the default port for given scheme,
+ * i.e. 80 (http), 443 (https).
+ */
+ unsigned int port;
+ /** Protection realm */
+ char *realm;
-struct cache_internal_data {
- char filename[12]; /**< Cached filename, or first byte 0 for none */
+ /**
+ * Authentication details for this protection space in form
+ * username:password
+ */
+ char *auth;
+ /** Next sibling */
+ struct prot_space_data *next;
};
+
+/**
+ * meta data about a url
+ *
+ * \warning must be kept in sync with url_data structure in netsurf/url_db.h
+ */
struct url_internal_data {
char *title; /**< Resource title */
unsigned int visits; /**< Visit count */
@@ -172,6 +188,10 @@ struct url_internal_data {
content_type type; /**< Type of resource */
};
+
+/**
+ * data entry for url
+ */
struct path_data {
nsurl *url; /**< Full URL */
lwc_string *scheme; /**< URL scheme for data */
@@ -183,45 +203,66 @@ struct path_data {
char **fragment; /**< Array of fragments */
bool persistent; /**< This entry should persist */
- struct bitmap *thumb; /**< Thumbnail image of resource */
struct url_internal_data urld; /**< URL data for resource */
- struct cache_internal_data cache; /**< Cache data for resource */
- const struct prot_space_data *prot_space; /**< Protection space
- * to which this resource belongs too. Can be
- * NULL when it does not belong to a protection
- * space or when it is not known. No
- * ownership (is with struct host_part::prot_space). */
- struct cookie_internal_data *cookies; /**< Cookies associated with resource */
- struct cookie_internal_data *cookies_end; /**< Last cookie in list */
+
+ /**
+ * Protection space to which this resource belongs. Can be
+ * NULL when it does not belong to a protection space or when
+ * it is not known. No ownership (is with struct host_part::prot_space).
+ */
+ const struct prot_space_data *prot_space;
+ /** Cookies associated with resource */
+ struct cookie_internal_data *cookies;
+ /** Last cookie in list */
+ struct cookie_internal_data *cookies_end;
struct path_data *next; /**< Next sibling */
struct path_data *prev; /**< Previous sibling */
- struct path_data *parent; /**< Parent path segment */
- struct path_data *children; /**< Child path segments */
- struct path_data *last; /**< Last child */
+ struct path_data *parent; /**< Parent path segment */
+ struct path_data *children; /**< Child path segments */
+ struct path_data *last; /**< Last child */
+};
+
+struct hsts_data {
+ time_t expires; /**< Expiry time; only saved when later than the URL expiry cutoff */
+ bool include_sub_domains; /**< Whether to include subdomains; saved as an integer flag */
+};
struct host_part {
- /**< Known paths on this host. This _must_ be first so that
- * struct host_part *h = (struct host_part *)mypath; works */
+ /**
+ * Known paths on this host. This _must_ be first so that
+ * struct host_part *h = (struct host_part *)mypath; works
+ */
struct path_data paths;
- bool permit_invalid_certs; /**< Allow access to SSL protected
- * resources on this host without
- * verifying certificate authenticity
- */
+ /**
+ * Allow access to SSL protected resources on this host
+ * without verifying certificate authenticity
+ */
+ bool permit_invalid_certs;
+ /* HSTS data */
+ struct hsts_data hsts;
- char *part; /**< Part of host string */
+ /**
+ * Part of host string
+ */
+ char *part;
- struct prot_space_data *prot_space; /**< Linked list of all known
- * proctection spaces known for his host and
- * all its schems and ports. */
+ /**
+ * Linked list of all protection spaces known for this
+ * host and all its schemes and ports.
+ */
+ struct prot_space_data *prot_space;
struct host_part *next; /**< Next sibling */
struct host_part *prev; /**< Previous sibling */
- struct host_part *parent; /**< Parent host part */
- struct host_part *children; /**< Child host parts */
+ struct host_part *parent; /**< Parent host part */
+ struct host_part *children; /**< Child host parts */
};
+
+/**
+ * search index node
+ */
struct search_node {
const struct host_part *data; /**< Host tree entry */
@@ -247,26 +288,56 @@ static struct search_node *search_trees[NUM_SEARCH_TREES] = {
&empty, &empty, &empty, &empty
};
+/** Minimum cookie database file version */
#define MIN_COOKIE_FILE_VERSION 100
+/** Current cookie database file version */
#define COOKIE_FILE_VERSION 102
+/** loaded cookie file version */
static int loaded_cookie_file_version;
-#define MIN_URL_FILE_VERSION 106
-/** URL database file version */
-#define URL_FILE_VERSION 106
+/** Minimum URL database file version */
+#define MIN_URL_FILE_VERSION 106
+/** Current URL database file version */
+#define URL_FILE_VERSION 107
-/* Bloom filter used for short-circuting the false case of "is this
+/**
+ * filter for url presence in database
+ *
+ * Bloom filter used for short-circuiting the false case of "is this
* URL in the database?". BLOOM_SIZE controls how large the filter is
* in bytes. Primitive experimentation shows that for a filter of X
* bytes filled with X items, searching for X items not in the filter
* has a 5% false-positive rate. We set it to 32kB, which should be
- * enough for all but the largest databases, while not being shockingly
- * wasteful on memory.
+ * enough for all but the largest databases, while not being
+ * shockingly wasteful on memory.
*/
static struct bloom_filter *url_bloom;
+/**
+ * Size of url filter
+ */
#define BLOOM_SIZE (1024 * 32)
+/**
+ * Write a time_t to a file portably, as one newline-terminated line.
+ * Uses the text from nsc_sntimet(), or (int)val if that yields 0.
+ * \param fp File to write to
+ * \param val the unix time value to output
+ * \return NSERROR_OK always; fprintf() results are not checked
+ */
+static nserror urldb_write_timet(FILE *fp, time_t val)
+{
+ int use;
+ char op[32];
+
+ use = nsc_sntimet(op, 32, &val);
+ if (use == 0) {
+ fprintf(fp, "%i\n", (int)val);
+ } else {
+ fprintf(fp, "%.*s\n", use, op);
+ }
+ return NSERROR_OK;
+}
/**
* Write paths associated with a host
@@ -279,9 +350,14 @@ static struct bloom_filter *url_bloom;
* \param path_used Used size of path
* \param expiry Expiry time of URLs
*/
-static void urldb_write_paths(const struct path_data *parent, const char *host,
- FILE *fp, char **path, int *path_alloc, int *path_used,
- time_t expiry)
+static void
+urldb_write_paths(const struct path_data *parent,
+ const char *host,
+ FILE *fp,
+ char **path,
+ int *path_alloc,
+ int *path_used,
+ time_t expiry)
{
const struct path_data *p = parent;
int i;
@@ -291,16 +367,19 @@ static void urldb_write_paths(const struct path_data *parent, const char *host,
int len = *path_used + seglen + 1;
if (*path_alloc < len) {
- char *temp = realloc(*path,
- (len > 64) ? len : *path_alloc + 64);
- if (!temp)
+ char *temp;
+ temp = realloc(*path,
+ (len > 64) ? len : *path_alloc + 64);
+ if (!temp) {
return;
+ }
*path = temp;
*path_alloc = (len > 64) ? len : *path_alloc + 64;
}
- if (p->segment != NULL)
+ if (p->segment != NULL) {
memcpy(*path + *path_used - 1, p->segment, seglen);
+ }
if (p->children != NULL) {
(*path)[*path_used + seglen - 1] = '/';
@@ -317,22 +396,29 @@ static void urldb_write_paths(const struct path_data *parent, const char *host,
p = p->children;
} else {
/* leaf node */
- if (p->persistent ||((p->urld.last_visit > expiry) &&
- (p->urld.visits > 0))) {
+ if (p->persistent ||
+ ((p->urld.last_visit > expiry) &&
+ (p->urld.visits > 0))) {
fprintf(fp, "%s\n", lwc_string_data(p->scheme));
- if (p->port)
+ if (p->port) {
fprintf(fp,"%d\n", p->port);
- else
+ } else {
fprintf(fp, "\n");
+ }
fprintf(fp, "%s\n", *path);
/** \todo handle fragments? */
- fprintf(fp, "%i\n%i\n%i\n", p->urld.visits,
- (int)p->urld.last_visit,
- (int)p->urld.type);
+ /* number of visits */
+ fprintf(fp, "%i\n", p->urld.visits);
+
+ /* time entry was last used */
+ urldb_write_timet(fp, p->urld.last_visit);
+
+ /* entry type */
+ fprintf(fp, "%i\n", (int)p->urld.type);
fprintf(fp, "\n");
@@ -346,8 +432,9 @@ static void urldb_write_paths(const struct path_data *parent, const char *host,
i--)
s[i] = '\0';
fprintf(fp, "%s\n", p->urld.title);
- } else
+ } else {
fprintf(fp, "\n");
+ }
}
/* Now, find next node to process. */
@@ -384,8 +471,10 @@ static void urldb_write_paths(const struct path_data *parent, const char *host,
* \param expiry Expiry time for URLs
* \param count Pointer to count
*/
-static void urldb_count_urls(const struct path_data *root, time_t expiry,
- unsigned int *count)
+static void
+urldb_count_urls(const struct path_data *root,
+ time_t expiry,
+ unsigned int *count)
{
const struct path_data *p = root;
@@ -395,8 +484,9 @@ static void urldb_count_urls(const struct path_data *root, time_t expiry,
p = p->children;
} else {
/* No more children, increment count if required */
- if (p->persistent || ((p->urld.last_visit > expiry) &&
- (p->urld.visits > 0))) {
+ if (p->persistent ||
+ ((p->urld.last_visit > expiry) &&
+ (p->urld.visits > 0))) {
(*count)++;
}
@@ -429,7 +519,8 @@ static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
unsigned int path_count = 0;
char *path, *p, *end;
int path_alloc = 64, path_used = 1;
- time_t expiry;
+ time_t expiry, hsts_expiry = 0;
+ int hsts_include_subdomains = 0;
expiry = time(NULL) - ((60 * 60 * 24) * nsoption_int(expire_url));
@@ -445,9 +536,9 @@ static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
path[0] = '\0';
for (h = parent->data, p = host, end = host + sizeof host;
- h && h != &db_root && p < end; h = h->parent) {
+ h && h != &db_root && p < end; h = h->parent) {
int written = snprintf(p, end - p, "%s%s", h->part,
- (h->parent && h->parent->parent) ? "." : "");
+ (h->parent && h->parent->parent) ? "." : "");
if (written < 0) {
free(path);
return;
@@ -455,13 +546,25 @@ static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
p += written;
}
+ h = parent->data;
+ if (h && h->hsts.expires > expiry) {
+ hsts_expiry = h->hsts.expires;
+ hsts_include_subdomains = h->hsts.include_sub_domains;
+ }
+
urldb_count_urls(&parent->data->paths, expiry, &path_count);
if (path_count > 0) {
- fprintf(fp, "%s\n%i\n", host, path_count);
+ fprintf(fp, "%s %i ", host, hsts_include_subdomains);
+ urldb_write_timet(fp, hsts_expiry);
+ fprintf(fp, "%i\n", path_count);
urldb_write_paths(&parent->data->paths, host, fp,
- &path, &path_alloc, &path_used, expiry);
+ &path, &path_alloc, &path_used, expiry);
+ } else if (hsts_expiry) {
+ fprintf(fp, "%s %i ", host, hsts_include_subdomains);
+ urldb_write_timet(fp, hsts_expiry);
+ fprintf(fp, "0\n");
}
free(path);
@@ -478,7 +581,8 @@ static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
* \param cookie_callback Callback function
* \return true to continue, false otherwise
*/
-static bool urldb_iterate_entries_path(const struct path_data *parent,
+static bool
+urldb_iterate_entries_path(const struct path_data *parent,
bool (*url_callback)(nsurl *url, const struct url_data *data),
bool (*cookie_callback)(const struct cookie_data *data))
{
@@ -503,7 +607,7 @@ static bool urldb_iterate_entries_path(const struct path_data *parent,
assert(p->url);
if (!url_callback(p->url,
- (const struct url_data *) u))
+ (const struct url_data *) u))
return false;
} else {
c = (const struct cookie_data *)p->cookies;
@@ -550,8 +654,10 @@ static bool urldb__host_is_ip_address(const char *host)
#ifndef NO_IPV6
struct in6_addr ipv6;
char ipv6_addr[64];
+ unsigned int ipv6_addr_len;
#endif
- /** @todo FIXME Some parts of urldb.c make confusions between hosts
+ /**
+ * @todo FIXME Some parts of urldb.c make confusions between hosts
* and "prefixes", we can sometimes be erroneously passed more than
* just a host. Sometimes we may be passed trailing slashes, or even
* whole path segments. A specific criminal in this class is
@@ -575,8 +681,9 @@ static bool urldb__host_is_ip_address(const char *host)
char *c = strdup(host);
c[slash - host] = '\0';
sane_host = c;
- host_len = slash - host - 1;
- LOG("WARNING: called with non-host '%s'", host);
+ host_len = slash - host;
+ NSLOG(netsurf, INFO, "WARNING: called with non-host '%s'",
+ host);
}
if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
@@ -603,11 +710,18 @@ static bool urldb__host_is_ip_address(const char *host)
}
#ifndef NO_IPV6
- if (sane_host[0] != '[' || sane_host[host_len] != ']')
+ if ((host_len < 6) ||
+ (sane_host[0] != '[') ||
+ (sane_host[host_len - 1] != ']')) {
goto out_false;
+ }
- strncpy(ipv6_addr, sane_host + 1, sizeof(ipv6_addr));
- ipv6_addr[sizeof(ipv6_addr) - 1] = '\0';
+ ipv6_addr_len = host_len - 2;
+ if (ipv6_addr_len >= sizeof(ipv6_addr)) {
+ ipv6_addr_len = sizeof(ipv6_addr) - 1;
+ }
+ strncpy(ipv6_addr, sane_host + 1, ipv6_addr_len);
+ ipv6_addr[ipv6_addr_len] = '\0';
if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1)
goto out_true;
@@ -626,8 +740,8 @@ out_true:
/**
* Compare host_part with prefix
*
- * \param a
- * \param b
+ * \param a host part
+ * \param b prefix
* \return 0 if match, non-zero, otherwise
*/
static int urldb_search_match_prefix(const struct host_part *a, const char *b)
@@ -661,12 +775,13 @@ static int urldb_search_match_prefix(const struct host_part *a, const char *b)
/* Consider segment lengths only in the case
* where the prefix contains segments */
plen = strlen(a->part);
- if (plen > dot - b)
+ if (plen > dot - b) {
/* len(a) > len(b) */
return 1;
- else if (plen < dot - b)
+ } else if (plen < dot - b) {
/* len(a) < len(b) */
return -1;
+ }
}
b = dot + 1;
@@ -677,12 +792,13 @@ static int urldb_search_match_prefix(const struct host_part *a, const char *b)
* a) The path lengths differ
* or b) The hosts are identical
*/
- if (a && a != &db_root && b >= end)
+ if (a && a != &db_root && b >= end) {
/* len(a) > len(b) => prefix matches */
return 0;
- else if ((!a || a == &db_root) && b < end)
+ } else if ((!a || a == &db_root) && b < end) {
/* len(a) < len(b) => prefix does not match */
return -1;
+ }
/* Identical */
return 0;
@@ -711,31 +827,38 @@ urldb_iterate_partial_host(struct search_node *root,
c = urldb_search_match_prefix(root->data, prefix);
- if (c > 0)
+ if (c > 0) {
/* No match => look in left subtree */
- return urldb_iterate_partial_host(root->left, prefix,
- callback);
- else if (c < 0)
+ return urldb_iterate_partial_host(root->left,
+ prefix,
+ callback);
+ } else if (c < 0) {
/* No match => look in right subtree */
- return urldb_iterate_partial_host(root->right, prefix,
- callback);
- else {
+ return urldb_iterate_partial_host(root->right,
+ prefix,
+ callback);
+ } else {
/* Match => iterate over l/r subtrees & process this node */
- if (!urldb_iterate_partial_host(root->left, prefix,
- callback))
+ if (!urldb_iterate_partial_host(root->left,
+ prefix,
+ callback)) {
return false;
+ }
if (root->data->paths.children) {
/* and extract all paths attached to this host */
if (!urldb_iterate_entries_path(&root->data->paths,
- callback, NULL)) {
+ callback,
+ NULL)) {
return false;
}
}
- if (!urldb_iterate_partial_host(root->right, prefix,
- callback))
+ if (!urldb_iterate_partial_host(root->right,
+ prefix,
+ callback)) {
return false;
+ }
}
return true;
@@ -745,54 +868,54 @@ urldb_iterate_partial_host(struct search_node *root,
/**
* Partial path iterator (internal)
*
+ * Given: http://www.example.org/a/b/c/d//e
+ * and assuming a path tree:
+ * ^
+ * / \
+ * a1 b1
+ * / \
+ * a2 b2
+ * /|\
+ * a b c
+ * 3 3 |
+ * d
+ * |
+ * e
+ * / \
+ * f g
+ *
+ * Prefix will be: p will be:
+ *
+ * a/b/c/d//e a1
+ * b/c/d//e a2
+ * b/c/d//e b2
+ * c/d//e a3
+ * c/d//e b3
+ * c/d//e c
+ * d//e d
+ * /e e (skip /)
+ * e e
+ *
+ * I.E. perform a breadth-first search of the tree.
+ *
* \param parent Root of (sub)tree to traverse
* \param prefix Prefix to match
* \param callback Callback function
* \return true to continue, false otherwise
*/
-static bool urldb_iterate_partial_path(const struct path_data *parent,
- const char *prefix, bool (*callback)(nsurl *url,
- const struct url_data *data))
+static bool
+urldb_iterate_partial_path(const struct path_data *parent,
+ const char *prefix,
+ bool (*callback)(nsurl *url, const struct url_data *data))
{
const struct path_data *p = parent->children;
const char *slash, *end = prefix + strlen(prefix);
- /*
- * Given: http://www.example.org/a/b/c/d//e
- * and assuming a path tree:
- * .
- * / \
- * a1 b1
- * / \
- * a2 b2
- * /|\
- * a b c
- * 3 3 |
- * d
- * |
- * e
- * / \
- * f g
- *
- * Prefix will be: p will be:
- *
- * a/b/c/d//e a1
- * b/c/d//e a2
- * b/c/d//e b3
- * c/d//e a3
- * c/d//e b3
- * c/d//e c
- * d//e d
- * /e e (skip /)
- * e e
- *
- * I.E. we perform a breadth-first search of the tree.
- */
-
do {
slash = strchr(prefix, '/');
- if (!slash)
+ if (!slash) {
slash = end;
+ }
if (slash == prefix && *prefix == '/') {
/* Ignore "//" */
@@ -805,9 +928,11 @@ static bool urldb_iterate_partial_path(const struct path_data *parent,
if (slash == end) {
/* we've run out of prefix, so all
* paths below this one match */
- if (!urldb_iterate_entries_path(p, callback,
- NULL))
+ if (!urldb_iterate_entries_path(p,
+ callback,
+ NULL)) {
return false;
+ }
/* Progress to next sibling */
p = p->next;
@@ -835,30 +960,37 @@ static bool urldb_iterate_partial_path(const struct path_data *parent,
* \param cookie_callback Callback function
* \return true to continue, false otherwise
*/
-static bool urldb_iterate_entries_host(struct search_node *parent,
- bool (*url_callback)(nsurl *url,
- const struct url_data *data),
+static bool
+urldb_iterate_entries_host(struct search_node *parent,
+ bool (*url_callback)(nsurl *url, const struct url_data *data),
bool (*cookie_callback)(const struct cookie_data *data))
{
- if (parent == &empty)
+ if (parent == &empty) {
return true;
+ }
if (!urldb_iterate_entries_host(parent->left,
- url_callback, cookie_callback))
+ url_callback,
+ cookie_callback)) {
return false;
+ }
- if ((parent->data->paths.children) || ((cookie_callback) &&
- (parent->data->paths.cookies))) {
+ if ((parent->data->paths.children) ||
+ ((cookie_callback) &&
+ (parent->data->paths.cookies))) {
/* We have paths (or domain cookies), so iterate them */
if (!urldb_iterate_entries_path(&parent->data->paths,
- url_callback, cookie_callback)) {
+ url_callback,
+ cookie_callback)) {
return false;
}
}
if (!urldb_iterate_entries_host(parent->right,
- url_callback, cookie_callback))
+ url_callback,
+ cookie_callback)) {
return false;
+ }
return true;
}
@@ -871,16 +1003,17 @@ static bool urldb_iterate_entries_host(struct search_node *parent,
* \param parent Parent node to add to
* \return Pointer to added node, or NULL on memory exhaustion
*/
-static struct host_part *urldb_add_host_node(const char *part,
- struct host_part *parent)
+static struct host_part *
+urldb_add_host_node(const char *part, struct host_part *parent)
{
struct host_part *d;
assert(part && parent);
d = calloc(1, sizeof(struct host_part));
- if (!d)
+ if (!d) {
return NULL;
+ }
d->part = strdup(part);
if (!d->part) {
@@ -889,8 +1022,9 @@ static struct host_part *urldb_add_host_node(const char *part,
}
d->next = parent->children;
- if (parent->children)
+ if (parent->children) {
parent->children->prev = d;
+ }
d->parent = parent;
parent->children = d;
@@ -900,6 +1034,10 @@ static struct host_part *urldb_add_host_node(const char *part,
/**
* Fragment comparator callback for qsort
+ *
+ * \param a first value
+ * \param b second value
+ * \return 0 for equal else positive or negative value on comparison
*/
static int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
@@ -928,13 +1066,13 @@ urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
return segment;
temp = realloc(segment->fragment,
- (segment->frag_cnt + 1) * sizeof(char *));
+ (segment->frag_cnt + 1) * sizeof(char *));
if (!temp)
return NULL;
segment->fragment = temp;
segment->fragment[segment->frag_cnt] =
- strdup(lwc_string_data(fragment));
+ strdup(lwc_string_data(fragment));
if (!segment->fragment[segment->frag_cnt]) {
/* Don't free temp - it's now our buffer */
return NULL;
@@ -944,8 +1082,10 @@ urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
/* We want fragments in alphabetical order, so sort them
* It may prove better to insert in alphabetical order instead */
- qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
- urldb_add_path_fragment_cmp);
+ qsort(segment->fragment,
+ segment->frag_cnt,
+ sizeof (char *),
+ urldb_add_path_fragment_cmp);
return segment;
}
@@ -962,9 +1102,11 @@ urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
* \return Pointer to added node, or NULL on memory exhaustion
*/
static struct path_data *
-urldb_add_path_node(lwc_string *scheme, unsigned int port,
- const char *segment, lwc_string *fragment,
- struct path_data *parent)
+urldb_add_path_node(lwc_string *scheme,
+ unsigned int port,
+ const char *segment,
+ lwc_string *fragment,
+ struct path_data *parent)
{
struct path_data *d, *e;
@@ -1025,7 +1167,7 @@ urldb_add_path_node(lwc_string *scheme, unsigned int port,
/**
* Get the search tree for a particular host
*
- * \param host the host to lookup
+ * \param host the host to lookup
* \return the corresponding search tree
*/
static struct search_node **urldb_get_search_tree_direct(const char *host)
@@ -1044,7 +1186,7 @@ static struct search_node **urldb_get_search_tree_direct(const char *host)
/**
* Get the search tree for a particular host
*
- * \param host the host to lookup
+ * \param host the host to lookup
* \return the corresponding search tree
*/
static struct search_node *urldb_get_search_tree(const char *host)
@@ -1054,10 +1196,10 @@ static struct search_node *urldb_get_search_tree(const char *host)
/**
- * Compare host_part with a string
+ * Compare host part with a string
*
- * \param a
- * \param b
+ * \param a host part
+ * \param b string to compare
* \return 0 if match, non-zero, otherwise
*/
static int urldb_search_match_string(const struct host_part *a, const char *b)
@@ -1089,12 +1231,13 @@ static int urldb_search_match_string(const struct host_part *a, const char *b)
/* The strings matched, now check that the lengths do, too */
plen = strlen(a->part);
- if (plen > dot - b)
+ if (plen > dot - b) {
/* len(a) > len(b) */
return 1;
- else if (plen < dot - b)
+ } else if (plen < dot - b) {
/* len(a) < len(b) */
return -1;
+ }
b = dot + 1;
a = a->parent;
@@ -1104,12 +1247,13 @@ static int urldb_search_match_string(const struct host_part *a, const char *b)
* a) The path lengths differ
* or b) The hosts are identical
*/
- if (a && a != &db_root && b >= end)
+ if (a && a != &db_root && b >= end) {
/* len(a) > len(b) */
return 1;
- else if ((!a || a == &db_root) && b < end)
+ } else if ((!a || a == &db_root) && b < end) {
/* len(a) < len(b) */
return -1;
+ }
/* Identical */
return 0;
@@ -1136,12 +1280,13 @@ urldb_search_find(struct search_node *root, const char *host)
c = urldb_search_match_string(root->data, host);
- if (c > 0)
+ if (c > 0) {
return urldb_search_find(root->left, host);
- else if (c < 0)
+ } else if (c < 0) {
return urldb_search_find(root->right, host);
- else
- return root->data;
+ }
+
+ return root->data;
}
@@ -1154,8 +1299,11 @@ urldb_search_find(struct search_node *root, const char *host)
* \param port The port associated with the path
* \return Pointer to path data or NULL if not found.
*/
-static struct path_data *urldb_match_path(const struct path_data *parent,
- const char *path, lwc_string *scheme, unsigned short port)
+static struct path_data *
+urldb_match_path(const struct path_data *parent,
+ const char *path,
+ lwc_string *scheme,
+ unsigned short port)
{
const struct path_data *p;
const char *slash;
@@ -1165,7 +1313,7 @@ static struct path_data *urldb_match_path(const struct path_data *parent,
assert(parent->segment == NULL);
if (path[0] != '/') {
- LOG("path is %s", path);
+ NSLOG(netsurf, INFO, "path is %s", path);
}
assert(path[0] == '/');
@@ -1175,14 +1323,14 @@ static struct path_data *urldb_match_path(const struct path_data *parent,
while (p != NULL) {
slash = strchr(path + 1, '/');
- if (!slash)
+ if (!slash) {
slash = path + strlen(path);
+ }
if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
- lwc_string_isequal(p->scheme, scheme, &match) ==
- lwc_error_ok &&
- match == true &&
- p->port == port) {
+ lwc_string_isequal(p->scheme, scheme, &match) == lwc_error_ok &&
+ match == true &&
+ p->port == port) {
if (*slash == '\0') {
/* Complete match */
return (struct path_data *) p;
@@ -1233,7 +1381,7 @@ static struct path_data *urldb_find_url(nsurl *url)
return NULL;
if (lwc_string_isequal(scheme, corestring_lwc_mailto, &match) ==
- lwc_error_ok && match == true) {
+ lwc_error_ok && match == true) {
lwc_string_unref(scheme);
return NULL;
}
@@ -1244,7 +1392,7 @@ static struct path_data *urldb_find_url(nsurl *url)
lwc_string_unref(host);
} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
- lwc_error_ok && match == true) {
+ lwc_error_ok && match == true) {
host_str = "localhost";
} else {
@@ -1260,8 +1408,7 @@ static struct path_data *urldb_find_url(nsurl *url)
}
/* generate plq (path, leaf, query) */
- if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) !=
- NSERROR_OK) {
+ if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != NSERROR_OK) {
lwc_string_unref(scheme);
return NULL;
}
@@ -1296,12 +1443,15 @@ static void urldb_dump_paths(struct path_data *parent)
do {
if (p->segment != NULL) {
- LOG("\t%s : %u", lwc_string_data(p->scheme), p->port);
+ NSLOG(netsurf, INFO, "\t%s : %u",
+ lwc_string_data(p->scheme), p->port);
- LOG("\t\t'%s'", p->segment);
+ NSLOG(netsurf, INFO, "\t\t'%s'", p->segment);
- for (i = 0; i != p->frag_cnt; i++)
- LOG("\t\t\t#%s", p->fragment[i]);
+ for (i = 0; i != p->frag_cnt; i++) {
+ NSLOG(netsurf, INFO, "\t\t\t#%s",
+ p->fragment[i]);
+ }
}
if (p->children != NULL) {
@@ -1330,17 +1480,19 @@ static void urldb_dump_hosts(struct host_part *parent)
struct host_part *h;
if (parent->part) {
- LOG("%s", parent->part);
+ NSLOG(netsurf, INFO, "%s", parent->part);
- LOG("\t%s invalid SSL certs", parent->permit_invalid_certs ? "Permits" : "Denies");
+ NSLOG(netsurf, INFO, "\t%s invalid SSL certs",
+ parent->permit_invalid_certs ? "Permits" : "Denies");
}
/* Dump path data */
urldb_dump_paths(&parent->paths);
/* and recurse */
- for (h = parent->children; h; h = h->next)
+ for (h = parent->children; h; h = h->next) {
urldb_dump_hosts(h);
+ }
}
@@ -1357,7 +1509,7 @@ static void urldb_dump_search(struct search_node *parent, int depth)
char s[1024];
int r;
int sl = sizeof(s) - 2;
-
+
if (parent == &empty)
return;
@@ -1383,17 +1535,17 @@ static void urldb_dump_search(struct search_node *parent, int depth)
}
s[i]= 0;
- LOG("%s", s);
+ NSLOG(netsurf, INFO, "%s", s);
urldb_dump_search(parent->right, depth + 1);
}
/**
- * Compare a pair of host_parts
+ * Compare a pair of host parts
*
- * \param a
- * \param b
+ * \param a first host part
+ * \param b second host part
* \return 0 if match, non-zero, otherwise
*/
static int
@@ -1405,21 +1557,24 @@ urldb_search_match_host(const struct host_part *a, const struct host_part *b)
/* traverse up tree to root, comparing parts as we go. */
for (; a && a != &db_root && b && b != &db_root;
- a = a->parent, b = b->parent)
- if ((ret = strcasecmp(a->part, b->part)) != 0)
+ a = a->parent, b = b->parent) {
+ if ((ret = strcasecmp(a->part, b->part)) != 0) {
/* They differ => return the difference here */
return ret;
+ }
+ }
/* If we get here then either:
* a) The path lengths differ
* or b) The hosts are identical
*/
- if (a && a != &db_root && (!b || b == &db_root))
+ if (a && a != &db_root && (!b || b == &db_root)) {
/* len(a) > len(b) */
return 1;
- else if ((!a || a == &db_root) && b && b != &db_root)
+ } else if ((!a || a == &db_root) && b && b != &db_root) {
/* len(a) < len(b) */
return -1;
+ }
/* identical */
return 0;
@@ -1434,11 +1589,11 @@ urldb_search_match_host(const struct host_part *a, const struct host_part *b)
*/
static struct search_node *urldb_search_skew(struct search_node *root)
{
- struct search_node *temp;
-
assert(root);
if (root->left->level == root->level) {
+ struct search_node *temp;
+
temp = root->left;
root->left = temp->right;
temp->right = root;
@@ -1457,11 +1612,11 @@ static struct search_node *urldb_search_skew(struct search_node *root)
*/
static struct search_node *urldb_search_split(struct search_node *root)
{
- struct search_node *temp;
-
assert(root);
if (root->right->right->level == root->level) {
+ struct search_node *temp;
+
temp = root->right;
root->right = temp->left;
temp->left = root;
@@ -1493,10 +1648,10 @@ urldb_search_insert_internal(struct search_node *root, struct search_node *n)
if (c > 0) {
root->left = urldb_search_insert_internal(
- root->left, n);
+ root->left, n);
} else if (c < 0) {
root->right = urldb_search_insert_internal(
- root->right, n);
+ root->right, n);
} else {
/* exact match */
free(n);
@@ -1548,8 +1703,11 @@ urldb_search_insert(struct search_node *root, const struct host_part *data)
* \param was_quoted Whether \a v was quoted in the input
* \return true on success, false on memory exhaustion
*/
-static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n,
- char *v, bool was_quoted)
+static bool
+urldb_parse_avpair(struct cookie_internal_data *c,
+ char *n,
+ char *v,
+ bool was_quoted)
{
int vlen;
@@ -1623,7 +1781,7 @@ static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n,
/* do nothing */
}
- res = nsc_strntimet(datenoday, strlen(datenoday), &expires);
+ res = nsc_strntimet(datenoday, strlen(datenoday), &expires);
if (res != NSERROR_OK) {
/* assume we have an unrepresentable date =>
* force it to the maximum possible value of a
@@ -1640,8 +1798,9 @@ static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n,
c->name = strdup(n);
c->value = strdup(v);
c->value_was_quoted = was_quoted;
- if (!c->name || !c->value)
+ if (!c->name || !c->value) {
return false;
+ }
}
return true;
@@ -1911,6 +2070,180 @@ urldb_parse_cookie(nsurl *url, const char **cookie)
/**
+ * Add a path to the database, creating any intermediate entries
+ *
+ * The path is split on '/' and walked one segment at a time. At each
+ * level the children of the current node are searched for an entry with
+ * the same segment text, scheme and port; if none is found a new node is
+ * requested from urldb_add_path_node().
+ *
+ * \param scheme URL scheme associated with path
+ * \param port Port number on host associated with path
+ * \param host Host tree node to attach to
+ * \param path_query Absolute path plus query to add (freed). May be NULL,
+ *                   e.g. when the caller's strdup() failed; NULL is then
+ *                   returned without touching the database.
+ * \param fragment URL fragment, or NULL
+ * \param url URL (fragment ignored)
+ * \return Pointer to leaf node, or NULL on memory exhaustion
+ */
+static struct path_data *
+urldb_add_path(lwc_string *scheme,
+	       unsigned int port,
+	       const struct host_part *host,
+	       char *path_query,
+	       lwc_string *fragment,
+	       nsurl *url)
+{
+	struct path_data *d, *e;
+	char *segment, *slash;
+	bool match;
+
+	assert(scheme && host && url);
+
+	if (path_query == NULL) {
+		/* Callers pass freshly duplicated strings without checking
+		 * them; treat a failed duplication as memory exhaustion
+		 * rather than dereferencing NULL below. */
+		return NULL;
+	}
+
+	d = (struct path_data *) &host->paths;
+
+	/* skip leading '/' */
+	segment = path_query;
+	if (*segment == '/') {
+		segment++;
+	}
+
+	/* Process path segments */
+	for (;;) {
+		slash = strchr(segment, '/');
+		if (slash != NULL) {
+			/* terminate this segment so it can be compared */
+			*slash = '\0';
+		}
+
+		/* look for existing entry */
+		for (e = d->children; e; e = e->next) {
+			if (strcmp(segment, e->segment) == 0 &&
+			    lwc_string_isequal(scheme, e->scheme,
+					       &match) == lwc_error_ok &&
+			    match == true &&
+			    e->port == port) {
+				break;
+			}
+		}
+
+		if (slash == NULL) {
+			/* last segment: this is where the fragment lives */
+			d = e ? urldb_add_path_fragment(e, fragment) :
+				urldb_add_path_node(scheme, port,
+						    segment, fragment, d);
+			break;
+		}
+
+		/* intermediate segment: descend, creating it if needed */
+		d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
+		if (!d) {
+			break;
+		}
+
+		segment = slash + 1;
+	}
+
+	/* ownership of path_query was passed to us */
+	free(path_query);
+
+	if (d && !d->url) {
+		/* Insert defragmented URL */
+		if (nsurl_defragment(url, &d->url) != NSERROR_OK) {
+			return NULL;
+		}
+	}
+
+	return d;
+}
+
+
+/**
+ * Add a host to the database, creating any intermediate entries
+ *
+ * IP addresses are stored as a single node directly below the root and
+ * inserted into the ST_IP search tree. Host names are split on '.' and
+ * processed from the rightmost label to the leftmost; the node for the
+ * leftmost label is the one inserted into a search tree and returned.
+ *
+ * \param host Hostname to add
+ * \return Pointer to leaf node, or NULL on memory exhaustion
+ */
+static struct host_part *urldb_add_host(const char *host)
+{
+	struct host_part *d = (struct host_part *) &db_root, *e;
+	struct search_node *s;
+	char buf[256]; /* 256 bytes is sufficient - domain names are
+			* limited to 255 chars. */
+	char *part;
+
+	assert(host);
+
+	if (urldb__host_is_ip_address(host)) {
+		/* Host is an IP, so simply add as TLD */
+
+		/* Check for existing entry */
+		for (e = d->children; e; e = e->next) {
+			if (strcasecmp(host, e->part) == 0) {
+				/* found => return it */
+				return e;
+			}
+		}
+
+		d = urldb_add_host_node(host, d);
+		if (d == NULL) {
+			/* Node creation failed: report it instead of
+			 * passing NULL on to the search tree, as the
+			 * hostname branch below already does. */
+			return NULL;
+		}
+
+		s = urldb_search_insert(search_trees[ST_IP], d);
+		if (!s) {
+			/* failed */
+			return NULL;
+		}
+		search_trees[ST_IP] = s;
+
+		return d;
+	}
+
+	/* Copy host string, so we can corrupt it */
+	strncpy(buf, host, sizeof buf);
+	buf[sizeof buf - 1] = '\0';
+
+	/* Process FQDN segments backwards */
+	do {
+		part = strrchr(buf, '.');
+		if (!part) {
+			/* last segment */
+			/* Check for existing entry */
+			for (e = d->children; e; e = e->next) {
+				if (strcasecmp(buf, e->part) == 0) {
+					break;
+				}
+			}
+
+			if (e) {
+				d = e;
+			} else {
+				d = urldb_add_host_node(buf, d);
+			}
+
+			/* And insert into search tree */
+			if (d) {
+				struct search_node **r;
+
+				r = urldb_get_search_tree_direct(buf);
+				s = urldb_search_insert(*r, d);
+				if (!s) {
+					/* failed */
+					d = NULL;
+				} else {
+					*r = s;
+				}
+			}
+			break;
+		}
+
+		/* Check for existing entry */
+		for (e = d->children; e; e = e->next) {
+			if (strcasecmp(part + 1, e->part) == 0) {
+				break;
+			}
+		}
+
+		d = e ? e : urldb_add_host_node(part + 1, d);
+		if (!d) {
+			break;
+		}
+
+		/* chop the label just handled off the end of the buffer */
+		*part = '\0';
+	} while (1);
+
+	return d;
+}
+
+
+/**
* Insert a cookie into the database
*
* \param c The cookie to insert
@@ -1918,8 +2251,10 @@ urldb_parse_cookie(nsurl *url, const char **cookie)
* \param url URL (sans fragment) associated with cookie
* \return true on success, false on memory exhaustion (c will be freed)
*/
-static bool urldb_insert_cookie(struct cookie_internal_data *c,
- lwc_string *scheme, nsurl *url)
+static bool
+urldb_insert_cookie(struct cookie_internal_data *c,
+ lwc_string *scheme,
+ nsurl *url)
{
struct cookie_internal_data *d;
const struct host_part *h;
@@ -1947,8 +2282,8 @@ static bool urldb_insert_cookie(struct cookie_internal_data *c,
assert(scheme != NULL);
h = urldb_search_find(
- urldb_get_search_tree(c->domain),
- c->domain);
+ urldb_get_search_tree(c->domain),
+ c->domain);
if (!h) {
h = urldb_add_host(c->domain);
@@ -1960,7 +2295,7 @@ static bool urldb_insert_cookie(struct cookie_internal_data *c,
/* find path */
p = urldb_add_path(scheme, 0, h,
- strdup(c->path), NULL, url);
+ strdup(c->path), NULL, url);
if (!p) {
urldb_free_cookie(c);
return false;
@@ -1970,8 +2305,8 @@ static bool urldb_insert_cookie(struct cookie_internal_data *c,
/* add cookie */
for (d = p->cookies; d; d = d->next) {
if (!strcmp(d->domain, c->domain) &&
- !strcmp(d->path, c->path) &&
- !strcmp(d->name, c->name))
+ !strcmp(d->path, c->path) &&
+ !strcmp(d->name, c->name))
break;
}
@@ -2035,8 +2370,12 @@ static bool urldb_insert_cookie(struct cookie_internal_data *c,
* \param buf Pointer to Pointer to buffer (updated)
* \return true on success, false on memory exhaustion
*/
-static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
- int *used, int *alloc, char **buf)
+static bool
+urldb_concat_cookie(struct cookie_internal_data *c,
+ int version,
+ int *used,
+ int *alloc,
+ char **buf)
{
/* Combined (A)BNF for the Cookie: request header:
*
@@ -2100,10 +2439,10 @@ static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
* We allow for the possibility that values are quoted
*/
max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
- (c->path_from_set ?
- 8 + strlen(c->path) + 2 : 0) +
- (c->domain_from_set ?
- 10 + strlen(c->domain) + 2 : 0);
+ (c->path_from_set ?
+ 8 + strlen(c->path) + 2 : 0) +
+ (c->domain_from_set ?
+ 10 + strlen(c->domain) + 2 : 0);
if (*used + max_len >= *alloc) {
char *temp = realloc(*buf, *alloc + 4096);
@@ -2144,7 +2483,7 @@ static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
/* Value needs quoting if it contains any separator or if
* it needs preserving from the Set-Cookie header */
if (c->value_was_quoted ||
- strpbrk(c->value, separators) != NULL) {
+ strpbrk(c->value, separators) != NULL) {
sprintf(*buf + *used - 1, "\"%s\"", c->value);
*used += 1 + strlen(c->value) + 1;
} else {
@@ -2187,9 +2526,17 @@ static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
/**
* deletes paths from a cookie.
+ *
+ * \param domain the cookie domain
+ * \param path the cookie path
+ * \param name The cookie name
+ * \param parent The url data of the cookie
*/
-static void urldb_delete_cookie_paths(const char *domain, const char *path,
- const char *name, struct path_data *parent)
+static void
+urldb_delete_cookie_paths(const char *domain,
+ const char *path,
+ const char *name,
+ struct path_data *parent)
{
struct cookie_internal_data *c;
struct path_data *p = parent;
@@ -2199,17 +2546,19 @@ static void urldb_delete_cookie_paths(const char *domain, const char *path,
do {
for (c = p->cookies; c; c = c->next) {
if (strcmp(c->domain, domain) == 0 &&
- strcmp(c->path, path) == 0 &&
- strcmp(c->name, name) == 0) {
- if (c->prev)
+ strcmp(c->path, path) == 0 &&
+ strcmp(c->name, name) == 0) {
+ if (c->prev) {
c->prev->next = c->next;
- else
+ } else {
p->cookies = c->next;
+ }
- if (c->next)
+ if (c->next) {
c->next->prev = c->prev;
- else
+ } else {
p->cookies_end = c->prev;
+ }
urldb_free_cookie(c);
@@ -2235,17 +2584,26 @@ static void urldb_delete_cookie_paths(const char *domain, const char *path,
/**
* Deletes cookie hosts and their assoicated paths
+ *
+ * \param domain the cookie domain
+ * \param path the cookie path
+ * \param name The cookie name
+ * \param parent The host node to start from; its paths and all child hosts are searched
*/
-static void urldb_delete_cookie_hosts(const char *domain, const char *path,
- const char *name, struct host_part *parent)
+static void
+urldb_delete_cookie_hosts(const char *domain,
+ const char *path,
+ const char *name,
+ struct host_part *parent)
{
struct host_part *h;
assert(parent);
urldb_delete_cookie_paths(domain, path, name, &parent->paths);
- for (h = parent->children; h; h = h->next)
+ for (h = parent->children; h; h = h->next) {
urldb_delete_cookie_hosts(domain, path, name, h);
+ }
}
@@ -2267,9 +2625,10 @@ static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
struct cookie_internal_data *c;
for (c = p->cookies; c != NULL; c = c->next) {
- if (c->expires == -1 || c->expires < now)
+ if (c->expires == -1 || c->expires < now) {
/* Skip expired & session cookies */
continue;
+ }
fprintf(fp,
"%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
@@ -2282,9 +2641,9 @@ static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
c->no_destroy, c->name, c->value,
c->value_was_quoted,
p->scheme ? lwc_string_data(p->scheme) :
- "unused",
+ "unused",
p->url ? nsurl_access(p->url) :
- "unused",
+ "unused",
c->comment ? c->comment : "");
}
}
@@ -2350,21 +2709,19 @@ static void urldb_destroy_path_node_content(struct path_data *node)
struct cookie_internal_data *a, *b;
unsigned int i;
- if (node->url != NULL)
+ if (node->url != NULL) {
nsurl_unref(node->url);
+ }
- if (node->scheme != NULL)
+ if (node->scheme != NULL) {
lwc_string_unref(node->scheme);
+ }
free(node->segment);
for (i = 0; i < node->frag_cnt; i++)
free(node->fragment[i]);
free(node->fragment);
- if (node->thumb) {
- guit->bitmap->destroy(node->thumb);
- }
-
free(node->urld.title);
for (a = node->cookies; a; a = b) {
@@ -2513,7 +2870,7 @@ void urldb_destroy(void)
}
-/* exported interface documented in content/urldb.h */
+/* exported interface documented in netsurf/url_db.h */
nserror urldb_load(const char *filename)
{
#define MAXIMUM_URL_LENGTH 4096
@@ -2528,14 +2885,15 @@ nserror urldb_load(const char *filename)
assert(filename);
- LOG("Loading URL file %s", filename);
+ NSLOG(netsurf, INFO, "Loading URL file %s", filename);
if (url_bloom == NULL)
url_bloom = bloom_create(BLOOM_SIZE);
fp = fopen(filename, "r");
if (!fp) {
- LOG("Failed to open file '%s' for reading", filename);
+ NSLOG(netsurf, INFO, "Failed to open file '%s' for reading",
+ filename);
return NSERROR_NOT_FOUND;
}
@@ -2546,17 +2904,20 @@ nserror urldb_load(const char *filename)
version = atoi(s);
if (version < MIN_URL_FILE_VERSION) {
- LOG("Unsupported URL file version.");
+ NSLOG(netsurf, INFO, "Unsupported URL file version.");
fclose(fp);
return NSERROR_INVALID;
}
if (version > URL_FILE_VERSION) {
- LOG("Unknown URL file version.");
+ NSLOG(netsurf, INFO, "Unknown URL file version.");
fclose(fp);
return NSERROR_INVALID;
}
while (fgets(host, sizeof host, fp)) {
+ time_t hsts_expiry = 0;
+ int hsts_include_sub_domains = 0;
+
/* get the hostname */
length = strlen(host) - 1;
host[length] = '\0';
@@ -2574,6 +2935,25 @@ nserror urldb_load(const char *filename)
continue;
}
+ if (version >= 107) {
+ char *p = host;
+ while (*p && *p != ' ') p++;
+ while (*p && *p == ' ') { *p = '\0'; p++; }
+ hsts_include_sub_domains = (*p == '1');
+ while (*p && *p != ' ') p++;
+ while (*p && *p == ' ') p++;
+ nsc_snptimet(p, strlen(p), &hsts_expiry);
+ }
+
+ h = urldb_add_host(host);
+ if (!h) {
+ NSLOG(netsurf, INFO, "Failed adding host: '%s'", host);
+ fclose(fp);
+ return NSERROR_NOMEM;
+ }
+ h->hsts.expires = hsts_expiry;
+ h->hsts.include_sub_domains = hsts_include_sub_domains;
+
/* read number of URLs */
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
@@ -2581,17 +2961,10 @@ nserror urldb_load(const char *filename)
/* no URLs => try next host */
if (urls == 0) {
- LOG("No URLs for '%s'", host);
+ NSLOG(netsurf, INFO, "No URLs for '%s'", host);
continue;
}
- h = urldb_add_host(host);
- if (!h) {
- LOG("Failed adding host: '%s'", host);
- fclose(fp);
- return NSERROR_NOMEM;
- }
-
/* load the non-corrupt data */
for (i = 0; i < urls; i++) {
struct path_data *p = NULL;
@@ -2621,16 +2994,16 @@ nserror urldb_load(const char *filename)
s[length] = '\0';
if (!strcasecmp(host, "localhost") &&
- !strcasecmp(scheme, "file"))
+ !strcasecmp(scheme, "file"))
is_file = true;
snprintf(url, sizeof url, "%s://%s%s%s%s",
- scheme,
- /* file URLs have no host */
- (is_file ? "" : host),
- (port ? ":" : ""),
- (port ? ports : ""),
- s);
+ scheme,
+ /* file URLs have no host */
+ (is_file ? "" : host),
+ (port ? ":" : ""),
+ (port ? ports : ""),
+ s);
/* TODO: store URLs in pre-parsed state, and make
* a nsurl_load to generate the nsurl more
@@ -2638,7 +3011,8 @@ nserror urldb_load(const char *filename)
* Need a nsurl_save too.
*/
if (nsurl_create(url, &nsurl) != NSERROR_OK) {
- LOG("Failed inserting '%s'", url);
+ NSLOG(netsurf, INFO, "Failed inserting '%s'",
+ url);
fclose(fp);
return NSERROR_NOMEM;
}
@@ -2650,19 +3024,21 @@ nserror urldb_load(const char *filename)
/* Copy and merge path/query strings */
if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
- &path_query, &len) != NSERROR_OK) {
- LOG("Failed inserting '%s'", url);
+ &path_query, &len) != NSERROR_OK) {
+ NSLOG(netsurf, INFO, "Failed inserting '%s'",
+ url);
fclose(fp);
return NSERROR_NOMEM;
}
scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
fragment_lwc = nsurl_get_component(nsurl,
- NSURL_FRAGMENT);
+ NSURL_FRAGMENT);
p = urldb_add_path(scheme_lwc, port, h, path_query,
- fragment_lwc, nsurl);
+ fragment_lwc, nsurl);
if (!p) {
- LOG("Failed inserting '%s'", url);
+ NSLOG(netsurf, INFO, "Failed inserting '%s'",
+ url);
fclose(fp);
return NSERROR_NOMEM;
}
@@ -2676,10 +3052,13 @@ nserror urldb_load(const char *filename)
if (p)
p->urld.visits = (unsigned int)atoi(s);
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ /* entry last use time */
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
break;
- if (p)
- p->urld.last_visit = (time_t)atoi(s);
+ }
+ if (p) {
+ nsc_snptimet(s, strlen(s) - 1, &p->urld.last_visit);
+ }
if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
break;
@@ -2703,13 +3082,13 @@ nserror urldb_load(const char *filename)
}
fclose(fp);
- LOG("Successfully loaded URL file");
+ NSLOG(netsurf, INFO, "Successfully loaded URL file");
#undef MAXIMUM_URL_LENGTH
return NSERROR_OK;
}
-/* exported interface documented in content/urldb.h */
+/* exported interface documented in netsurf/url_db.h */
nserror urldb_save(const char *filename)
{
FILE *fp;
@@ -2719,7 +3098,8 @@ nserror urldb_save(const char *filename)
fp = fopen(filename, "w");
if (!fp) {
- LOG("Failed to open file '%s' for writing", filename);
+ NSLOG(netsurf, INFO, "Failed to open file '%s' for writing",
+ filename);
return NSERROR_SAVE_FAILED;
}
@@ -2737,17 +3117,20 @@ nserror urldb_save(const char *filename)
/* exported interface documented in content/urldb.h */
-void urldb_set_url_persistence(nsurl *url, bool persist)
+nserror urldb_set_url_persistence(nsurl *url, bool persist)
{
struct path_data *p;
assert(url);
p = urldb_find_url(url);
- if (!p)
- return;
+ if (!p) {
+ return NSERROR_NOT_FOUND;
+ }
p->persistent = persist;
+
+ return NSERROR_OK;
}
@@ -2778,7 +3161,7 @@ bool urldb_add_url(nsurl *url)
/* Copy and merge path/query strings */
if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
- NSERROR_OK) {
+ NSERROR_OK) {
return false;
}
assert(path_query != NULL);
@@ -2795,7 +3178,7 @@ bool urldb_add_url(nsurl *url)
lwc_string_unref(host);
} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
- lwc_error_ok && match == true) {
+ lwc_error_ok && match == true) {
host_str = "localhost";
} else {
@@ -2838,54 +3221,69 @@ bool urldb_add_url(nsurl *url)
/* exported interface documented in content/urldb.h */
-void urldb_set_url_title(nsurl *url, const char *title)
+nserror urldb_set_url_title(nsurl *url, const char *title)
{
struct path_data *p;
char *temp;
- assert(url && title);
+ assert(url);
p = urldb_find_url(url);
- if (!p)
- return;
+ if (p == NULL) {
+ return NSERROR_NOT_FOUND;
+ }
- temp = strdup(title);
- if (!temp)
- return;
+ /* copy the parameter if necessary */
+ if (title != NULL) {
+ temp = strdup(title);
+ if (temp == NULL) {
+ return NSERROR_NOMEM;
+ }
+ } else {
+ temp = NULL;
+ }
free(p->urld.title);
p->urld.title = temp;
+
+ return NSERROR_OK;
}
/* exported interface documented in content/urldb.h */
-void urldb_set_url_content_type(nsurl *url, content_type type)
+nserror urldb_set_url_content_type(nsurl *url, content_type type)
{
struct path_data *p;
assert(url);
p = urldb_find_url(url);
- if (!p)
- return;
+ if (!p) {
+ return NSERROR_NOT_FOUND;
+ }
p->urld.type = type;
+
+ return NSERROR_OK;
}
/* exported interface documented in content/urldb.h */
-void urldb_update_url_visit_data(nsurl *url)
+nserror urldb_update_url_visit_data(nsurl *url)
{
struct path_data *p;
assert(url);
p = urldb_find_url(url);
- if (!p)
- return;
+ if (!p) {
+ return NSERROR_NOT_FOUND;
+ }
p->urld.last_visit = time(NULL);
p->urld.visits++;
+
+ return NSERROR_OK;
}
@@ -2905,7 +3303,7 @@ void urldb_reset_url_visit_data(nsurl *url)
}
-/* exported interface documented in content/urldb.h */
+/* exported interface documented in netsurf/url_db.h */
const struct url_data *urldb_get_url_data(nsurl *url)
{
struct path_data *p;
@@ -2938,7 +3336,7 @@ nsurl *urldb_get_url(nsurl *url)
}
-/* exported interface documented in content/urldb.h */
+/* exported interface documented in netsurf/url_db.h */
void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
{
struct path_data *p, *pi;
@@ -2965,10 +3363,10 @@ void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
/* Search if given URL belongs to a protection space we already know of. */
for (space = h->prot_space; space; space = space->next) {
if (!strcmp(space->realm, realm) &&
- lwc_string_isequal(space->scheme, p->scheme,
- &match) == lwc_error_ok &&
- match == true &&
- space->port == p->port)
+ lwc_string_isequal(space->scheme, p->scheme,
+ &match) == lwc_error_ok &&
+ match == true &&
+ space->port == p->port)
break;
}
@@ -3001,7 +3399,7 @@ void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
}
-/* exported interface documented in content/urldb.h */
+/* exported interface documented in netsurf/url_db.h */
const char *urldb_get_auth_details(nsurl *url, const char *realm)
{
struct path_data *p, *p_cur, *p_top;
@@ -3051,7 +3449,7 @@ const char *urldb_get_auth_details(nsurl *url, const char *realm)
}
-/* exported interface documented in content/urldb.h */
+/* exported interface documented in netsurf/url_db.h */
void urldb_set_cert_permissions(nsurl *url, bool permit)
{
struct path_data *p;
@@ -3099,51 +3497,141 @@ bool urldb_get_cert_permissions(nsurl *url)
/* exported interface documented in content/urldb.h */
-bool urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap)
+bool urldb_set_hsts_policy(struct nsurl *url, const char *header)
{
struct path_data *p;
+ struct host_part *h;
+ lwc_string *host;
+ time_t now = time(NULL);
+ http_strict_transport_security *sts;
+ uint32_t max_age = 0;
+ nserror error;
assert(url);
+ host = nsurl_get_component(url, NSURL_HOST);
+ if (host != NULL) {
+ if (urldb__host_is_ip_address(lwc_string_data(host))) {
+ /* Host is IP: ignore */
+ lwc_string_unref(host);
+ return true;
+ } else if (lwc_string_length(host) == 0) {
+ /* Host is blank: ignore */
+ lwc_string_unref(host);
+ return true;
+ }
+
+ lwc_string_unref(host);
+ } else {
+ /* No host part: ignore */
+ return true;
+ }
+
/* add url, in case it's missing */
urldb_add_url(url);
p = urldb_find_url(url);
- if (p == NULL) {
+ if (!p)
return false;
+
+ for (; p && p->parent; p = p->parent)
+ /* do nothing */;
+ assert(p);
+
+ h = (struct host_part *)p;
+ if (h->permit_invalid_certs) {
+ /* Transport is tainted: ignore */
+ return true;
+ }
+
+ error = http_parse_strict_transport_security(header, &sts);
+ if (error != NSERROR_OK) {
+ /* Parse failed: ignore */
+ return true;
}
- LOG("Setting bitmap on %s", nsurl_access(url));
+ h->hsts.include_sub_domains =
+ http_strict_transport_security_include_subdomains(sts);
- if ((p->thumb) && (p->thumb != bitmap)) {
- guit->bitmap->destroy(p->thumb);
+ max_age = http_strict_transport_security_max_age(sts);
+ if (max_age == 0) {
+ h->hsts.expires = 0;
+ h->hsts.include_sub_domains = false;
+ } else if ((time_t) (now + max_age) > h->hsts.expires) {
+ h->hsts.expires = now + max_age;
}
- p->thumb = bitmap;
+ http_strict_transport_security_destroy(sts);
return true;
}
/* exported interface documented in content/urldb.h */
-struct bitmap *urldb_get_thumbnail(nsurl *url)
+bool urldb_get_hsts_enabled(struct nsurl *url)
{
struct path_data *p;
+ const struct host_part *h;
+ lwc_string *host;
+ time_t now = time(NULL);
assert(url);
+ host = nsurl_get_component(url, NSURL_HOST);
+ if (host != NULL) {
+ if (urldb__host_is_ip_address(lwc_string_data(host))) {
+ /* Host is IP: not enabled */
+ lwc_string_unref(host);
+ return false;
+ } else if (lwc_string_length(host) == 0) {
+ /* Host is blank: not enabled */
+ lwc_string_unref(host);
+ return false;
+ }
+
+ lwc_string_unref(host);
+ } else {
+ /* No host part: not enabled */
+ return false;
+ }
+
+ /* The URL must exist in the db in order to find HSTS policy, since
+ * we search up the tree from the URL node, and policy from further
+ * up may also apply. */
+ urldb_add_url(url);
+
p = urldb_find_url(url);
if (!p)
- return NULL;
+ return false;
+
+ for (; p && p->parent; p = p->parent)
+ /* do nothing */;
+ assert(p);
- return p->thumb;
+ h = (const struct host_part *)p;
+
+ /* Consult record for this host */
+ if (h->hsts.expires > now) {
+ /* Not expired */
+ return true;
+ }
+
+ /* Consult parent domains */
+ for (h = h->parent; h && h != &db_root; h = h->parent) {
+ if (h->hsts.expires > now && h->hsts.include_sub_domains) {
+ /* Not expired and subdomains included */
+ return true;
+ }
+ }
+
+ return false;
}
-/* exported interface documented in content/urldb.h */
-void urldb_iterate_partial(const char *prefix,
- bool (*callback)(nsurl *url,
- const struct url_data *data))
+/* exported interface documented in netsurf/url_db.h */
+void
+urldb_iterate_partial(const char *prefix,
+ bool (*callback)(nsurl *url, const struct url_data *data))
{
char host[256];
char buf[260]; /* max domain + "www." */
@@ -3165,7 +3653,7 @@ void urldb_iterate_partial(const char *prefix,
/* if there's a slash in the input, then we can
* assume that we're looking for a path */
snprintf(host, sizeof host, "%.*s",
- (int) (slash - prefix), prefix);
+ (int) (slash - prefix), prefix);
h = urldb_search_find(tree, host);
if (!h) {
@@ -3185,7 +3673,7 @@ void urldb_iterate_partial(const char *prefix,
if (h->paths.children) {
/* Have paths, iterate them */
urldb_iterate_partial_path(&h->paths, slash + 1,
- callback);
+ callback);
}
} else {
@@ -3199,17 +3687,17 @@ void urldb_iterate_partial(const char *prefix,
/* now look for www.prefix */
snprintf(buf, sizeof buf, "www.%s", prefix);
if(!urldb_iterate_partial_host(
- search_trees[ST_DN + 'w' - 'a'],
- buf, callback))
+ search_trees[ST_DN + 'w' - 'a'],
+ buf, callback))
return;
}
}
}
-/* exported interface documented in content/urldb.h */
-void urldb_iterate_entries(bool (*callback)(nsurl *url,
- const struct url_data *data))
+/* exported interface documented in netsurf/url_db.h */
+void
+urldb_iterate_entries(bool (*callback)(nsurl *url, const struct url_data *data))
{
int i;
@@ -3217,8 +3705,10 @@ void urldb_iterate_entries(bool (*callback)(nsurl *url,
for (i = 0; i < NUM_SEARCH_TREES; i++) {
if (!urldb_iterate_entries_host(search_trees[i],
- callback, NULL))
+ callback,
+ NULL)) {
break;
+ }
}
}
@@ -3232,7 +3722,7 @@ void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
for (i = 0; i < NUM_SEARCH_TREES; i++) {
if (!urldb_iterate_entries_host(search_trees[i],
- NULL, callback))
+ NULL, callback))
break;
}
}
@@ -3285,7 +3775,7 @@ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
/* Domain match host names */
if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&
- match == false) {
+ match == false) {
const char *hptr;
const char *rptr;
const char *dot;
@@ -3294,7 +3784,7 @@ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
/* Ensure neither host nor rhost are IP addresses */
if (urldb__host_is_ip_address(host_data) ||
- urldb__host_is_ip_address(rhost_data)) {
+ urldb__host_is_ip_address(rhost_data)) {
/* IP address, so no partial match */
lwc_string_unref(rhost);
goto error;
@@ -3342,8 +3832,8 @@ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
/* 3 */
if (*hptr == '\0' ||
- (dot = strchr(hptr + 1, '.')) == NULL ||
- *(dot + 1) == '\0') {
+ (dot = strchr(hptr + 1, '.')) == NULL ||
+ *(dot + 1) == '\0') {
lwc_string_unref(rhost);
goto error;
}
@@ -3356,9 +3846,11 @@ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
do {
struct cookie_internal_data *c;
- const char *suffix;
char *dot;
size_t len;
+#ifdef WITH_NSPSL
+ const char *suffix;
+#endif
c = urldb_parse_cookie(url, &cur);
if (!c) {
@@ -3377,8 +3869,8 @@ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
/* 4.3.2:i Cookie path must be a prefix of URL path */
len = strlen(c->path);
if (len > lwc_string_length(path) ||
- strncmp(c->path, lwc_string_data(path),
- len) != 0) {
+ strncmp(c->path, lwc_string_data(path),
+ len) != 0) {
urldb_free_cookie(c);
goto error;
}
@@ -3391,7 +3883,8 @@ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
}
suffix = nspsl_getpublicsuffix(dot);
if (suffix == NULL) {
- LOG("domain %s was a public suffix domain", dot);
+ NSLOG(netsurf, INFO,
+ "domain %s was a public suffix domain", dot);
urldb_free_cookie(c);
goto error;
}
@@ -3448,7 +3941,7 @@ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
}
if (strcasecmp(lwc_string_data(host) + (hlen - dlen),
- domain)) {
+ domain)) {
urldb_free_cookie(c);
goto error;
}
@@ -3530,7 +4023,7 @@ char *urldb_get_cookie(nsurl *url, bool include_http_only)
scheme = p->scheme;
matched_cookies = malloc(matched_cookies_size *
- sizeof(struct cookie_internal_data *));
+ sizeof(struct cookie_internal_data *));
if (!matched_cookies)
return NULL;
@@ -3539,8 +4032,8 @@ char *urldb_get_cookie(nsurl *url, bool include_http_only)
if (count == matched_cookies_size) { \
struct cookie_internal_data **temp; \
temp = realloc(matched_cookies, \
- (matched_cookies_size + 20) * \
- sizeof(struct cookie_internal_data *)); \
+ (matched_cookies_size + 20) * \
+ sizeof(struct cookie_internal_data *)); \
\
if (temp == NULL) { \
free(ret); \
@@ -3587,10 +4080,10 @@ char *urldb_get_cookie(nsurl *url, bool include_http_only)
continue;
if (c->secure && lwc_string_isequal(
- q->scheme,
- corestring_lwc_https,
- &match) &&
- match == false)
+ q->scheme,
+ corestring_lwc_https,
+ &match) &&
+ match == false)
/* secure cookie for insecure host.
* ignore */
continue;
@@ -3627,10 +4120,10 @@ char *urldb_get_cookie(nsurl *url, bool include_http_only)
continue;
if (c->secure && lwc_string_isequal(
- q->scheme,
- corestring_lwc_https,
- &match) &&
- match == false)
+ q->scheme,
+ corestring_lwc_https,
+ &match) &&
+ match == false)
/* Secure cookie for insecure server
* => ignore */
continue;
@@ -3672,9 +4165,9 @@ char *urldb_get_cookie(nsurl *url, bool include_http_only)
continue;
if (c->secure && lwc_string_isequal(p->scheme,
- corestring_lwc_https,
- &match) &&
- match == false)
+ corestring_lwc_https,
+ &match) &&
+ match == false)
/* Secure cookie for insecure server
* => ignore */
continue;
@@ -3695,7 +4188,7 @@ char *urldb_get_cookie(nsurl *url, bool include_http_only)
/* Finally consider domain cookies for hosts which domain match ours */
for (h = (const struct host_part *)p; h && h != &db_root;
- h = h->parent) {
+ h = h->parent) {
for (c = h->paths.cookies; c; c = c->next) {
if (c->expires != -1 && c->expires < now)
/* cookie has expired => ignore */
@@ -3707,9 +4200,9 @@ char *urldb_get_cookie(nsurl *url, bool include_http_only)
continue;
if (c->secure && lwc_string_isequal(scheme,
- corestring_lwc_https,
- &match) &&
- match == false)
+ corestring_lwc_https,
+ &match) &&
+ match == false)
/* secure cookie for insecure host. ignore */
continue;
@@ -3741,7 +4234,7 @@ char *urldb_get_cookie(nsurl *url, bool include_http_only)
for (i = 0; i < count; i++) {
if (!urldb_concat_cookie(matched_cookies[i], version,
- &ret_used, &ret_alloc, &ret)) {
+ &ret_used, &ret_alloc, &ret)) {
free(ret);
free(matched_cookies);
return NULL;
@@ -3776,7 +4269,7 @@ char *urldb_get_cookie(nsurl *url, bool include_http_only)
/* exported interface documented in content/urldb.h */
void urldb_delete_cookie(const char *domain, const char *path,
- const char *name)
+ const char *name)
{
urldb_delete_cookie_hosts(domain, path, name, &db_root);
}
@@ -3794,24 +4287,24 @@ void urldb_load_cookies(const char *filename)
if (!fp)
return;
-#define FIND_T { \
- for (; *p && *p != '\t'; p++) \
- ; /* do nothing */ \
- if (p >= end) { \
- LOG("Overran input"); \
- continue; \
- } \
- *p++ = '\0'; \
-}
+#define FIND_T { \
+ for (; *p && *p != '\t'; p++) \
+ ; /* do nothing */ \
+ if (p >= end) { \
+ NSLOG(netsurf, INFO, "Overran input"); \
+ continue; \
+ } \
+ *p++ = '\0'; \
+ }
-#define SKIP_T { \
- for (; *p && *p == '\t'; p++) \
- ; /* do nothing */ \
- if (p >= end) { \
- LOG("Overran input"); \
- continue; \
- } \
-}
+#define SKIP_T { \
+ for (; *p && *p == '\t'; p++) \
+ ; /* do nothing */ \
+ if (p >= end) { \
+ NSLOG(netsurf, INFO, "Overran input"); \
+ continue; \
+ } \
+ }
while (fgets(s, sizeof s, fp)) {
char *p = s, *end = 0,
@@ -3836,8 +4329,9 @@ void urldb_load_cookies(const char *filename)
FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);
if (loaded_cookie_file_version <
- MIN_COOKIE_FILE_VERSION) {
- LOG("Unsupported Cookie file version");
+ MIN_COOKIE_FILE_VERSION) {
+ NSLOG(netsurf, INFO,
+ "Unsupported Cookie file version");
break;
}
@@ -3905,7 +4399,7 @@ void urldb_load_cookies(const char *filename)
c->no_destroy = no_destroy;
if (!(c->name && c->value && c->comment &&
- c->domain && c->path)) {
+ c->domain && c->path)) {
urldb_free_cookie(c);
break;
}
@@ -3921,7 +4415,7 @@ void urldb_load_cookies(const char *filename)
break;
}
scheme_lwc = nsurl_get_component(url_nsurl,
- NSURL_SCHEME);
+ NSURL_SCHEME);
/* And insert it into database */
if (!urldb_insert_cookie(c, scheme_lwc, url_nsurl)) {
@@ -3953,7 +4447,7 @@ void urldb_save_cookies(const char *filename)
{
FILE *fp;
int cookie_file_version = max(loaded_cookie_file_version,
- COOKIE_FILE_VERSION);
+ COOKIE_FILE_VERSION);
assert(filename);
@@ -3961,19 +4455,18 @@ void urldb_save_cookies(const char *filename)
if (!fp)
return;
- fprintf(fp, "# >%s\n", filename);
fprintf(fp, "# NetSurf cookies file.\n"
- "#\n"
- "# Lines starting with a '#' are comments, "
- "blank lines are ignored.\n"
- "#\n"
- "# All lines prior to \"Version:\t%d\" are discarded.\n"
- "#\n"
- "# Version\tDomain\tDomain from Set-Cookie\tPath\t"
- "Path from Set-Cookie\tSecure\tHTTP-Only\tExpires\tLast used\t"
- "No destroy\tName\tValue\tValue was quoted\tScheme\t"
- "URL\tComment\n",
- cookie_file_version);
+ "#\n"
+ "# Lines starting with a '#' are comments, "
+ "blank lines are ignored.\n"
+ "#\n"
+ "# All lines prior to \"Version:\t%d\" are discarded.\n"
+ "#\n"
+ "# Version\tDomain\tDomain from Set-Cookie\tPath\t"
+ "Path from Set-Cookie\tSecure\tHTTP-Only\tExpires\tLast used\t"
+ "No destroy\tName\tValue\tValue was quoted\tScheme\t"
+ "URL\tComment\n",
+ cookie_file_version);
fprintf(fp, "Version:\t%d\n", cookie_file_version);
urldb_save_cookie_hosts(fp, &db_root);
@@ -3982,172 +4475,18 @@ void urldb_save_cookies(const char *filename)
}
-/* exported interface documented in content/urldb.h */
+/* exported interface documented in netsurf/url_db.h */
void urldb_dump(void)
{
int i;
urldb_dump_hosts(&db_root);
- for (i = 0; i != NUM_SEARCH_TREES; i++)
+ for (i = 0; i != NUM_SEARCH_TREES; i++) {
urldb_dump_search(search_trees[i], 0);
-}
-
-
-/* exported interface documented in content/urldb.h */
-struct host_part *urldb_add_host(const char *host)
-{
- struct host_part *d = (struct host_part *) &db_root, *e;
- struct search_node *s;
- char buf[256]; /* 256 bytes is sufficient - domain names are
- * limited to 255 chars. */
- char *part;
-
- assert(host);
-
- if (urldb__host_is_ip_address(host)) {
- /* Host is an IP, so simply add as TLD */
-
- /* Check for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcasecmp(host, e->part) == 0)
- /* found => return it */
- return e;
-
- d = urldb_add_host_node(host, d);
-
- s = urldb_search_insert(search_trees[ST_IP], d);
- if (!s) {
- /* failed */
- d = NULL;
- } else {
- search_trees[ST_IP] = s;
- }
-
- return d;
}
-
- /* Copy host string, so we can corrupt it */
- strncpy(buf, host, sizeof buf);
- buf[sizeof buf - 1] = '\0';
-
- /* Process FQDN segments backwards */
- do {
- part = strrchr(buf, '.');
- if (!part) {
- /* last segment */
- /* Check for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcasecmp(buf, e->part) == 0)
- break;
-
- if (e) {
- d = e;
- } else {
- d = urldb_add_host_node(buf, d);
- }
-
- /* And insert into search tree */
- if (d) {
- struct search_node **r;
-
- r = urldb_get_search_tree_direct(buf);
- s = urldb_search_insert(*r, d);
- if (!s) {
- /* failed */
- d = NULL;
- } else {
- *r = s;
- }
- }
- break;
- }
-
- /* Check for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcasecmp(part + 1, e->part) == 0)
- break;
-
- d = e ? e : urldb_add_host_node(part + 1, d);
- if (!d)
- break;
-
- *part = '\0';
- } while (1);
-
- return d;
}
-/* exported interface documented in content/urldb.h */
-struct path_data *
-urldb_add_path(lwc_string *scheme,
- unsigned int port,
- const struct host_part *host,
- char *path_query,
- lwc_string *fragment,
- nsurl *url)
-{
- struct path_data *d, *e;
- char *buf = path_query;
- char *segment, *slash;
- bool match;
-
- assert(scheme && host && url);
-
- d = (struct path_data *) &host->paths;
-
- /* skip leading '/' */
- segment = buf;
- if (*segment == '/')
- segment++;
-
- /* Process path segments */
- do {
- slash = strchr(segment, '/');
- if (!slash) {
- /* last segment */
- /* look for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcmp(segment, e->segment) == 0 &&
- lwc_string_isequal(scheme,
- e->scheme, &match) ==
- lwc_error_ok &&
- match == true &&
- e->port == port)
- break;
-
- d = e ? urldb_add_path_fragment(e, fragment) :
- urldb_add_path_node(scheme, port,
- segment, fragment, d);
- break;
- }
-
- *slash = '\0';
-
- /* look for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcmp(segment, e->segment) == 0 &&
- lwc_string_isequal(scheme, e->scheme,
- &match) == lwc_error_ok &&
- match == true &&
- e->port == port)
- break;
-
- d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
- if (!d)
- break;
-
- segment = slash + 1;
- } while (1);
- free(path_query);
- if (d && !d->url) {
- /* Insert defragmented URL */
- if (nsurl_defragment(url, &d->url) != NSERROR_OK)
- return NULL;
- }
-
- return d;
-}