diff options
Diffstat (limited to 'content/urldb.c')
-rw-r--r-- | content/urldb.c | 2231 |
1 files changed, 2231 insertions, 0 deletions
diff --git a/content/urldb.c b/content/urldb.c new file mode 100644 index 000000000..c7a798a92 --- /dev/null +++ b/content/urldb.c @@ -0,0 +1,2231 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk> + */ + +/** \file + * Unified URL information database (implementation) + * + * URLs are stored in a tree-based structure as follows: + * + * The host component is extracted from each URL and, if a FQDN, split on + * every '.'.The tree is constructed by inserting each FQDN segment in + * reverse order. Duplicate nodes are merged. + * + * If the host part of an URL is an IP address, then this is added to the + * tree verbatim (as if it were a TLD). + * + * This provides something looking like: + * + * root (a sentinel) + * | + * ------------------------------------------------- + * | | | | | | | + * com edu gov 127.0.0.1 net org uk TLDs + * | | | | | | + * google ... ... ... ... co 2LDs + * | | + * www bbc Hosts/Subdomains + * | + * www ... + * + * Each of the nodes in this tree is a struct host_part. This stores the + * FQDN segment (or IP address) with which the node is concerned. Each node + * may contain further information about paths on a host (struct path_data) + * or SSL certificate processing on a host-wide basis + * (host_part::permit_invalid_certs). + * + * Path data is concerned with storing various metadata about the path in + * question. This includes global history data, HTTP authentication details + * and any associated HTTP cookies. This is stored as a tree of path segments + * hanging off the relevant host_part node. + * + * Therefore, to find the last visited time of the URL + * http://www.example.com/path/to/resource.html, the FQDN tree would be + * traversed in the order root -> "com" -> "example" -> "www". The "www" + * node would have attached to it a tree of struct path_data: + * + * (sentinel) + * | + * path + * | + * to + * | + * resource.html + * + * This represents the absolute path "/path/to/resource.html". The leaf node + * "resource.html" contains the last visited time of the resource. + * + * The mechanism described above is, however, not particularly conducive to + * fast searching of the database for a given URL (or URLs beginning with a + * given prefix). Therefore, an anciliary data structure is used to enable + * fast searching. This structure simply reflects the contents of the + * database, with entries being added/removed at the same time as for the + * core database. In order to ensure that degenerate cases are kept to a + * minimum, we use an AAtree. This is an approximation of a Red-Black tree + * with similar performance characteristics, but with a significantly + * simpler implementation. Entries in this tree comprise pointers to the + * leaf nodes of the host tree described above. + */ + +#include <assert.h> +#include <ctype.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "netsurf/image/bitmap.h" +#include "netsurf/content/urldb.h" +#include "netsurf/desktop/options.h" +#ifdef riscos +/** \todo lose this */ +#include "netsurf/riscos/bitmap.h" +#endif +#include "netsurf/utils/log.h" +#include "netsurf/utils/url.h" +#include "netsurf/utils/utils.h" + +struct cookie { + char *name; /**< Cookie name */ + char *value; /**< Cookie value */ + char *comment; /**< Cookie comment */ + time_t expires; /**< Expiry timestamp, or 0 for session */ + time_t last_used; /**< Last used time */ + bool secure; /**< Only send for HTTPS requests */ + enum { COOKIE_NETSCAPE = 0, + COOKIE_RFC2109 = 1, + COOKIE_RFC2965 = 2 + } version; /**< Specification compliance */ + bool no_destroy; /**< Never destroy this cookie, + * unless it's expired */ + + struct cookie *next; /**< Next in list */ +}; + +struct auth_data { + char *realm; /**< Protection realm */ + char *auth; /**< Authentication details in form + * username:password */ +}; + +struct url_internal_data { + char *title; /**< Resource title */ + unsigned int visits; /**< Visit count */ + time_t last_visit; /**< Last visit time */ + content_type type; /**< Type of resource */ +}; + +struct path_data { + char *scheme; /**< URL scheme for data */ + unsigned int port; /**< Port number for data */ + char *segment; /**< Path segment for this node */ + unsigned int frag_cnt; /**< Number of entries in ::fragment */ + char **fragment; /**< Array of fragments */ + + struct bitmap *thumb; /**< Thumbnail image of resource */ + struct url_internal_data url; /**< URL data for resource */ + struct auth_data auth; /**< Authentication data for resource */ + struct cookie *cookies; /**< Cookies associated with resource */ + + struct path_data *next; /**< Next sibling */ + struct path_data *prev; /**< Previous sibling */ + struct path_data *parent; /**< Parent path segment */ + struct path_data *children; /**< Child path segments */ + struct path_data *last; /**< Last child */ +}; + +struct host_part { + /**< Known paths on this host. This _must_ be first so that + * struct host_part *h = (struct host_part *)mypath; works */ + struct path_data paths; + bool permit_invalid_certs; /**< Allow access to SSL protected + * resources on this host without + * verifying certificate authenticity + */ + + char *part; /**< Part of host string */ + + struct host_part *next; /**< Next sibling */ + struct host_part *prev; /**< Previous sibling */ + struct host_part *parent; /**< Parent host part */ + struct host_part *children; /**< Child host parts */ +}; + +struct search_node { + const struct host_part *data; /**< Host tree entry */ + + unsigned int level; /**< Node level */ + + struct search_node *left; /**< Left subtree */ + struct search_node *right; /**< Right subtree */ +}; + +/* Saving */ +static void urldb_save_search_tree(struct search_node *root, FILE *fp); +static void urldb_count_urls(const struct path_data *root, time_t expiry, + unsigned int *count); +static void urldb_write_urls(const struct path_data *parent, + const char *host, FILE *fp, char **path, int *path_alloc, + int *path_used, time_t expiry); + +/* Iteration */ +static bool urldb_iterate_partial_host(struct search_node *root, + const char *prefix, bool (*callback)(const char *url)); +static bool urldb_iterate_partial_path(const struct path_data *parent, + const char *host, const char *prefix, + char **path, int *path_alloc, int *path_used, + bool (*callback)(const char *url)); +static bool urldb_iterate_entries_host(struct search_node *parent, + bool (*callback)(const char *url)); +static bool urldb_iterate_entries_path(const char *host, char **path, + int *path_alloc, int *path_used, + const struct path_data *parent, + bool (*callback)(const char *url)); + +/* Insertion */ +static struct host_part *urldb_add_host_node(const char *part, + struct host_part *parent); +static struct host_part *urldb_add_host(const char *host); +static struct path_data *urldb_add_path_node(const char *scheme, + unsigned int port, const char *segment, const char *fragment, + struct path_data *parent); +static struct path_data *urldb_add_path(const char *scheme, + unsigned int port, const struct host_part *host, + const char *path, const char *fragment); +static int urldb_add_path_fragment_cmp(const void *a, const void *b); +static struct path_data *urldb_add_path_fragment(struct path_data *segment, + const char *fragment); + +/* Lookup */ +static struct path_data *urldb_find_url(const char *url); +static struct path_data *urldb_match_path(const struct path_data *parent, + const char *path, const char *scheme, unsigned short port); + +/* Dump */ +static void urldb_dump_hosts(struct host_part *parent); +static void urldb_dump_paths(struct path_data *parent); +static void urldb_dump_search(struct search_node *parent, int depth); + +/* Search tree */ +static struct search_node *urldb_search_insert(struct search_node *root, + const struct host_part *data); +static struct search_node *urldb_search_insert_internal( + struct search_node *root, struct search_node *n); +static struct search_node *urldb_search_remove(struct search_node *root, + const struct host_part *data); +static const struct host_part *urldb_search_find(struct search_node *root, + const char *host); +static struct search_node *urldb_search_skew(struct search_node *root); +static struct search_node *urldb_search_split(struct search_node *root); +static int urldb_search_match_host(const struct host_part *a, + const struct host_part *b); +static int urldb_search_match_string(const struct host_part *a, + const char *b); +static int urldb_search_match_prefix(const struct host_part *a, + const char *b); + +/** Root database handle */ +static struct host_part db_root; + +/** Search trees - one per letter + 1 for IPs */ +#define NUM_SEARCH_TREES 27 +#define ST_IP 0 +#define ST_DN 1 +static struct search_node empty = { 0, 0, &empty, &empty }; +static struct search_node *search_trees[NUM_SEARCH_TREES] = { + &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty, + &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty, + &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty, + &empty, &empty, &empty +}; + +/** + * Import an URL database from file, replacing any existing database + * + * \param filename Name of file containing data + */ +void urldb_load(const char *filename) +{ +#define MAXIMUM_URL_LENGTH 4096 + char s[MAXIMUM_URL_LENGTH]; + struct host_part *h; + int urls; + int i; + int version; + int length; + FILE *fp; + + /** \todo optimise */ + + assert(filename); + + LOG(("Loading URL file")); + + fp = fopen(filename, "r"); + if (!fp) { + LOG(("Failed to open file '%s' for reading", filename)); + return; + } + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + return; + version = atoi(s); + if (version < 105) { + LOG(("Unsupported URL file version.")); + return; + } + if (version > 105) { + LOG(("Unknown URL file version.")); + return; + } + + while (fgets(s, MAXIMUM_URL_LENGTH, fp)) { + /* get the hostname */ + length = strlen(s) - 1; + s[length] = '\0'; + + /* skip data that has ended up with a host of '' */ + if (length == 0) { + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + urls = atoi(s); + for (i = 0; i < (6 * urls); i++) + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + continue; + } + + h = urldb_add_host(s); + if (!h) + die("Memory exhausted whilst loading URL file"); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + urls = atoi(s); + + /* load the non-corrupt data */ + for (i = 0; i < urls; i++) { + struct path_data *p = NULL; + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + length = strlen(s) - 1; + s[length] = '\0'; + + urldb_add_url(s); + p = urldb_find_url(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->url.visits = (unsigned int)atoi(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->url.last_visit = (time_t)atoi(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->url.type = (content_type)atoi(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; +#ifdef riscos + if (p && strlen(s) == 12) { + /* ensure filename is 'XX.XX.XX.XX' */ + if ((s[2] == '.') && (s[5] == '.') && + (s[8] == '.')) { + s[11] = '\0'; + p->thumb = bitmap_create_file(s); + } + } +#endif + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + length = strlen(s) - 1; + if (p && length > 0) { + s[length] = '\0'; + p->url.title = malloc(length + 1); + if (p->url.title) + memcpy(p->url.title, s, length + 1); + } + } + } + + fclose(fp); + LOG(("Successfully loaded URL file")); +#undef MAXIMUM_URL_LENGTH +} + +/** + * Export the current database to file + * + * \param filename Name of file to export to + */ +void urldb_save(const char *filename) +{ + FILE *fp; + int i; + + assert(filename); + + fp = fopen(filename, "w"); + if (!fp) { + LOG(("Failed to open file '%s' for writing", filename)); + return; + } + + /* file format version number */ + fprintf(fp, "105\n"); + + for (i = 0; i != NUM_SEARCH_TREES; i++) { + urldb_save_search_tree(search_trees[i], fp); + } + + fclose(fp); +} + +/** + * Save a search (sub)tree + * + * \param root Root of (sub)tree to save + * \param fp File to write to + */ +void urldb_save_search_tree(struct search_node *parent, FILE *fp) +{ + char host[256]; + const struct host_part *h; + unsigned int path_count = 0; + char *path, *p, *end; + int path_alloc = 64, path_used = 2; + time_t expiry = time(NULL) - (60 * 60 * 24) * option_expire_url; + + if (parent == &empty) + return; + + urldb_save_search_tree(parent->left, fp); + + path = malloc(path_alloc); + if (!path) + return; + + path[0] = '/'; + path[1] = '\0'; + + for (h = parent->data, p = host, end = host + sizeof host; + h && h != &db_root && p < end; h = h->parent) { + int written = snprintf(p, end - p, "%s%s", h->part, + (h->parent && h->parent->parent) ? "." : ""); + if (written < 0) { + free(path); + return; + } + p += written; + } + + urldb_count_urls(&parent->data->paths, expiry, &path_count); + + if (path_count > 0) { + fprintf(fp, "%s\n%i\n", host, path_count); + + urldb_write_urls(&parent->data->paths, host, fp, + &path, &path_alloc, &path_used, expiry); + } + + free(path); + + urldb_save_search_tree(parent->right, fp); +} + +/** + * Count number of URLs associated with a host + * + * \param root Root of path data tree + * \param expiry Expiry time for URLs + * \param count Pointer to count + */ +void urldb_count_urls(const struct path_data *root, time_t expiry, + unsigned int *count) +{ + const struct path_data *p; + + if (!root->children) { + if ((root->url.last_visit > expiry) && + (root->url.visits > 0)) + (*count)++; + } + + for (p = root->children; p; p = p->next) + urldb_count_urls(p, expiry, count); +} + +/** + * Write URLs associated with a host + * + * \param parent Root of (sub)tree to write + * \param host Current host name + * \param fp File to write to + * \param path Current path string + * \param path_alloc Allocated size of path + * \param path_used Used size of path + * \param expiry Expiry time of URLs + */ +void urldb_write_urls(const struct path_data *parent, const char *host, + FILE *fp, char **path, int *path_alloc, int *path_used, + time_t expiry) +{ + const struct path_data *p; + int i; + int pused = *path_used; + + if (!parent->children) { + /* leaf node */ + if (!((parent->url.last_visit > expiry) && + (parent->url.visits > 0))) + /* expired */ + return; + + fprintf(fp, "%s://%s", parent->scheme, host); + + if (parent->port) + fprintf(fp,":%d", parent->port); + + fprintf(fp, "%s\n", *path); + + /** \todo handle fragments? */ + + fprintf(fp, "%i\n%i\n%i\n", parent->url.visits, + (int)parent->url.last_visit, + (int)parent->url.type); + +#ifdef riscos + if (parent->thumb) + fprintf(fp, "%s\n", parent->thumb->filename); +#else + fprintf(fp, "\n"); +#endif + + if (parent->url.title) { + char *s = parent->url.title; + for (i = 0; s[i] != '\0'; i++) + if (s[i] < 32) + s[i] = ' '; + for (--i; ((i > 0) && (s[i] == ' ')); i--) + s[i] = '\0'; + fprintf(fp, "%s\n", parent->url.title); + } else + fprintf(fp, "\n"); + } + + for (p = parent->children; p; p = p->next) { + int len = *path_used + strlen(p->segment) + 1; + if (*path_alloc < len) { + char *temp = realloc(*path, + (len > 64) ? len : *path_alloc + 64); + if (!temp) + return; + *path = temp; + *path_alloc = (len > 64) ? len : *path_alloc + 64; + } + + strcat(*path, p->segment); + if (p->children) { + strcat(*path, "/"); + } else { + len -= 1; + } + + *path_used = len; + + urldb_write_urls(p, host, fp, path, path_alloc, path_used, + expiry); + + /* restore path to its state on entry to this function */ + *path_used = pused; + (*path)[pused - 1] = '\0'; + } +} + +/** + * Insert an URL into the database + * + * \param url URL to insert + * \return true on success, false otherwise + */ +bool urldb_add_url(const char *url) +{ + struct host_part *h; + struct path_data *p; + char *fragment = NULL, *host, *plq, *scheme, *colon; + unsigned short port; + url_func_result ret; + assert(url); + + /** \todo consider file: URLs */ + + host = strchr(url, '#'); + if (host) { + fragment = strdup(host+1); + if (!fragment) + return false; + } + + /* extract host */ + ret = url_host(url, &host); + if (ret != URL_FUNC_OK) + return false; + + /* extract path, leafname, query */ + ret = url_plq(url, &plq); + if (ret != URL_FUNC_OK) { + free(host); + free(fragment); + return false; + } + + /* extract scheme */ + ret = url_scheme(url, &scheme); + if (ret != URL_FUNC_OK) { + free(plq); + free(host); + free(fragment); + return false; + } + + colon = strrchr(host, ':'); + if (!colon) { + port = 0; + } else { + *colon = '\0'; + port = atoi(colon + 1); + } + + /* Get host entry */ + h = urldb_add_host(host); + if (!h) { + free(scheme); + free(plq); + free(host); + free(fragment); + return false; + } + + /* Get path entry */ + p = urldb_add_path(scheme, port, h, plq, fragment); + if (!p) { + free(scheme); + free(plq); + free(host); + free(fragment); + return false; + } + + return true; +} + +/** + * Set an URL's title string, replacing any existing one + * + * \param url The URL to look for + * \param title The title string to use (copied) + */ +void urldb_set_url_title(const char *url, const char *title) +{ + struct path_data *p; + char *temp; + + assert(url && title); + + p = urldb_find_url(url); + if (!p) + return; + + temp = strdup(title); + if (!temp) + return; + + free(p->url.title); + p->url.title = temp; +} + +/** + * Set an URL's content type + * + * \param url The URL to look for + * \param type The type to set + */ +void urldb_set_url_content_type(const char *url, content_type type) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->url.type = type; +} + +/** + * Update an URL's visit data + * + * \param url The URL to update + */ +void urldb_update_url_visit_data(const char *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->url.last_visit = time(NULL); + p->url.visits++; +} + +/** + * Reset an URL's visit statistics + * + * \param url The URL to reset + */ +void urldb_reset_url_visit_data(const char *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->url.last_visit = (time_t)0; + p->url.visits = 0; +} + + +/** + * Find data for an URL. + * + * \param url Absolute URL to look for + * \return Pointer to result struct, or NULL + */ +const struct url_data *urldb_get_url_data(const char *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + return (struct url_data *)&p->url; +} + +/** + * Look up authentication details in database + * + * \param url Absolute URL to search for + * \return Pointer to authentication details, or NULL if not found + */ +const char *urldb_get_auth_details(const char *url) +{ + struct path_data *p, *q; + + assert(url); + + /* add to the db, so our lookup will work */ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + for (; p; p = p->parent) { + /* The parent path entry is stored hung off the + * parent entry with an empty (not NULL) segment string. + * We look for this here. + */ + for (q = p->children; q; q = q->next) { + if (strlen(q->segment) == 0) + break; + } + + if (q && q->auth.realm && q->auth.auth) + break; + } + + if (!q) + return NULL; + + return q->auth.auth; +} + +/** + * Retrieve certificate verification permissions from database + * + * \param url Absolute URL to search for + * \return true to permit connections to hosts with invalid certificates, + * false otherwise. + */ +bool urldb_get_cert_permissions(const char *url) +{ + struct path_data *p; + struct host_part *h; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return false; + + for (; p && p->parent; p = p->parent) + /* do nothing */; + + h = (struct host_part *)p; + + return h->permit_invalid_certs; +} + +/** + * Set authentication data for an URL + * + * \param url The URL to consider + * \param realm The authentication realm + * \param auth The authentication details (in form username:password) + */ +void urldb_set_auth_details(const char *url, const char *realm, + const char *auth) +{ + struct path_data *p; + char *t1, *t2; + + assert(url && realm && auth); + + /* add url, in case it's missing */ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return; + + /** \todo search subtree for same realm/auth details + * and remove them (as the lookup routine searches up the tree) */ + + t1 = strdup(realm); + t2 = strdup(auth); + + if (!t1 || !t2) { + free(t1); + free(t2); + return; + } + + free(p->auth.realm); + free(p->auth.auth); + + p->auth.realm = t1; + p->auth.auth = t2; +} + +/** + * Set certificate verification permissions + * + * \param url URL to consider + * \param permit Set to true to allow invalid certificates + */ +void urldb_set_cert_permissions(const char *url, bool permit) +{ + struct path_data *p; + struct host_part *h; + + assert(url); + + /* add url, in case it's missing */ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return; + + for (; p && p->parent; p = p->parent) + /* do nothing */; + + h = (struct host_part *)p; + + h->permit_invalid_certs = permit; +} + +/** + * Set thumbnail for url, replacing any existing thumbnail + * + * \param url Absolute URL to consider + * \param bitmap Opaque pointer to thumbnail data + */ +void urldb_set_thumbnail(const char *url, struct bitmap *bitmap) +{ + struct path_data *p; + + assert(url && bitmap); + + p = urldb_find_url(url); + if (!p) + return; + + if (p->thumb) + bitmap_destroy(p->thumb); + + p->thumb = bitmap; +} + +/** + * Retrieve thumbnail data for given URL + * + * \param url Absolute URL to search for + * \return Pointer to thumbnail data, or NULL if not found. + */ +const struct bitmap *urldb_get_thumbnail(const char *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + return p->thumb; +} + +/** + * Iterate over entries in the database which match the given prefix + * + * \param prefix Prefix to match + * \param callback Callback function + */ +void urldb_iterate_partial(const char *prefix, + bool (*callback)(const char *url)) +{ + char host[256]; + char buf[260]; /* max domain + "www." */ + const char *slash; + struct search_node *tree; + const struct host_part *h; + + assert(prefix && callback); + + slash = strchr(prefix, '/'); + + if (*prefix >= '0' && *prefix <= '9') + tree = search_trees[ST_IP]; + else if (isalpha(*prefix)) + tree = search_trees[ST_DN + tolower(*prefix) - 'a']; + else + return; + + if (slash) { + /* if there's a slash in the input, then we can + * assume that we're looking for a path */ + char *path, *domain = host; + int path_alloc = 64, path_used = 2; + + snprintf(host, sizeof host, "%.*s", slash - prefix, prefix); + + h = urldb_search_find(tree, host); + if (!h) { + int len = slash - prefix; + + if ((len == 1 && tolower(host[0]) != 'w') || + (len == 2 && (tolower(host[0]) != 'w' || + tolower(host[1]) != 'w')) || + (len >= 3 && + strncasecmp(host, "www", 3))) { + snprintf(buf, sizeof buf, "www.%s", host); + h = urldb_search_find( + search_trees[ST_DN + 'w' - 'a'], + buf); + if (!h) + return; + domain = buf; + } else + return; + } + + path = malloc(path_alloc); + if (!path) + return; + + path[0] = '/'; + path[1] = '\0'; + + urldb_iterate_partial_path(&h->paths, domain, slash + 1, + &path, &path_alloc, &path_used, callback); + + free(path); + } else { + int len = strlen(prefix); + + /* looking for hosts */ + if (!urldb_iterate_partial_host(tree, prefix, callback)) + return; + + if ((len == 1 && tolower(prefix[0]) != 'w') || + (len == 2 && (tolower(prefix[0]) != 'w' || + tolower(prefix[1]) != 'w')) || + (len >= 3 && + strncasecmp(prefix, "www", 3))) { + /* now look for www.prefix */ + snprintf(buf, sizeof buf, "www.%s", prefix); + if(!urldb_iterate_partial_host( + search_trees[ST_DN + 'w' - 'a'], + buf, callback)) + return; + } + } +} + +/** + * Partial host iterator (internal) + * + * \param root Root of (sub)tree to traverse + * \param prefix Prefix to match + * \param callback Callback function + * \return true to continue, false otherwise + */ +bool urldb_iterate_partial_host(struct search_node *root, const char *prefix, + bool (*callback)(const char *url)) +{ + int c; + const struct host_part *h; + char domain[256], *p, *end; + char *path; + int path_alloc = 64, path_used = 2; + + assert(root && prefix && callback); + + if (root == &empty) + return true; + + c = urldb_search_match_prefix(root->data, prefix); + + if (c > 0) + /* No match => look in left subtree */ + return urldb_iterate_partial_host(root->left, prefix, + callback); + else if (c < 0) + /* No match => look in right subtree */ + return urldb_iterate_partial_host(root->right, prefix, + callback); + else { + /* Match => iterate over l/r subtrees & process this node */ + if (!urldb_iterate_partial_host(root->left, prefix, + callback)) + return false; + + /* Generate host string */ + for (h = root->data, p = domain, + end = domain + sizeof domain; + h && h != &db_root && p < end; + h = h->parent) { + int written = snprintf(p, end - p, "%s%s", h->part, + (h->parent && h->parent->parent) ? "." : ""); + if (written < 0) + return false; + p += written; + } + + path = malloc(path_alloc); + if (!path) + return false; + + path[0] = '/'; + path[1] = '\0'; + + /* and extract all paths attached to this host */ + if (!urldb_iterate_entries_path(domain, &path, &path_alloc, + &path_used, &root->data->paths, callback)) { + free(path); + return false; + } + + free(path); + + if (!urldb_iterate_partial_host(root->right, prefix, + callback)) + return false; + } + + return true; +} + +/** + * Partial path iterator (internal) + * + * \param parent Root of (sub)tree to traverse + * \param host Host string + * \param prefix Prefix to match + * \param path The built path up to this point + * \param path_alloc Allocated size of path + * \param path_used Used size of path + * \param callback Callback function + * \return true to continue, false otherwise + */ +bool urldb_iterate_partial_path(const struct path_data *parent, + const char *host, const char *prefix, + char **path, int *path_alloc, int *path_used, + bool (*callback)(const char *url)) +{ + const struct path_data *p; + const char *slash, *end = prefix + strlen(prefix); + int pused = *path_used; + int c; + + slash = strchr(prefix, '/'); + if (!slash) + slash = end; + + if (slash == prefix && *prefix == '/') + /* Ignore "//" */ + return true; + + for (p = parent->children; p; p = p->next) { + if ((c = strncasecmp(p->segment, prefix, slash - prefix)) < 0) + /* didn't match, but may be more */ + continue; + else if (c > 0) + /* no more possible matches */ + break; + + /* prefix matches so far */ + int len = *path_used + strlen(p->segment) + 1; + if (*path_alloc < len) { + char *temp = realloc(*path, + (len > 64) ? len : *path_alloc + 64); + if (!temp) + return false; + *path = temp; + *path_alloc = (len > 64) ? len : *path_alloc + 64; + } + + strcat(*path, p->segment); + if (p->children) + strcat(*path, "/"); + else + len -= 1; + + *path_used = len; + + if (slash == end) { + /* we've run out of prefix, so all + * paths below this one match */ + if (!urldb_iterate_entries_path(host, path, + path_alloc, path_used, p, callback)) + return false; + } else { + /* more prefix to go => recurse */ + if (!urldb_iterate_partial_path(p, host, slash + 1, + path, path_alloc, path_used, + callback)) + return false; + } + + /* restore path to that from input for next child */ + *path_used = pused; + (*path)[pused - 1] = '\0'; + } + + return true; +} + +/** + * Iterate over all entries in database + * + * \param callback Function to callback for each entry + */ +void urldb_iterate_entries(bool (*callback)(const char *url)) +{ + int i; + + assert(callback); + + for (i = 0; i < NUM_SEARCH_TREES; i++) { + if (!urldb_iterate_entries_host(search_trees[i], + callback)) + break; + } +} + +/** + * Host data iterator (internal) + * + * \param parent Root of subtree to iterate over + * \param callback Callback function + * \return true to continue, false otherwise + */ +bool urldb_iterate_entries_host(struct search_node *parent, + bool (*callback)(const char *url)) +{ + char domain[256], *p, *end; + const struct host_part *h; + char *path; + int path_alloc = 64, path_used = 2; + + if (parent == &empty) + return true; + + if (!urldb_iterate_entries_host(parent->left, callback)) + return false; + + for (h = parent->data, p = domain, end = domain + sizeof domain; + h && h != &db_root && p < end; h = h->parent) { + int written = snprintf(p, end - p, "%s%s", h->part, + (h->parent && h->parent->parent) ? "." : ""); + if (written < 0) + return false; + p += written; + } + + path = malloc(path_alloc); + if (!path) + return false; + + path[0] = '/'; + path[1] = '\0'; + + if (!urldb_iterate_entries_path(domain, &path, &path_alloc, + &path_used, &parent->data->paths, callback)) { + free(path); + return false; + } + + free(path); + + if (!urldb_iterate_entries_host(parent->right, callback)) + return false; + + return true; +} + +/** + * Path data iterator (internal) + * + * \param host Host component of output URI + * \param path The built path up to this point + * \param path_alloc Allocated size of path + * \param path_used Used size of path + * \param parent Root of subtree to iterate over + * \param callback Callback function to call + * \return true to continue, false otherwise + */ +bool urldb_iterate_entries_path(const char *host, char **path, + int *path_alloc, int *path_used, + const struct path_data *parent, + bool (*callback)(const char *url)) +{ + const struct path_data *p; + int pused = *path_used; + + if (!parent->children) { + /* leaf node */ + int schemelen = strlen(parent->scheme); + int hostlen = strlen(host); + int prefixlen = schemelen + 3 /* :// */ + + hostlen + 6 /* :NNNNN */; + static char *url; + static int url_alloc; + int written; + + if (url_alloc < *path_used + prefixlen + 2) { + char *temp = realloc(url, *path_used + prefixlen + 2); + if (!temp) + return false; + url = temp; + url_alloc = *path_used + prefixlen + 2; + } + + written = sprintf(url, "%s://%s", parent->scheme, host); + if (written < 0) { + return false; + } + + if (parent->port) { + written = sprintf(url + schemelen + 3 + hostlen, + ":%d", parent->port); + if (written < 0) { + return false; + } + written += schemelen + 3 + hostlen; + } + + written = sprintf(url + written, "%s", *path); + if (written < 0) { + return false; + } + + /** \todo handle fragments? */ + + if (!callback(url)) + return false; + } + + for (p = parent->children; p; p = p->next) { + int len = *path_used + strlen(p->segment) + 1; + if (*path_alloc < len) { + char *temp = realloc(*path, + (len > 64) ? len : *path_alloc + 64); + if (!temp) + return false; + *path = temp; + *path_alloc = (len > 64) ? len : *path_alloc + 64; + } + + strcat(*path, p->segment); + if (p->children) { + strcat(*path, "/"); + } else { + len -= 1; + } + + *path_used = len; + + if (!urldb_iterate_entries_path(host, path, path_alloc, + path_used, p, callback)) + return false; + + /* restore path to its state on entry to this function */ + *path_used = pused; + (*path)[pused - 1] = '\0'; + } + + return true; +} + +/** + * Add a host node to the tree + * + * \param part Host segment to add (or whole IP address) (copied) + * \param parent Parent node to add to + * \return Pointer to added node, or NULL on memory exhaustion + */ +struct host_part *urldb_add_host_node(const char *part, + struct host_part *parent) +{ + struct host_part *d; + + assert(part && parent); + + d = calloc(1, sizeof(struct host_part)); + if (!d) + return NULL; + + d->part = strdup(part); + if (!d->part) { + free(d); + return NULL; + } + + d->next = parent->children; + if (parent->children) + parent->children->prev = d; + d->parent = parent; + parent->children = d; + + return d; +} + +/** + * Add a host to the database, creating any intermediate entries + * + * \param host Hostname to add + * \return Pointer to leaf node, or NULL on memory exhaustion + */ +struct host_part *urldb_add_host(const char *host) +{ + struct host_part *d = (struct host_part *) &db_root, *e; + struct search_node *s; + char buf[256]; /* 256 bytes is sufficient - domain names are + * limited to 255 chars. */ + char *part; + + assert(host); + + if (*(host) >= '0' && *(host) <= '9') { + /* Host is an IP, so simply add as TLD */ + + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(host, e->part) == 0) + /* found => return it */ + return e; + + d = urldb_add_host_node(host, d); + + s = urldb_search_insert(search_trees[ST_IP], d); + if (!s) { + /* failed */ + d = NULL; + } else { + search_trees[ST_IP] = s; + } + + return d; + } + + /* Copy host string, so we can corrupt it */ + strncpy(buf, host, sizeof buf); + buf[sizeof buf - 1] = '\0'; + + /* Process FQDN segments backwards */ + do { + part = strrchr(buf, '.'); + if (!part) { + /* last segment */ + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(buf, e->part) == 0) + break; + + if (e) { + d = e; + } else { + d = urldb_add_host_node(buf, d); + } + + /* And insert into search tree */ + if (d) { + if (isalpha(*buf)) { + struct search_node **r; + r = &search_trees[ + tolower(*buf) - 'a' + ST_DN]; + + s = urldb_search_insert(*r, d); + if (!s) { + /* failed */ + d = NULL; + } else { + *r = s; + } + } else { + d = NULL; + } + } + break; + } + + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(part + 1, e->part) == 0) + break; + + d = e ? e : urldb_add_host_node(part + 1, d); + if (!d) + break; + + *part = '\0'; + } while (1); + + return d; +} + +/** + * Add a path node to the tree + * + * \param scheme URL scheme associated with path (copied) + * \param port Port number on host associated with path + * \param segment Path segment to add (copied) + * \param fragment URL fragment (copied), or NULL + * \param parent Parent node to add to + * \return Pointer to added node, or NULL on memory exhaustion + */ +struct path_data *urldb_add_path_node(const char *scheme, unsigned int port, + const char *segment, const char *fragment, + struct path_data *parent) +{ + struct path_data *d, *e; + + assert(scheme && segment && parent); + + d = calloc(1, sizeof(struct path_data)); + if (!d) + return NULL; + + d->scheme = strdup(scheme); + if (!d->scheme) { + free(d); + return NULL; + } + + d->port = port; + + d->segment = strdup(segment); + if (!d->segment) { + free(d->scheme); + free(d); + return NULL; + } + + if (fragment) { + if (!urldb_add_path_fragment(d, fragment)) { + free(d->segment); + free(d->scheme); + free(d); + return NULL; + } + } + + for (e = parent->children; e; e = e->next) + if (strcmp(e->segment, d->segment) > 0) + break; + + if (e) { + d->prev = e->prev; + d->next = e; + if (e->prev) + e->prev->next = d; + else + parent->children = d; + e->prev = d; + } else if (!parent->children) { + d->prev = d->next = NULL; + parent->children = parent->last = d; + } else { + d->next = NULL; + d->prev = parent->last; + parent->last->next = d; + parent->last = d; + } + d->parent = parent; + + return d; +} + +/** + * Add a path to the database, creating any intermediate entries + * + * \param scheme URL scheme associated with path + * \param port Port number on host associated with path + * \param host Host tree node to attach to + * \param path Absolute path to add + * \param fragment URL fragment, or NULL + * \return Pointer to leaf node, or NULL on memory exhaustion + */ +struct path_data *urldb_add_path(const char *scheme, unsigned int port, + const struct host_part *host, const char *path, + const char *fragment) +{ + struct path_data *d, *e; + char *buf; + char *segment, *slash; + + assert(scheme && host && path); + + d = (struct path_data *) &host->paths; + + /* Copy path string, so we can corrupt it */ + buf = malloc(strlen(path) + 1); + if (!buf) + return NULL; + + /* + 1 to strip leading '/' */ + strcpy(buf, path + 1); + + segment = buf; + + /* Process path segments */ + do { + slash = strchr(segment, '/'); + if (!slash) { + /* last segment */ + /* look for existing entry */ + for (e = d->children; e; e = e->next) + if (strcmp(segment, e->segment) == 0 && + strcasecmp(scheme, + e->scheme) == 0 && + e->port == port) + break; + + d = e ? urldb_add_path_fragment(e, fragment) : + urldb_add_path_node(scheme, port, + segment, fragment, d); + break; + } + + *slash = '\0'; + + /* look for existing entry */ + for (e = d->children; e; e = e->next) + if (strcmp(segment, e->segment) == 0 && + strcasecmp(scheme, e->scheme) == 0 && + e->port == port) + break; + + d = e ? e : urldb_add_path_node(scheme, port, segment, + NULL, d); + if (!d) + break; + + segment = slash + 1; + } while (1); + + free(buf); + + return d; +} + +/** + * Fragment comparator callback for qsort + */ +int urldb_add_path_fragment_cmp(const void *a, const void *b) +{ + return strcasecmp(*((const char **) a), *((const char **) b)); +} + +/** + * Add a fragment to a path segment + * + * \param segment Path segment to add to + * \param fragment Fragment to add (copied), or NULL + * \return segment or NULL on memory exhaustion + */ +struct path_data *urldb_add_path_fragment(struct path_data *segment, + const char *fragment) +{ + char **temp; + + assert(segment); + + /* If no fragment, this function is a NOP + * This may seem strange, but it makes the rest + * of the code cleaner */ + if (!fragment) + return segment; + + temp = realloc(segment->fragment, + (segment->frag_cnt + 1) * sizeof(char *)); + if (!temp) + return NULL; + + segment->fragment = temp; + segment->fragment[segment->frag_cnt] = strdup(fragment); + if (!segment->fragment[segment->frag_cnt]) { + /* Don't free temp - it's now our buffer */ + return NULL; + } + + segment->frag_cnt++; + + /* We want fragments in alphabetical order, so sort them + * It may prove better to insert in alphabetical order instead */ + qsort(segment->fragment, segment->frag_cnt, sizeof (char *), + urldb_add_path_fragment_cmp); + + return segment; +} + +/** + * Find an URL in the database + * + * \param url The URL to find + * \return Pointer to path data, or NULL if not found + */ +struct path_data *urldb_find_url(const char *url) +{ + const struct host_part *h; + struct path_data *p; + struct search_node *tree; + char *host, *plq, *scheme, *colon; + unsigned short port; + url_func_result ret; + + assert(url); + + /** \todo consider file: URLs */ + + /* extract host */ + ret = url_host(url, &host); + if (ret != URL_FUNC_OK) + return NULL; + + /* extract path, leafname, query */ + ret = url_plq(url, &plq); + if (ret != URL_FUNC_OK) { + free(host); + return NULL; + } + + /* extract scheme */ + ret = url_scheme(url, &scheme); + if (ret != URL_FUNC_OK) { + free(plq); + free(host); + return NULL; + } + + colon = strrchr(host, ':'); + if (!colon) { + port = 0; + } else { + *colon = '\0'; + port = atoi(colon + 1); + } + + if (*host >= '0' && *host <= '9') + tree = search_trees[ST_IP]; + else if (isalpha(*host)) + tree = search_trees[ST_DN + tolower(*host) - 'a']; + else { + free(plq); + free(host); + free(scheme); + return NULL; + } + + h = urldb_search_find(tree, host); + if (!h) { + free(plq); + free(host); + free(scheme); + return NULL; + } + + p = urldb_match_path(&h->paths, plq, scheme, port); + + free(plq); + free(host); + free(scheme); + + return p; +} + +/** + * Match a path string + * + * \param parent Path (sub)tree to look in + * \param path The path to search for + * \param scheme The URL scheme associated with the path + * \param port The port associated with the path + * \return Pointer to path data or NULL if not found. + */ +struct path_data *urldb_match_path(const struct path_data *parent, + const char *path, const char *scheme, unsigned short port) +{ + struct path_data *p; + const char *slash; + + if (*path == '\0') + return (struct path_data *)parent; + + slash = strchr(path + 1, '/'); + if (!slash) + slash = path + strlen(path); + + for (p = parent->children; p; p = p->next) { + if (strncmp(p->segment, path + 1, slash - path - 1) == 0 && + strcmp(p->scheme, scheme) == 0 && + p->port == port) + break; + } + + if (p) { + return urldb_match_path(p, slash, scheme, port); + } + + return NULL; +} + +/** + * Dump URL database to stderr + */ +void urldb_dump(void) +{ + int i; + + urldb_dump_hosts(&db_root); + + for (i = 0; i != NUM_SEARCH_TREES; i++) + urldb_dump_search(search_trees[i], 0); +} + +/** + * Dump URL database hosts to stderr + * + * \param parent Parent node of tree to dump + */ +void urldb_dump_hosts(struct host_part *parent) +{ + struct host_part *h; + + if (parent->part) { + LOG(("%s", parent->part)); + + LOG(("\t%s invalid SSL certs", + parent->permit_invalid_certs ? "Permits" : "Denies")); + } + + /* Dump path data */ + urldb_dump_paths(&parent->paths); + + /* and recurse */ + for (h = parent->children; h; h = h->next) + urldb_dump_hosts(h); +} + +/** + * Dump URL database paths to stderr + * + * \param parent Parent node of tree to dump + */ +void urldb_dump_paths(struct path_data *parent) +{ + struct path_data *p; + unsigned int i; + + if (parent->segment) { + LOG(("\t%s : %u", parent->scheme, parent->port)); + + LOG(("\t\t'%s'", parent->segment)); + + for (i = 0; i != parent->frag_cnt; i++) + LOG(("\t\t\t#%s", parent->fragment[i])); + } + + /* and recurse */ + for (p = parent->children; p; p = p->next) + urldb_dump_paths(p); +} + +/** + * Dump search tree + * + * \param parent Parent node of tree to dump + * \param depth Tree depth + */ +void urldb_dump_search(struct search_node *parent, int depth) +{ + const struct host_part *h; + int i; + + if (parent == &empty) + return; + + urldb_dump_search(parent->left, depth + 1); + + for (i = 0; i != depth; i++) + fputc(' ', stderr); + + for (h = parent->data; h; h = h->parent) { + fprintf(stderr, "%s", h->part); + if (h->parent && h->parent->parent) + fputc('.', stderr); + } + + fputc('\n', stderr); + + urldb_dump_search(parent->right, depth + 1); +} + +/** + * Insert a node into the search tree + * + * \param root Root of tree to insert into + * \param data User data to insert + * \return Pointer to updated root, or NULL if failed + */ +struct search_node *urldb_search_insert(struct search_node *root, + const struct host_part *data) +{ + struct search_node *n; + + assert(root && data); + + n = malloc(sizeof(struct search_node)); + if (!n) + return NULL; + + n->level = 1; + n->data = data; + n->left = n->right = ∅ + + root = urldb_search_insert_internal(root, n); + + return root; +} + +/** + * Insert node into search tree + * + * \param root Root of (sub)tree to insert into + * \param n Node to insert + * \return Pointer to updated root + */ +struct search_node *urldb_search_insert_internal(struct search_node *root, + struct search_node *n) +{ + assert(root && n); + + if (root == &empty) { + root = n; + } else { + int c = urldb_search_match_host(root->data, n->data); + + if (c > 0) { + root->left = urldb_search_insert_internal( + root->left, n); + } else if (c < 0) { + root->right = urldb_search_insert_internal( + root->right, n); + } else { + /* exact match */ + free(n); + return root; + } + + root = urldb_search_skew(root); + root = urldb_search_split(root); + } + + return root; +} + +/** + * Delete a node from a search tree + * + * \param root Tree to remove from + * \param data Data to delete + * \return Updated root of tree + */ +struct search_node *urldb_search_remove(struct search_node *root, + const struct host_part *data) +{ + static struct search_node *last, *deleted; + + assert(root && data); + + if (root != &empty) { + int c = urldb_search_match_host(root->data, data); + + last = root; + if (c > 0) { + root->left = urldb_search_remove(root->left, data); + } else { + deleted = root; + root->right = urldb_search_remove(root->right, data); + } + } + + if (root == last) { + if (deleted != &empty && + urldb_search_match_host(deleted->data, + data) == 0) { + deleted->data = last->data; + deleted = ∅ + root = root->right; + } + } else { + if (root->left->level < root->level - 1 || + root->right->level < root->level - 1) { + if (root->right->level > --root->level) + root->right->level = root->level; + + root = urldb_search_skew(root); + root->right = urldb_search_skew(root->right); + root->right->right = + urldb_search_skew(root->right->right); + root = urldb_search_split(root); + root->right = urldb_search_split(root->right); + } + } + + return root; +} + +/** + * Find a node in a search tree + * + * \param root Tree to look in + * \param host Host to find + * \return Pointer to host tree node, or NULL if not found + */ +const struct host_part *urldb_search_find(struct search_node *root, + const char *host) +{ + int c; + + assert(root && host); + + if (root == &empty) { + return NULL; + } + + c = urldb_search_match_string(root->data, host); + + if (c > 0) + return urldb_search_find(root->left, host); + else if (c < 0) + return urldb_search_find(root->right, host); + else + return root->data; +} + +/** + * Compare a pair of host_parts + * + * \param a + * \param b + * \return 0 if match, non-zero, otherwise + */ +int urldb_search_match_host(const struct host_part *a, + const struct host_part *b) +{ + int ret; + + assert(a && b); + + /* traverse up tree to root, comparing parts as we go. */ + for (; a && b; a = a->parent, b = b->parent) + if ((ret = strcasecmp(a->part, b->part)) != 0) + /* They differ => return the difference here */ + return ret; + + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && !b) + /* len(a) > len(b) */ + return 1; + else if (!a && b) + /* len(a) < len(b) */ + return -1; + + /* identical */ + return 0; +} + +/** + * Compare host_part with a string + * + * \param a + * \param b + * \return 0 if match, non-zero, otherwise + */ +int urldb_search_match_string(const struct host_part *a, + const char *b) +{ + const char *end, *dot; + int plen, ret; + + assert(a && b); + + if (*b >= '0' && *b <= '9') { + /* IP address */ + return strcasecmp(a->part, b); + } + + end = b + strlen(b); + + while (b < end && a) { + dot = strchr(b, '.'); + if (!dot) { + /* last segment */ + dot = end; + } + + /* Compare strings (length limited) */ + if ((ret = strncasecmp(a->part, b, dot - b)) != 0) + /* didn't match => return difference */ + return ret; + + /* The strings matched, now check that the lengths do, too */ + plen = strlen(a->part); + + if (plen > dot - b) + /* len(a) > len(b) */ + return 1; + else if (plen < dot - b) + /* len(a) < len(b) */ + return -1; + + b = dot + 1; + a = a->parent; + } + + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && a != &db_root && b >= end) + /* len(a) > len(b) */ + return 1; + else if (!a && b < end) + /* len(a) < len(b) */ + return -1; + + /* Identical */ + return 0; +} + +/** + * Compare host_part with prefix + * + * \param a + * \param b + * \return 0 if match, non-zero, otherwise + */ +int urldb_search_match_prefix(const struct host_part *a, + const char *b) +{ + const char *end, *dot; + int plen, ret; + + assert(a && b); + + if (*b >= '0' && *b <= '9') { + /* IP address */ + return strncasecmp(a->part, b, strlen(b)); + } + + end = b + strlen(b); + + while (b < end && a) { + dot = strchr(b, '.'); + if (!dot) { + /* last segment */ + dot = end; + } + + /* Compare strings (length limited) */ + if ((ret = strncasecmp(a->part, b, dot - b)) != 0) + /* didn't match => return difference */ + return ret; + + /* The strings matched */ + if (dot < end) { + /* Consider segment lengths only in the case + * where the prefix contains segments */ + plen = strlen(a->part); + if (plen > dot - b) + /* len(a) > len(b) */ + return 1; + else if (plen < dot - b) + /* len(a) < len(b) */ + return -1; + } + + b = dot + 1; + a = a->parent; + } + + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && a != &db_root && b >= end) + /* len(a) > len(b) => prefix matches */ + return 0; + else if (!a && b < end) + /* len(a) < len(b) => prefix does not match */ + return -1; + + /* Identical */ + return 0; +} + +/** + * Rotate a subtree right + * + * \param root Root of subtree to rotate + * \return new root of subtree + */ +struct search_node *urldb_search_skew(struct search_node *root) +{ + struct search_node *temp; + + assert(root); + + if (root->left->level == root->level) { + temp = root->left; + root->left = temp->right; + temp->right = root; + root = temp; + } + + return root; +} + +/** + * Rotate a node left, increasing the parent's level + * + * \param root Root of subtree to rotate + * \return New root of subtree + */ +struct search_node *urldb_search_split(struct search_node *root) +{ + struct search_node *temp; + + assert(root); + + if (root->right->right->level == root->level) { + temp = root->right; + root->right = temp->left; + temp->left = root; + root = temp; + + root->level++; + } + + return root; +} + +#ifdef TEST +int main(void) +{ + struct host_part *h; + struct path_data *p; + + h = urldb_add_host("127.0.0.1"); + if (!h) { + LOG(("failed adding host")); + return 1; + } + + /* Get host entry */ + h = urldb_add_host("netsurf.strcprstskrzkrk.co.uk"); + if (!h) { + LOG(("failed adding host")); + return 1; + } + + /* Get path entry */ + p = urldb_add_path("http", 80, h, "/path/to/resource.htm?a=b", "zz"); + if (!p) { + LOG(("failed adding path")); + return 1; + } + + p = urldb_add_path("http", 80, h, "/path/to/resource.htm?a=b", "aa"); + if (!p) { + LOG(("failed adding path")); + return 1; + } + + p = urldb_add_path("http", 80, h, "/path/to/resource.htm?a=b", "yy"); + if (!p) { + LOG(("failed adding path")); + return 1; + } + + urldb_dump(); + + return 0; +} +#endif |