summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--content/fetch.c17
-rw-r--r--content/urldb.c990
-rw-r--r--content/urldb.h9
-rw-r--r--desktop/netsurf.c2
-rw-r--r--riscos/gui.c2
-rw-r--r--utils/url.c52
-rw-r--r--utils/url.h1
7 files changed, 1063 insertions, 10 deletions
diff --git a/content/fetch.c b/content/fetch.c
index a504e4b53..619df0dfb 100644
--- a/content/fetch.c
+++ b/content/fetch.c
@@ -70,6 +70,7 @@ struct fetch {
char *host; /**< Host part of URL. */
char *location; /**< Response Location header, or 0. */
unsigned long content_length; /**< Response Content-Length, or 0. */
+ char *cookie_string; /**< Cookie string for this fetch */
char *realm; /**< HTTP Auth Realm */
char *post_urlenc; /**< Url encoded POST string, or 0. */
struct curl_httppost *post_multipart; /**< Multipart post data, or 0. */
@@ -356,6 +357,7 @@ struct fetch * fetch_start(char *url, char *referer,
fetch->host = host;
fetch->location = 0;
fetch->content_length = 0;
+ fetch->cookie_string = 0;
fetch->realm = 0;
fetch->post_urlenc = 0;
fetch->post_multipart = 0;
@@ -635,13 +637,9 @@ CURLcode fetch_set_options(struct fetch *f)
SETOPT(CURLOPT_HTTPGET, 1L);
}
if (f->cookies) {
- if (option_cookie_file)
- SETOPT(CURLOPT_COOKIEFILE, option_cookie_file);
- if (option_cookie_jar)
- SETOPT(CURLOPT_COOKIEJAR, option_cookie_jar);
- } else {
- SETOPT(CURLOPT_COOKIEFILE, 0);
- SETOPT(CURLOPT_COOKIEJAR, 0);
+ f->cookie_string = urldb_get_cookie(f->url, f->referer);
+ if (f->cookie_string)
+ SETOPT(CURLOPT_COOKIE, f->cookie_string);
}
#ifdef WITH_AUTH
if ((auth = urldb_get_auth_details(f->url)) != NULL) {
@@ -771,6 +769,7 @@ void fetch_free(struct fetch *f)
free(f->host);
free(f->referer);
free(f->location);
+ free(f->cookie_string);
free(f->realm);
if (f->headers)
curl_slist_free_all(f->headers);
@@ -1175,6 +1174,10 @@ size_t fetch_curl_header(char *data, size_t size, size_t nmemb,
f->cachedata.last_modified =
curl_getdate(&data[i], NULL);
}
+ } else if (11 < size && strncasecmp(data, "Set-Cookie:", 11) == 0) {
+ /* extract Set-Cookie header */
+ SKIP_ST(11);
+ urldb_set_cookie(&data[i], f->url);
}
return size;
diff --git a/content/urldb.c b/content/urldb.c
index e72df512c..94e0c963a 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -77,6 +77,9 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
+
+#include <curl/curl.h>
+
#include "netsurf/image/bitmap.h"
#include "netsurf/content/urldb.h"
#include "netsurf/desktop/options.h"
@@ -92,7 +95,11 @@ struct cookie {
char *name; /**< Cookie name */
char *value; /**< Cookie value */
char *comment; /**< Cookie comment */
- time_t expires; /**< Expiry timestamp, or 0 for session */
+ bool domain_from_set; /**< Domain came from Set-Cookie: header */
+ char *domain; /**< Domain */
+ bool path_from_set; /**< Path came from Set-Cookie: header */
+ char *path; /**< Path */
+ time_t expires; /**< Expiry timestamp, or 1 for session */
time_t last_used; /**< Last used time */
bool secure; /**< Only send for HTTPS requests */
enum { COOKIE_NETSCAPE = 0,
@@ -102,6 +109,7 @@ struct cookie {
bool no_destroy; /**< Never destroy this cookie,
* unless it's expired */
+ struct cookie *prev; /**< Previous in list */
struct cookie *next; /**< Next in list */
};
@@ -230,6 +238,17 @@ static int urldb_search_match_string(const struct host_part *a,
static int urldb_search_match_prefix(const struct host_part *a,
const char *b);
+/* Cookies */
+static struct cookie *urldb_parse_cookie(const char *url,
+ const char *cookie);
+static bool urldb_insert_cookie(struct cookie *c, const char *scheme,
+ const char *url);
+static void urldb_free_cookie(struct cookie *c);
+static bool urldb_concat_cookie(struct cookie *c, int *used,
+ int *alloc, char **buf);
+static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent);
+static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent);
+
/** Root database handle */
static struct host_part db_root;
@@ -245,6 +264,27 @@ static struct search_node *search_trees[NUM_SEARCH_TREES] = {
&empty, &empty, &empty
};
+#define COOKIE_FILE_VERSION 100
+#define URL_FILE_VERSION 106
+
+regex_t expires_re;
+
+/**
+ * Initialise URL database code
+ *
+ * Compiles expires_re, the pattern used to recognise the date part of an
+ * Expires attribute ("Wdy, DD-Mon-YYYY HH:MM:SS GMT", with whitespace
+ * tolerated in place of the dashes).
+ *
+ * The pattern is anchored with '^'. urldb_set_cookie() hands regexec() a
+ * pointer three characters before a comma and needs to know whether a date
+ * starts exactly there. Without the anchor regexec() also succeeds when a
+ * date merely appears somewhere later in the header, so a comma that
+ * really separates two cookies would be mistaken for part of a date.
+ *
+ * This should be called before any other urldb functions
+ */
+void urldb_init(void)
+{
+	regcomp_wrapper(&expires_re,
+			"^[a-zA-Z]{3},[[:space:]]"	/* "Wdy, " */
+			"[0-9]{2}[[:space:]-]"		/* "DD[ -]" */
+			"[a-zA-Z]{3}[[:space:]-]"	/* "MMM[ -]" */
+			"[0-9]{4}[[:space:]]"		/* "YYYY " */
+			"[0-9]{2}(:[0-9]{2}){2}"	/* "HH:MM:SS" */
+			"[[:space:]]GMT",		/* " GMT" */
+			REG_EXTENDED);
+}
+
/**
* Import an URL database from file, replacing any existing database
*
@@ -279,7 +319,7 @@ void urldb_load(const char *filename)
LOG(("Unsupported URL file version."));
return;
}
- if (version > 106) {
+ if (version > URL_FILE_VERSION) {
LOG(("Unknown URL file version."));
return;
}
@@ -475,7 +515,7 @@ void urldb_save(const char *filename)
}
/* file format version number */
- fprintf(fp, "106\n");
+ fprintf(fp, "%d\n", URL_FILE_VERSION);
for (i = 0; i != NUM_SEARCH_TREES; i++) {
urldb_save_search_tree(search_trees[i], fp);
@@ -2242,6 +2282,950 @@ struct search_node *urldb_search_split(struct search_node *root)
return root;
}
+/**
+ * Retrieve cookies for an URL
+ *
+ * Collects, in this order, the cookies stored on path entries whose
+ * segment equals that of the URL's own path entry, the cookies stored on
+ * the empty-segment children of each ancestor path entry, and the cookies
+ * stored directly on the host entries reached by walking up from the
+ * point where the path walk stopped. Expired cookies and secure cookies
+ * on non-https schemes are skipped. Every cookie that is used has its
+ * last_used time set to the current time.
+ *
+ * \param url URL being fetched
+ * \param referer Referring resource, or NULL (currently unused)
+ * \return Cookies string for libcurl (on heap, owned by the caller),
+ *         or NULL on error/no cookies
+ *
+ * \todo Handle unvalidated fetches
+ */
+char *urldb_get_cookie(const char *url, const char *referer)
+{
+ const struct path_data *p, *q;
+ const struct host_part *h;
+ struct cookie *c;
+ /* version tracks the lowest cookie version seen; it starts at the
+  * highest value so any cookie can only lower it */
+ int count = 0, version = COOKIE_RFC2965;
+ /* ret_used counts the terminating NUL, hence it starts at 1 */
+ int ret_alloc = 4096, ret_used = 1;
+ char *path;
+ char *ret;
+ char *scheme;
+ time_t now;
+ url_func_result res;
+
+ assert(url);
+
+// LOG(("%s : %s", url, referer));
+
+// if (referer)
+// /* No unvalidated fetches for now */
+// return NULL;
+
+ /* NOTE(review): the return value is ignored; presumably this makes
+  * sure the lookup below can succeed - confirm */
+ urldb_add_url(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return NULL;
+
+ scheme = p->scheme;
+
+ ret = malloc(ret_alloc);
+ if (!ret)
+ return NULL;
+
+ ret[0] = '\0';
+
+ res = url_path(url, &path);
+ if (res != URL_FUNC_OK) {
+ free(ret);
+ return NULL;
+ }
+
+ now = time(NULL);
+
+ /* Rewind q to the first entry of the sibling list containing p */
+ if (p->prev) {
+ for (q = p->prev; q && q->prev; q = q->prev)
+ ; /* do nothing */
+ } else {
+ q = p;
+ }
+
+ /* Visit every sibling that has the same segment as p */
+ for (; q; q = q->next) {
+ if (strcmp(q->segment, p->segment))
+ continue;
+
+ /* Consider all cookies associated with this exact path */
+ for (c = q->cookies; c; c = c->next) {
+ /* expires == 1 marks a session cookie, which never
+  * expires by time */
+ if (c->expires != 1 && c->expires < now)
+ /* cookie has expired => ignore */
+ continue;
+
+ if (c->secure && strcasecmp(q->scheme, "https"))
+ /* secure cookie for insecure host. ignore */
+ continue;
+
+ if (!urldb_concat_cookie(c, &ret_used,
+ &ret_alloc, &ret)) {
+ free(path);
+ free(ret);
+ return NULL;
+ }
+
+ if (c->version < (unsigned int)version)
+ version = c->version;
+
+ c->last_used = now;
+
+ count++;
+ }
+ }
+
+// LOG(("%s", ret));
+
+ /* NOTE(review): p->parent is dereferenced without a NULL check, and
+  * if p->parent->parent is NULL then p becomes NULL, so both the loop
+  * below and the host loop after it are skipped - confirm that this
+  * cannot happen or is intended */
+ if (strlen(p->segment) == 0)
+ /* We're a directory; skip parent */
+ p = p->parent->parent;
+
+ /* Now consider cookies whose paths prefix-match ours */
+ for (; p; p = p->parent) {
+ /* Find parent directory's path entry(ies) */
+ /* There are potentially multiple due to differing schemes */
+ for (q = p->children; q; q = q->next) {
+ /* Only children with an empty segment are considered */
+ if (strlen(q->segment) > 0)
+ continue;
+
+ for (c = q->cookies; c; c = c->next) {
+// LOG(("%p: %s=%s", c, c->name, c->value));
+ if (c->expires != 1 && c->expires < now)
+ /* cookie has expired => ignore */
+ continue;
+
+ if (c->secure && strcasecmp(
+ q->scheme, "https"))
+ /* Secure cookie for insecure server
+ * => ignore */
+ continue;
+
+ if (!urldb_concat_cookie(c, &ret_used,
+ &ret_alloc, &ret)) {
+ free(path);
+ free(ret);
+ return NULL;
+ }
+
+ if (c->version < (unsigned int) version)
+ version = c->version;
+
+ c->last_used = now;
+
+ count++;
+ }
+ }
+
+ if (!p->parent) {
+ /* No parent, so bail here. This can't go in the
+ * loop exit condition as we want to process the
+ * top-level node, too */
+ break;
+ }
+ }
+
+// LOG(("%s", ret));
+
+ /* NOTE(review): p (a path_data pointer) is reinterpreted as a
+  * host_part pointer here; presumably the top-level path entry is
+  * the first member of its host_part - confirm against the struct
+  * definitions */
+ /* Finally consider domain cookies for hosts which domain match ours */
+ for (h = (const struct host_part *)p; h && h != &db_root;
+ h = h->parent) {
+ for (c = h->paths.cookies; c; c = c->next) {
+ if (c->expires != 1 && c->expires < now)
+ /* cookie has expired => ignore */
+ continue;
+
+ /* Ensure cookie path is a prefix of the resource */
+ if (strncmp(c->path, path, strlen(c->path)) != 0)
+ /* paths don't match => ignore */
+ continue;
+
+ /* Unlike the loops above, this tests the scheme of the
+  * URL being fetched, not that of a path entry */
+ if (c->secure && strcasecmp(scheme, "https"))
+ /* secure cookie for insecure host. ignore */
+ continue;
+
+ if (!urldb_concat_cookie(c, &ret_used, &ret_alloc,
+ &ret)) {
+ free(path);
+ free(ret);
+ return NULL;
+ }
+
+ if (c->version < (unsigned int)version)
+ version = c->version;
+
+ c->last_used = now;
+
+ count++;
+ }
+ }
+
+// LOG(("%s", ret));
+
+ if (count == 0) {
+ /* No cookies found */
+ free(path);
+ free(ret);
+ return NULL;
+ }
+
+ /* and build output string */
+ {
+ char *temp;
+ /* ret_used already includes the NUL. The 12 extra bytes hold
+  * "$Version=" (9 characters) plus up to three digits */
+ if (version > 0)
+ temp = malloc(12 + ret_used);
+ else
+ temp = malloc(ret_used);
+ if (!temp) {
+ free(path);
+ free(ret);
+ return NULL;
+ }
+
+ if (version > 0)
+ sprintf(temp, "$Version=%d%s", version, ret);
+ else {
+ /* Old-style cookies => no version & skip "; " */
+ sprintf(temp, "%s", ret + 2);
+ }
+
+ free(path);
+ free(ret);
+ ret = temp;
+ }
+
+ return ret;
+}
+
+/**
+ * Find where the cookie declaration starting at \a cur ends
+ *
+ * Cookies within one header are separated by commas, but the date in an
+ * Expires attribute ("Wdy, DD-Mon-YYYY HH:MM:SS GMT") contains a comma of
+ * its own, which must not be taken for a separator.
+ *
+ * \param cur Start of the current cookie declaration
+ * \param end End of the header text (at or after \a cur)
+ * \return Pointer to the separating comma, or \a end if there is none
+ */
+static const char *urldb_cookie_boundary(const char *cur, const char *end)
+{
+	const char *comma = strchr(cur, ',');
+
+	/* A weekday name needs three characters in front of the comma;
+	 * never look before the start of this declaration */
+	if (comma && comma - cur >= 3) {
+		regmatch_t m;
+
+		/* Only a date starting exactly at comma - 3 makes this
+		 * comma part of an Expires value. rm_so is preset to 0 so
+		 * that, should the pattern have been compiled without
+		 * sub-match reporting, any match is accepted as before. */
+		m.rm_so = 0;
+		if (regexec(&expires_re, comma - 3, 1, &m, 0) == 0 &&
+				m.rm_so == 0)
+			comma = strchr(comma + 1, ',');
+	}
+
+	if (!comma || comma > end)
+		comma = end;
+
+	return comma;
+}
+
+/**
+ * Decide whether a parsed cookie may be set by the resource fetched
+ *
+ * \param c    Parsed cookie
+ * \param host Host part of the URL being fetched
+ * \param path Path part of the URL being fetched
+ * \return true if the cookie is acceptable, false if it must be rejected
+ */
+static bool urldb_cookie_permitted(const struct cookie *c, const char *host,
+		const char *path)
+{
+	const char *dot;
+	size_t plen, hlen, dlen;
+
+	/* A cookie without a name/value pair cannot be stored or sent */
+	if (!c->name || !c->value || !c->domain || !c->path)
+		return false;
+
+	/* Cookie path must be a prefix of URL path */
+	plen = strlen(c->path);
+	if (plen > strlen(path) || strncmp(c->path, path, plen) != 0)
+		return false;
+
+	/* Cookie domain must contain embedded dots */
+	if (c->domain[0] == '\0')
+		return false;
+	dot = strchr(c->domain + 1, '.');
+	if (!dot || dot[1] == '\0')
+		return false;
+
+	/* Domain match fetch host with cookie domain */
+	if (strcasecmp(host, c->domain) == 0)
+		return true;
+
+	if (host[0] >= '0' && host[0] <= '9')
+		/* IP address, so no partial match */
+		return false;
+
+	hlen = strlen(host);
+	dlen = strlen(c->domain);
+
+	if (hlen <= dlen)
+		/* Partial match not possible */
+		return false;
+
+	if (strcasecmp(host + (hlen - dlen), c->domain) != 0)
+		return false;
+
+	/* The part of the host in front of the domain must hold no dots */
+	return memchr(host, '.', hlen - dlen) == NULL;
+}
+
+/**
+ * Parse Set-Cookie header and insert cookie(s) into database
+ *
+ * The header may declare several cookies separated by commas. Processing
+ * stops at the first cookie that fails to parse, fails validation or
+ * cannot be inserted; cookies inserted before that point stay in the
+ * database.
+ *
+ * \param header Header to parse, with Set-Cookie: stripped
+ * \param url URL being fetched
+ * \return true on success, false otherwise
+ */
+bool urldb_set_cookie(const char *header, const char *url)
+{
+	char cookie[8192];
+	const char *cur = header, *comma, *end;
+	char *path = NULL, *host = NULL, *scheme = NULL;
+	char *urlt, *hash;
+	bool ok = false;
+
+	assert(url && header);
+
+//	LOG(("'%s' : '%s'", url, header));
+
+	/* strip fragment */
+	urlt = strdup(url);
+	if (!urlt)
+		return false;
+	hash = strchr(urlt, '#');
+	if (hash)
+		*hash = '\0';
+
+	/* The output of a failed extraction is never freed */
+	if (url_scheme(url, &scheme) != URL_FUNC_OK) {
+		scheme = NULL;
+		goto out;
+	}
+	if (url_path(url, &path) != URL_FUNC_OK) {
+		path = NULL;
+		goto out;
+	}
+	if (url_host(url, &host) != URL_FUNC_OK) {
+		host = NULL;
+		goto out;
+	}
+
+	/* Ignore the line terminator, however many characters it has */
+	end = cur + strlen(cur);
+	while (end > cur && (end[-1] == '\r' || end[-1] == '\n'))
+		end--;
+
+	do {
+		struct cookie *c;
+		size_t len;
+
+		comma = urldb_cookie_boundary(cur, end);
+
+		/* Copy this declaration out, truncating if oversized */
+		len = (size_t)(comma - cur);
+		if (len >= sizeof cookie)
+			len = sizeof cookie - 1;
+		memcpy(cookie, cur, len);
+		cookie[len] = '\0';
+
+		c = urldb_parse_cookie(url, cookie);
+		if (!c)
+			/* failed => stop parsing */
+			goto out;
+
+		/* validate cookie */
+		if (!urldb_cookie_permitted(c, host, path)) {
+			urldb_free_cookie(c);
+			goto out;
+		}
+
+		/* Now insert into database (frees c on failure) */
+		if (!urldb_insert_cookie(c, scheme, urlt))
+			goto out;
+
+		cur = comma + 1;
+	} while (cur < end);
+
+	ok = true;
+
+out:
+	free(host);
+	free(path);
+	free(scheme);
+	free(urlt);
+
+	return ok;
+}
+
+/**
+ * Trim blanks (spaces and tabs) from both ends of a string, in place
+ *
+ * \param s       String to trim (modified)
+ * \param unquote Also drop one double quote from each end, after the
+ *                blanks have been removed
+ * \return Pointer to the first retained character, within \a s
+ */
+static char *urldb_cookie_trim(char *s, bool unquote)
+{
+	size_t len;
+
+	while (*s == ' ' || *s == '\t')
+		s++;
+	if (unquote && *s == '"')
+		s++;
+
+	/* The length is measured from the new start, so the trailing
+	 * pass can never reach back over what was skipped above */
+	len = strlen(s);
+	while (len > 0 && (s[len - 1] == ' ' || s[len - 1] == '\t'))
+		s[--len] = '\0';
+	if (unquote && len > 0 && s[len - 1] == '"')
+		s[--len] = '\0';
+
+	return s;
+}
+
+/**
+ * Parse a cookie
+ *
+ * The cookie string is a ';' separated list of "name=value" pairs and
+ * bare attribute names. Comment, Domain, Max-Age, Path, Version and
+ * Expires pairs and the bare Secure attribute are recognised; the first
+ * other pair provides the cookie's name and value. A Domain value that
+ * does not start with a dot is ignored. Domain and path default to the
+ * host and path of \a url when not given. The expiry time is 1 for a
+ * session cookie and 0 when Max-Age requests deletion.
+ *
+ * \param url URL being fetched
+ * \param cookie Cookie string
+ * \return Pointer to cookie structure (on heap, caller frees) or NULL
+ *         if parsing failed, memory ran out or no name/value pair was
+ *         found
+ */
+struct cookie *urldb_parse_cookie(const char *url, const char *cookie)
+{
+	struct cookie *c;
+	char name[1024], value[4096];
+	const char *cur = cookie, *semi, *end;
+	time_t max_age = 0, expires = 0;
+	bool had_max_age = false, had_expires = false;
+	url_func_result res;
+
+	assert(url && cookie);
+
+	c = calloc(1, sizeof(struct cookie));
+	if (!c)
+		return NULL;
+
+	end = cur + strlen(cur);
+
+	/* Find semicolon */
+	semi = strchr(cur, ';');
+	if (!semi)
+		semi = end;
+
+	/* process name-value pairs */
+	do {
+		const char *equals = strchr(cur, '=');
+
+		name[0] = value[0] = '\0';
+
+		if (equals && equals < semi) {
+			char *n, *v;
+
+			/* name = value */
+			if (sscanf(cur, "%1023[^=]=%4095[^;]",
+					name, value) != 2)
+				break;
+
+			n = urldb_cookie_trim(name, false);
+			v = urldb_cookie_trim(value, true);
+
+			if (!c->comment &&
+					strcasecmp(n, "Comment") == 0) {
+				c->comment = strdup(v);
+				if (!c->comment)
+					break;
+			} else if (!c->domain &&
+					strcasecmp(n, "Domain") == 0) {
+				if (v[0] == '.') {
+					/* Domain must start with a dot */
+					c->domain_from_set = true;
+					c->domain = strdup(v);
+					if (!c->domain)
+						break;
+				}
+			} else if (strcasecmp(n, "Max-Age") == 0) {
+				int temp = atoi(v);
+				had_max_age = true;
+				if (temp == 0)
+					/* Special case - 0 means delete */
+					max_age = 0;
+				else
+					max_age = time(NULL) + temp;
+			} else if (!c->path &&
+					strcasecmp(n, "Path") == 0) {
+				c->path_from_set = true;
+				c->path = strdup(v);
+				if (!c->path)
+					break;
+			} else if (strcasecmp(n, "Version") == 0) {
+				c->version = atoi(v);
+			} else if (strcasecmp(n, "Expires") == 0) {
+				had_expires = true;
+				expires = curl_getdate(v, NULL);
+			} else if (!c->name) {
+				c->name = strdup(n);
+				c->value = strdup(v);
+				if (!c->name || !c->value)
+					break;
+			}
+		} else {
+			char *n;
+
+			/* name */
+			if (sscanf(cur, "%1023[^;] ", name) != 1)
+				break;
+
+			n = urldb_cookie_trim(name, false);
+
+			if (strcasecmp(n, "Secure") == 0)
+				c->secure = true;
+		}
+
+		cur = semi + 1;
+		if (cur < end) {
+			semi = strchr(cur, ';');
+			if (!semi)
+				semi = end;
+		}
+	} while(semi && cur < end);
+
+	if (cur < end) {
+		/* parsing failed */
+		urldb_free_cookie(c);
+		return NULL;
+	}
+
+	if (!c->name || !c->value) {
+		/* No name/value pair: nothing that could be stored, and
+		 * later code compares and measures the name */
+		urldb_free_cookie(c);
+		return NULL;
+	}
+
+	/* Now fix-up default values */
+	if (!c->domain) {
+		res = url_host(url, &c->domain);
+		if (res != URL_FUNC_OK) {
+			urldb_free_cookie(c);
+			return NULL;
+		}
+	}
+
+	if (!c->path) {
+		res = url_path(url, &c->path);
+		if (res != URL_FUNC_OK) {
+			urldb_free_cookie(c);
+			return NULL;
+		}
+	}
+
+	if (had_max_age && had_expires) {
+		/* Max age takes precedence iff version 1 or later */
+		c->expires =
+			c->version == COOKIE_NETSCAPE ? expires : max_age;
+	} else if (had_max_age) {
+		c->expires = max_age;
+	} else if (had_expires) {
+		c->expires = expires;
+	} else
+		c->expires = 1;
+
+	return c;
+}
+
+/**
+ * Look up the host entry for a cookie's host name
+ *
+ * The search tree is chosen from the first character of the name: ST_IP
+ * for a digit, ST_DN plus the letter's offset for an ASCII letter. Any
+ * other first character has no tree, so no lookup is attempted.
+ *
+ * NOTE(review): using ST_IP for every name with a leading digit mirrors
+ * the exact-host case of the original code; confirm that urldb_add_host()
+ * files such names in the same tree.
+ *
+ * \param host Host name, without any leading dot
+ * \return Host entry, or NULL if not found
+ */
+static const struct host_part *urldb_cookie_find_host(const char *host)
+{
+	int first = tolower((unsigned char)host[0]);
+
+	if (first >= '0' && first <= '9')
+		return urldb_search_find(search_trees[ST_IP], host);
+
+	if (first >= 'a' && first <= 'z')
+		return urldb_search_find(
+				search_trees[first - 'a' + ST_DN], host);
+
+	return NULL;
+}
+
+/**
+ * Insert a cookie into the database
+ *
+ * A cookie whose domain starts with a dot is stored on the host entry
+ * itself; any other cookie is stored on the path entry for its path.
+ * A stored cookie with the same domain, path and name is replaced, or
+ * simply removed if the new cookie's expiry time is 0. A cookie with
+ * expiry time 0 that matches nothing is discarded.
+ *
+ * \param c The cookie to insert
+ * \param scheme URL scheme associated with cookie path
+ * \param url URL (sans fragment) associated with cookie
+ * \return true on success, false on memory exhaustion or if the cookie
+ *         lacks a name, domain or path (c will be freed)
+ */
+bool urldb_insert_cookie(struct cookie *c, const char *scheme,
+		const char *url)
+{
+	struct cookie *d;
+	const struct host_part *h;
+	struct path_data *p;
+	const char *hostname;
+	bool domain_cookie;
+
+	assert(c && scheme && url);
+
+	/* These fields are compared below; refuse incomplete cookies */
+	if (!c->name || !c->domain || !c->path) {
+		urldb_free_cookie(c);
+		return false;
+	}
+
+	domain_cookie = (c->domain[0] == '.');
+	hostname = domain_cookie ? c->domain + 1 : c->domain;
+	if (hostname[0] == '\0') {
+		urldb_free_cookie(c);
+		return false;
+	}
+
+	h = urldb_cookie_find_host(hostname);
+	if (!h) {
+		h = urldb_add_host(hostname);
+		if (!h) {
+			urldb_free_cookie(c);
+			return false;
+		}
+	}
+
+	if (domain_cookie) {
+		/* Domain cookies live on the host entry itself */
+		p = (struct path_data *)&h->paths;
+	} else {
+		/* find path */
+		p = urldb_add_path(scheme, 0, h,
+				c->path, NULL, url);
+		if (!p) {
+			urldb_free_cookie(c);
+			return false;
+		}
+	}
+
+	/* Look for a stored cookie that this one supersedes */
+	for (d = p->cookies; d; d = d->next) {
+		if (!strcmp(d->domain, c->domain) &&
+				!strcmp(d->path, c->path) &&
+				!strcmp(d->name, c->name))
+			break;
+	}
+
+	if (d && c->expires == 0) {
+		/* remove cookie */
+		if (d->next)
+			d->next->prev = d->prev;
+		if (d->prev)
+			d->prev->next = d->next;
+		else
+			p->cookies = d->next;
+		urldb_free_cookie(d);
+		urldb_free_cookie(c);
+	} else if (d) {
+		/* replace d with c */
+		c->prev = d->prev;
+		c->next = d->next;
+		if (c->next)
+			c->next->prev = c;
+		if (c->prev)
+			c->prev->next = c;
+		else
+			p->cookies = c;
+		urldb_free_cookie(d);
+	} else if (c->expires == 0) {
+		/* Delete request for a cookie we do not hold: storing it
+		 * would only keep a dead entry in the list */
+		urldb_free_cookie(c);
+	} else {
+		/* add at head of list */
+		c->prev = NULL;
+		c->next = p->cookies;
+		if (p->cookies)
+			p->cookies->prev = c;
+		p->cookies = c;
+	}
+
+	return true;
+}
+
+/**
+ * Destroy a cookie
+ *
+ * Releases the five strings the cookie owns and then the structure
+ * itself. The cookie is not unlinked from any list.
+ *
+ * \param c The cookie to free (must not be NULL)
+ */
+void urldb_free_cookie(struct cookie *c)
+{
+	assert(c);
+
+	{
+		char *owned[] = { c->name, c->value, c->comment,
+				c->domain, c->path };
+		size_t i;
+
+		for (i = 0; i != sizeof owned / sizeof owned[0]; i++)
+			free(owned[i]);
+	}
+
+	free(c);
+}
+
+/**
+ * Concatenate a cookie into the provided buffer
+ *
+ * Appends "; name=value", followed by "; $Path=..." and "; $Domain=..."
+ * when those came from the Set-Cookie header, over the buffer's current
+ * terminator. The buffer is enlarged in 4096 byte steps until the text
+ * fits; on failure the buffer and the counters are left unchanged.
+ *
+ * \param c Cookie to concatenate
+ * \param used Pointer to amount of buffer used, including the
+ *             terminator (updated)
+ * \param alloc Pointer to allocated size of buffer (updated)
+ * \param buf Pointer to Pointer to buffer (updated)
+ * \return true on success, false on memory exhaustion
+ */
+bool urldb_concat_cookie(struct cookie *c, int *used, int *alloc, char **buf)
+{
+	int clen;
+
+	assert(c && used && alloc && buf && *buf);
+
+	/* 2 for "; ", 1 for "=", 8 for "; $Path=", 10 for "; $Domain=" */
+	clen = 2 + strlen(c->name) + 1 + strlen(c->value) +
+			(c->path_from_set ?
+				8 + strlen(c->path) : 0) +
+			(c->domain_from_set ?
+				10 + strlen(c->domain) : 0);
+
+	if (*used + clen >= *alloc) {
+		int want = *alloc;
+		char *temp;
+
+		/* One step is not always enough: a single cookie can be
+		 * longer than 4096 bytes */
+		while (*used + clen >= want)
+			want += 4096;
+
+		temp = realloc(*buf, want);
+		if (!temp) {
+			return false;
+		}
+		*buf = temp;
+		*alloc = want;
+	}
+
+	/** \todo Quote value strings iff version > 0 */
+	snprintf(*buf + *used - 1, (size_t)(*alloc - (*used - 1)),
+			"; %s=%s%s%s%s%s",
+			c->name, c->value,
+			(c->path_from_set ? "; $Path=" : "" ),
+			(c->path_from_set ? c->path : "" ),
+			(c->domain_from_set ? "; $Domain=" : ""),
+			(c->domain_from_set ? c->domain : ""));
+	*used += clen;
+
+	return true;
+}
+
+/**
+ * Load a cookie file into the database
+ *
+ * The file holds one cookie per line, as whitespace separated fields in
+ * the order written by urldb_save_cookies(). Everything before a
+ * "Version:" line is ignored, and loading stops if that version is not
+ * COOKIE_FILE_VERSION. Lines with too few fields are logged and skipped.
+ * Loading also stops on memory exhaustion or when a cookie cannot be
+ * inserted. A missing or unreadable file is silently ignored.
+ *
+ * NOTE(review): fields are split on any whitespace, so an empty field or
+ * one containing a space shifts the fields after it.
+ *
+ * \param filename File to load
+ */
+void urldb_load_cookies(const char *filename)
+{
+	FILE *fp;
+	char s[16*1024];
+	int file_version = 0;
+
+	assert(filename);
+
+	fp = fopen(filename, "r");
+	if (!fp)
+		return;
+
+/* Terminate the field at p and step past the terminator. Both macros use
+ * "continue" to drop the current line, so they are only valid directly
+ * inside the line loop below. */
+#define FIND_WS { \
+	for (; *p && !isspace((unsigned char)*p) && \
+			!iscntrl((unsigned char)*p); p++) \
+		; /* do nothing */ \
+	if (p >= end) { \
+		LOG(("Overran input")); \
+		continue; \
+	} \
+	*p++ = '\0'; \
+}
+
+/* Advance p to the start of the next field */
+#define SKIP_WS { \
+	for (; *p && isspace((unsigned char)*p); p++) \
+		; /* do nothing */ \
+	if (p >= end) { \
+		LOG(("Overran input")); \
+		continue; \
+	} \
+}
+
+	while (fgets(s, sizeof s, fp)) {
+		char *p = s, *end = 0,
+			*domain, *path, *name, *value, *scheme, *url,
+			*comment;
+		int version, domain_specified, path_specified,
+			secure, no_destroy;
+		time_t expires, last_used;
+		struct cookie *c;
+		size_t len;
+
+		/* Lose the terminating newline, if there is one: the last
+		 * line of a file may not have one */
+		len = strlen(s);
+		if (len > 0 && s[len - 1] == '\n')
+			s[--len] = '\0';
+		end = s + len;
+
+		if (s[0] == '\0' || s[0] == '#')
+			/* Skip blank lines or comments */
+			continue;
+
+		/* Look for file version first
+		 * (all input is ignored until this is read)
+		 */
+		if (strncasecmp(s, "Version:", 8) == 0) {
+			FIND_WS; SKIP_WS; file_version = atoi(p);
+
+			if (file_version != COOKIE_FILE_VERSION) {
+				LOG(("Unknown Cookie file version"));
+				break;
+			}
+
+			continue;
+		} else if (file_version == 0) {
+			/* Haven't yet seen version; skip this input */
+			continue;
+		}
+
+		/* One cookie/line */
+
+		/* Parse input */
+		FIND_WS; version = atoi(s);
+		SKIP_WS; domain = p; FIND_WS;
+		SKIP_WS; domain_specified = atoi(p); FIND_WS;
+		SKIP_WS; path = p; FIND_WS;
+		SKIP_WS; path_specified = atoi(p); FIND_WS;
+		SKIP_WS; secure = atoi(p); FIND_WS;
+		SKIP_WS; expires = (time_t)atoi(p); FIND_WS;
+		SKIP_WS; last_used = (time_t)atoi(p); FIND_WS;
+		SKIP_WS; no_destroy = atoi(p); FIND_WS;
+		SKIP_WS; name = p; FIND_WS;
+		SKIP_WS; value = p; FIND_WS;
+		SKIP_WS; scheme = p; FIND_WS;
+		SKIP_WS; url = p; FIND_WS;
+
+		/* Comment may have no content, so don't
+		 * use macros as they'll break */
+		for (; *p && isspace((unsigned char)*p); p++)
+			; /* do nothing */
+		comment = p;
+
+		assert(p <= end);
+
+		/* Now create cookie; zeroed so the list links and any
+		 * string that fails to copy are NULL */
+		c = calloc(1, sizeof *c);
+		if (!c)
+			break;
+
+		c->name = strdup(name);
+		c->value = strdup(value);
+		c->comment = strdup(comment);
+		c->domain_from_set = domain_specified;
+		c->domain = strdup(domain);
+		c->path_from_set = path_specified;
+		c->path = strdup(path);
+		c->expires = expires;
+		c->last_used = last_used;
+		c->secure = secure;
+		c->version = version;
+		c->no_destroy = no_destroy;
+
+		if (!(c->name && c->value && c->comment &&
+				c->domain && c->path)) {
+			urldb_free_cookie(c);
+			break;
+		}
+
+		/* And insert it into database */
+		if (!urldb_insert_cookie(c, scheme, url)) {
+			/* Cookie freed for us */
+			break;
+		}
+	}
+
+#undef SKIP_WS
+#undef FIND_WS
+
+	fclose(fp);
+}
+
+/**
+ * Save persistent cookies to file
+ *
+ * Writes a comment header describing the format, then the "Version:"
+ * line, then the cookies of every host in the database. Nothing is
+ * written if the file cannot be opened.
+ *
+ * \param filename Path to save to
+ */
+void urldb_save_cookies(const char *filename)
+{
+	FILE *out;
+
+	assert(filename);
+
+	out = fopen(filename, "w");
+	if (out == NULL)
+		return;
+
+	/* Comment header and version line, in a single call */
+	fprintf(out, "# >%s\n"
+			"# NetSurf cookies file.\n"
+			"#\n"
+			"# Lines starting with a '#' are comments, "
+			"blank lines are ignored.\n"
+			"#\n"
+			"# All lines prior to \"Version: %d\" are discarded.\n"
+			"#\n"
+			"# Version\tDomain\tDomain from Set-Cookie\tPath\t"
+			"Path from Set-Cookie\tSecure\tExpires\tLast used\t"
+			"No destroy\tName\tValue\tScheme\tURL\tComment\n"
+			"Version: %d\n",
+			filename, COOKIE_FILE_VERSION, COOKIE_FILE_VERSION);
+
+	/* Cookie lines, starting from the root of the host tree */
+	urldb_save_cookie_hosts(out, &db_root);
+
+	fclose(out);
+}
+
+/**
+ * Write out the cookies of a host and of every host below it
+ *
+ * The host's own path tree is written first, then each child host in
+ * list order, recursively.
+ *
+ * \param fp File pointer to write to
+ * \param parent Parent host
+ */
+void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
+{
+	struct host_part *child;
+
+	assert(fp && parent);
+
+	urldb_save_cookie_paths(fp, &parent->paths);
+
+	child = parent->children;
+	while (child) {
+		urldb_save_cookie_hosts(fp, child);
+		child = child->next;
+	}
+}
+
+/**
+ * Write out the cookies of a path entry and of every path below it
+ *
+ * Each cookie whose expiry time is not before the current time is
+ * written as one line of tab separated fields. Cookies that expire
+ * earlier are skipped, which includes session cookies since their
+ * expiry time is 1. Child path entries follow, recursively.
+ *
+ * \param fp File pointer to write to
+ * \param parent Parent path
+ */
+void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
+{
+	time_t now = time(NULL);
+	struct cookie *c;
+	struct path_data *child;
+
+	assert(fp && parent);
+
+	c = parent->cookies;
+	while (c) {
+		if (!(c->expires < now)) {
+			/* Still valid, so keep it */
+			fprintf(fp, "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t"
+					"%s\t%s\t%s\t%s\t%s\n",
+					c->version, c->domain,
+					c->domain_from_set, c->path,
+					c->path_from_set, c->secure,
+					(int)c->expires, (int)c->last_used,
+					c->no_destroy, c->name, c->value,
+					parent->scheme ? parent->scheme
+							: "unused",
+					parent->url ? parent->url : "unused",
+					c->comment ? c->comment : "");
+		}
+		c = c->next;
+	}
+
+	child = parent->children;
+	while (child) {
+		urldb_save_cookie_paths(fp, child);
+		child = child->next;
+	}
+}
+
+
#ifdef TEST
int main(void)
{
diff --git a/content/urldb.h b/content/urldb.h
index e919549bf..02d217593 100644
--- a/content/urldb.h
+++ b/content/urldb.h
@@ -25,6 +25,9 @@ struct url_data {
struct bitmap;
+/* Initialisation */
+void urldb_init(void);
+
/* Persistence support */
void urldb_load(const char *filename);
void urldb_save(const char *filename);
@@ -66,4 +69,10 @@ void urldb_iterate_entries(bool (*callback)(const char *url,
/* Debug */
void urldb_dump(void);
+/* Cookies */
+bool urldb_set_cookie(const char *header, const char *url);
+char *urldb_get_cookie(const char *url, const char *referer);
+void urldb_load_cookies(const char *filename);
+void urldb_save_cookies(const char *filename);
+
#endif
diff --git a/desktop/netsurf.c b/desktop/netsurf.c
index 606774bf3..92f8afcd3 100644
--- a/desktop/netsurf.c
+++ b/desktop/netsurf.c
@@ -15,6 +15,7 @@
#include "netsurf/utils/config.h"
#include "netsurf/content/fetch.h"
#include "netsurf/content/fetchcache.h"
+#include "netsurf/content/urldb.h"
#include "netsurf/desktop/netsurf.h"
#include "netsurf/desktop/browser.h"
#include "netsurf/desktop/gui.h"
@@ -77,6 +78,7 @@ void netsurf_init(int argc, char** argv)
lib_init();
url_init();
+ urldb_init();
gui_init(argc, argv);
setlocale(LC_ALL, "");
fetch_init();
diff --git a/riscos/gui.c b/riscos/gui.c
index b62bc0f53..b6f74f79b 100644
--- a/riscos/gui.c
+++ b/riscos/gui.c
@@ -367,6 +367,7 @@ void gui_init(int argc, char** argv)
bitmap_initialise_memory();
urldb_load(option_url_path);
+ urldb_load_cookies(option_cookie_file);
nsdir_temp = getenv("NetSurf$Dir");
if (!nsdir_temp)
@@ -724,6 +725,7 @@ void gui_init2(int argc, char** argv)
void gui_quit(void)
{
bitmap_quit();
+ urldb_save_cookies(option_cookie_jar);
urldb_save(option_url_save);
ro_gui_window_quit();
ro_gui_global_history_save();
diff --git a/utils/url.c b/utils/url.c
index 126bbcebd..897faab77 100644
--- a/utils/url.c
+++ b/utils/url.c
@@ -677,6 +677,58 @@ url_func_result url_plq(const char *url, char **result)
/**
+ * Extract path segment from an URL
+ *
+ * \param url an absolute URL
+ * \param result pointer to pointer to buffer to hold result
+ * \return URL_FUNC_OK on success
+ */
+
+url_func_result url_path(const char *url, char **result)
+{
+	regmatch_t match[10];
+	int len = 0;	/* length of the path component */
+	int keep;	/* number of characters retained in the result */
+	char *out;
+
+	*result = 0;
+
+	if (regexec(&url_re, url, 10, match, 0) != 0) {
+		LOG(("url '%s' failed to match regex", url));
+		return URL_FUNC_FAILED;
+	}
+
+	/* Both a scheme and an authority are required */
+	if (match[URL_RE_SCHEME].rm_so == -1 ||
+			match[URL_RE_AUTHORITY].rm_so == -1)
+		return URL_FUNC_FAILED;
+
+	if (match[URL_RE_PATH].rm_so != -1)
+		len = match[URL_RE_PATH].rm_eo - match[URL_RE_PATH].rm_so;
+
+	/* Room for the path, or for a lone '/', plus the terminator */
+	out = malloc((len ? len : 1) + 1);
+	*result = out;
+	if (!out) {
+		LOG(("malloc failed"));
+		return URL_FUNC_NOMEM;
+	}
+
+	if (len > 1) {
+		/* Copy the path, then cut it back to just after its last
+		 * '/', dropping any trailing leaf name */
+		memcpy(out, url + match[URL_RE_PATH].rm_so, len);
+		keep = len;
+		while (keep != 0 && out[keep - 1] != '/')
+			keep--;
+	} else {
+		/* An empty or single character path yields "/" */
+		out[0] = '/';
+		keep = 1;
+	}
+
+	out[keep] = '\0';
+
+	return URL_FUNC_OK;
+}
+
+
+/**
* Attempt to find a nice filename for a URL.
*
* \param url an absolute URL
diff --git a/utils/url.h b/utils/url.h
index 7dc0b12ca..275879947 100644
--- a/utils/url.h
+++ b/utils/url.h
@@ -30,6 +30,7 @@ url_func_result url_escape(const char *unescaped, char **result);
url_func_result url_canonical_root(const char *url, char **result);
url_func_result url_strip_lqf(const char *url, char **result);
url_func_result url_plq(const char *url, char **result);
+url_func_result url_path(const char *url, char **result);
char *path_to_url(const char *path);
char *url_to_path(const char *url);