diff options
author | John Mark Bell <jmb@netsurf-browser.org> | 2006-02-19 18:26:23 +0000 |
---|---|---|
committer | John Mark Bell <jmb@netsurf-browser.org> | 2006-02-19 18:26:23 +0000 |
commit | 5ce5fe084c733c95544825e35bcd63cc775aee94 (patch) | |
tree | 23a1fae1a862e20c891c576325731963dbeaef39 | |
parent | 7dbc14cf05a7417f372fdda22f20622f8f72175f (diff) | |
download | netsurf-5ce5fe084c733c95544825e35bcd63cc775aee94.tar.gz netsurf-5ce5fe084c733c95544825e35bcd63cc775aee94.tar.bz2 |
[project @ 2006-02-19 18:26:23 by jmb]
Rewrite HTTP authentication.
Fix extraction of realm from WWW-Authenticate header.
Tidy up login dialog code.
svn path=/import/netsurf/; revision=2085
-rw-r--r-- | content/authdb.c | 366 | ||||
-rw-r--r-- | content/authdb.h | 18 | ||||
-rw-r--r-- | content/fetch.c | 23 | ||||
-rw-r--r-- | desktop/401login.h | 11 | ||||
-rw-r--r-- | desktop/loginlist.c | 217 | ||||
-rw-r--r-- | makefile | 5 | ||||
-rw-r--r-- | riscos/401login.c | 89 | ||||
-rw-r--r-- | utils/url.c | 115 | ||||
-rw-r--r-- | utils/url.h | 2 |
9 files changed, 570 insertions, 276 deletions
diff --git a/content/authdb.c b/content/authdb.c new file mode 100644 index 000000000..f97adb1b0 --- /dev/null +++ b/content/authdb.c @@ -0,0 +1,366 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk> + */ + +/** \file + * HTTP authentication database (implementation) + * + * Authentication details are stored hashed by canonical root URI + * (absoluteURI with no abs_path part - see RFC 2617) for fast lookup. + * + * A protection space is specified by the root URI and a case sensitive + * realm match. User-agents may preemptively send authentication details + * for locations within a currently known protected space (i.e: + * Given a known realm URI of scheme://authority/path/to/realm/ + * the URI scheme://authority/path/to/realm/foo/ can be assumed to + * be within the protection space.) + * + * In order to deal with realms within realms, the realm details are stored + * such that the most specific URI comes first (where "most specific" is + * classed as the one with the longest abs_path segment). + * + * Realms spanning domains are stored multiple times (once per domain). + * + * Where a higher level resource is found to be within a known realm, the + * existing match is replaced with the new one (i.e: + * Given a known realm of scheme://authority/path/to/realm/ (uri1) + * and the newly-acquired knowledge that scheme://authority/path/to/ (uri2) + * lies within the same realm, the realm details for uri1 are replaced with + * those for uri2. - in most cases, this is likely to be a simple + * replacement of the realm URI) + * + * There is currently no mechanism for retaining authentication details over + * sessions. 
+ */ +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include "netsurf/content/authdb.h" +#define NDEBUG +#include "netsurf/utils/log.h" +#include "netsurf/utils/url.h" + +#define HASH_SIZE 77 + +struct realm_details { + char *realm; /**< Realm identifier */ + char *url; /**< Base URL of realm */ + char *auth; /**< Authentication details */ + struct realm_details *next; + struct realm_details *prev; +}; + +struct auth_entry { + char *root_url; /**< Canonical root URL of realms */ + struct realm_details *realms; /**< List of realms on this host */ + struct auth_entry *next; +}; + +static struct auth_entry *auth_table[HASH_SIZE]; + +static unsigned int authdb_hash(const char *s); +static struct realm_details *authdb_get_rd(const char *canon, + const char *url, const char *realm); +static void authdb_dump(void); + +/** + * Insert an entry into the database, potentially replacing any + * existing entry. + * + * \param url Absolute URL to resource + * \param realm Authentication realm containing resource + * \param auth Authentication details in form "username:password" + * \return true on success, false on error. 
+ */ +bool authdb_insert(const char *url, const char *realm, const char *auth) +{ + char *canon, *stripped; + unsigned int hash; + struct realm_details *rd; + struct auth_entry *entry; + url_func_result ret; + + assert(url && realm && auth); + + LOG(("Adding '%s' - '%s'", url, realm)); + + ret = url_canonical_root(url, &canon); + if (ret != URL_FUNC_OK) + return false; + + LOG(("'%s'", canon)); + + ret = url_strip_lqf(url, &stripped); + if (ret != URL_FUNC_OK) { + free(canon); + return false; + } + + hash = authdb_hash(canon); + + /* Look for existing entry */ + for (entry = auth_table[hash]; entry; entry = entry->next) + if (strcmp(entry->root_url, canon) == 0) + break; + + rd = authdb_get_rd(canon, stripped, realm); + if (rd) { + /* We have a match */ + if (strlen(stripped) < strlen(rd->url)) { + /* more generic, so update URL and move to + * appropriate location in list (s.t. the invariant + * that most specific URLs come first is maintained) + */ + struct realm_details *r, *s; + char *temp = strdup(auth); + + if (!temp) { + free(temp); + free(stripped); + free(canon); + return false; + } + + free(rd->url); + rd->url = stripped; + + free(rd->auth); + rd->auth = temp; + + for (r = rd->next; r; r = s) { + s = r->next; + if (strlen(r->url) > strlen(rd->url)) { + rd->next->prev = rd->prev; + if (rd->prev) + rd->prev->next = rd->next; + else + entry->realms = r; + + rd->prev = r; + rd->next = r->next; + if (r->next) + r->next->prev = rd; + r->next = rd; + } + } + } + else if (strlen(stripped) == strlen(rd->url)) { + /* exact match, so replace auth details */ + char *temp = strdup(auth); + if (!temp) { + free(stripped); + free(canon); + return false; + } + + free(rd->auth); + rd->auth = temp; + + free(stripped); + } + /* otherwise, nothing to do */ + + free(canon); + return true; + } + + /* no existing entry => create one */ + rd = malloc(sizeof(struct realm_details)); + if (!rd) { + free(stripped); + free(canon); + return false; + } + + rd->realm = strdup(realm); + 
rd->auth = strdup(auth); + rd->url = stripped; + rd->prev = 0; + + if (!rd->realm || !rd->auth || ret != URL_FUNC_OK) { + free(rd->url); + free(rd->auth); + free(rd->realm); + free(rd); + free(canon); + return false; + } + + if (entry) { + /* found => add to it */ + rd->next = entry->realms; + if (entry->realms) + entry->realms->prev = rd; + entry->realms = rd; + + free(canon); + return true; + } + + /* not found => create new */ + entry = malloc(sizeof(struct auth_entry)); + if (!entry) { + free(rd->url); + free(rd->auth); + free(rd->realm); + free(rd); + free(canon); + return false; + } + + rd->next = 0; + entry->root_url = canon; + entry->realms = rd; + entry->next = auth_table[hash]; + auth_table[hash] = entry; + + return true; +} + +/** + * Find realm details entry + * + * \param canon Canonical root URL + * \param url Stripped URL to resource + * \param realm Realm containing resource + * \return Realm details or NULL if not found + */ +struct realm_details *authdb_get_rd(const char *canon, const char *url, + const char *realm) +{ + struct auth_entry *entry; + struct realm_details *ret; + + assert(canon && url); + + for (entry = auth_table[authdb_hash(canon)]; entry; + entry = entry->next) + if (strcmp(entry->root_url, canon) == 0) + break; + + if (!entry) + return NULL; + + for (ret = entry->realms; ret; ret = ret->next) { + if (strcmp(ret->realm, realm)) + /* skip realms that don't match */ + continue; + if (strlen(url) >= strlen(ret->url) && + !strncmp(url, ret->url, strlen(ret->url))) + /* If the requested URL is of equal or greater + * specificity than the stored one, but is within + * the same realm, then use the more generic details + */ + return ret; + else if (strncmp(url, ret->url, strlen(url)) == 0) { + /* We have a more general URL in the same realm */ + return ret; + } + } + + return NULL; +} + +/** + * Retrieve authentication details for an URL from the database + * + * \param url Absolute URL to consider + * \return authentication details, or 
NULL if none found. + */ +const char *authdb_get(const char *url) +{ + char *canon, *stripped; + struct auth_entry *entry; + struct realm_details *rd; + url_func_result ret; + + assert(url); + + LOG(("Searching for '%s'", url)); + + authdb_dump(); + + ret = url_canonical_root(url, &canon); + if (ret != URL_FUNC_OK) + return NULL; + + ret = url_strip_lqf(url, &stripped); + if (ret != URL_FUNC_OK) { + free(canon); + return NULL; + } + + /* Find auth entry */ + for (entry = auth_table[authdb_hash(canon)]; entry; + entry = entry->next) + if (strcmp(entry->root_url, canon) == 0) + break; + + if (!entry) { + free(stripped); + free(canon); + return NULL; + } + + LOG(("Found entry")); + + /* Find realm details */ + for (rd = entry->realms; rd; rd = rd->next) + if (strlen(stripped) >= strlen(rd->url) && + !strncmp(stripped, rd->url, strlen(rd->url))) + break; + + if (!rd) { + free(stripped); + free(canon); + return NULL; + } + + LOG(("Found realm")); + + free(stripped); + free(canon); + return rd->auth; +} + +/** + * Hash function for keys. 
+ */ +unsigned int authdb_hash(const char *s) +{ + unsigned int i, z = 0, m; + if (!s) + return 0; + + m = strlen(s); + + for (i = 0; i != m && s[i]; i++) + z += s[i] & 0x1f; /* lower 5 bits, case insensitive */ + return z % HASH_SIZE; +} + +/** + * Dump contents of auth db to stderr + */ +void authdb_dump(void) +{ +#ifndef NDEBUG + int i; + struct auth_entry *e; + struct realm_details *r; + + for (i = 0; i != HASH_SIZE; i++) { + LOG(("%d:", i)); + for (e = auth_table[i]; e; e = e->next) { + LOG(("\t%s", e->root_url)); + for (r = e->realms; r; r = r->next) { + LOG(("\t\t%s - %s", r->url, r->realm)); + } + } + } +#endif +} diff --git a/content/authdb.h b/content/authdb.h new file mode 100644 index 000000000..ece7b763d --- /dev/null +++ b/content/authdb.h @@ -0,0 +1,18 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk> + */ + +/** \file + * HTTP authentication database (interface) + */ + +#ifndef _NETSURF_CONTENT_AUTHDB_H_ +#define _NETSURF_CONTENT_AUTHDB_H_ + +bool authdb_insert(const char *url, const char *realm, const char *auth); +const char *authdb_get(const char *url); + +#endif diff --git a/content/fetch.c b/content/fetch.c index b63bf5f91..0d9ccd583 100644 --- a/content/fetch.c +++ b/content/fetch.c @@ -31,11 +31,11 @@ #endif #include "curl/curl.h" #include "netsurf/utils/config.h" -#include "netsurf/content/fetch.h" -#include "netsurf/desktop/options.h" #ifdef WITH_AUTH -#include "netsurf/desktop/401login.h" +#include "netsurf/content/authdb.h" #endif +#include "netsurf/content/fetch.h" +#include "netsurf/desktop/options.h" #include "netsurf/render/form.h" #define NDEBUG #include "netsurf/utils/log.h" @@ -414,7 +414,7 @@ failed: CURLcode fetch_set_options(struct fetch *f) { CURLcode code; - struct login *li; + const char *auth; #undef SETOPT #define SETOPT(option, value) \ @@ 
-445,12 +445,16 @@ CURLcode fetch_set_options(struct fetch *f) SETOPT(CURLOPT_COOKIEFILE, 0); SETOPT(CURLOPT_COOKIEJAR, 0); } - if ((li = login_list_get(f->url)) != NULL) { +#ifdef WITH_AUTH + if ((auth = authdb_get(f->url)) != NULL) { SETOPT(CURLOPT_HTTPAUTH, CURLAUTH_ANY); - SETOPT(CURLOPT_USERPWD, li->logindetails); + SETOPT(CURLOPT_USERPWD, auth); } else { +#endif SETOPT(CURLOPT_USERPWD, 0); +#ifdef WITH_AUTH } +#endif if (option_http_proxy && option_http_proxy_host) { SETOPT(CURLOPT_PROXY, option_http_proxy_host); SETOPT(CURLOPT_PROXYPORT, (long) option_http_proxy_port); @@ -796,8 +800,13 @@ size_t fetch_curl_header(char *data, size_t size, size_t nmemb, return size; } SKIP_ST(17); - while (i < (int)size && data[++i] == '"') + + while (i < (int) size && strncasecmp(data + i, "realm", 5)) + i++; + while (i < (int)size && data[++i] != '"') /* */; + i++; + strncpy(f->realm, data + i, size - i); f->realm[size - i] = '\0'; for (i = size - i - 1; i >= 0 && diff --git a/desktop/401login.h b/desktop/401login.h index 27f76f65a..fc94d971b 100644 --- a/desktop/401login.h +++ b/desktop/401login.h @@ -14,19 +14,8 @@ #ifdef WITH_AUTH -struct login { - - char *host; /**< hostname */ - char *logindetails; /**< string containing "username:password" */ - struct login *next; /**< next in list */ - struct login *prev; /**< previous in list */ -}; - void gui_401login_open(struct browser_window *bw, struct content *c, char *realm); -void login_list_add(char *host, char *logindets); -struct login *login_list_get(char *host); -void login_list_remove(char *host); #endif diff --git a/desktop/loginlist.c b/desktop/loginlist.c deleted file mode 100644 index ca5ab7ea8..000000000 --- a/desktop/loginlist.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * This file is part of NetSurf, http://netsurf.sourceforge.net/ - * Licensed under the GNU General Public License, - * http://www.opensource.org/licenses/gpl-license - * Copyright 2003 John M Bell <jmb202@ecs.soton.ac.uk> - */ - -#define NDEBUG - 
-#include <assert.h> -#include <string.h> -#include "netsurf/utils/config.h" -#include "netsurf/desktop/401login.h" -#include "netsurf/utils/log.h" -#include "netsurf/utils/url.h" -#include "netsurf/utils/utils.h" - -#ifdef WITH_AUTH - -void login_list_dump(void); - -/** - * Pointer into the linked list - */ -static struct login login = {0, 0, &login, &login}; -static struct login *loginlist = &login; - -/** - * Adds an item to the list of login details - */ -void login_list_add(char *host, char* logindets) -{ - struct login *nli; - char *temp; - char *i; - url_func_result res; - - nli = calloc(1, sizeof(*nli)); - if (!nli) { - warn_user("NoMemory", 0); - return; - } - - res = url_host(host, &temp); - - if (res != URL_FUNC_OK) { - free(temp); - free(nli); - warn_user("NoMemory", 0); - return; - } - - /* Go back to the path base ie strip the document name - * eg. http://www.blah.com/blah/test.htm becomes - * http://www.blah.com/blah/ - * This does, however, mean that directories MUST have a '/' at the end - */ - if (strlen(temp) < strlen(host)) { - free(temp); - temp = strdup(host); - if (!temp) { - free(nli); - warn_user("NoMemory", 0); - return; - } - if (temp[strlen(temp)-1] != '/') { - i = strrchr(temp, '/'); - temp[(i-temp)+1] = 0; - } - } - - nli->host = strdup(temp); - if (!nli->host) { - free(temp); - free(nli); - warn_user("NoMemory", 0); - return; - } - nli->logindetails = strdup(logindets); - if (!nli->logindetails) { - free(nli->host); - free(temp); - free(nli); - warn_user("NoMemory", 0); - return; - } - - /* prepend to list so that more recent additions are - encountered first in login_list_get */ - nli->next = loginlist->next; - nli->prev = loginlist; - loginlist->next->prev = nli; - loginlist->next = nli; - - LOG(("Adding %s", temp)); - #ifndef NDEBUG - login_list_dump(); - #endif - free(temp); -} - -/** - * Retrieves an element from the login list - */ -/** \todo Make the matching spec compliant (see RFC 2617) */ -struct login *login_list_get(char 
*url) -{ - struct login *nli; - char *temp, *host; - char *i; - int reached_scheme = 0; - url_func_result res; - - if (url == NULL) - return NULL; - - if ((strncasecmp(url, "http://", 7) != 0) && - (strncasecmp(url, "https://", 8) != 0)) - return NULL; - - res = url_host(url, &host); - if (res != URL_FUNC_OK || strlen(host) == 0) return NULL; - - temp = strdup(url); - if (!temp) { - warn_user("NoMemory", 0); - free(host); - return NULL; - } - - /* Smallest thing to check for is the scheme + host name + - * trailing '/' - * So make sure we've got that at least - */ - if (strlen(host) > strlen(temp)) { - free(temp); - res = url_host(url, &temp); - if (res != URL_FUNC_OK || strlen(temp) == 0) { - free(host); - return NULL; - } - } - free(host); - - /* Work backwards through the path, directory at at time. - * Finds the closest match. - * eg. http://www.blah.com/moo/ matches the url - * http://www.blah.com/moo/test/index.htm - * This allows multiple realms (and login details) per host. - * Only one set of login details per realm are allowed. - */ - do { - LOG(("%s, %d", temp, strlen(temp))); - - for (nli = loginlist->next; nli != loginlist && - (strcasecmp(nli->host, temp)!=0); - nli = nli->next) - /* do nothing */; - - if (nli != loginlist) { - LOG(("Got %s", nli->host)); - free(temp); - return nli; - } - else { - if (temp[strlen(temp)-1] == '/') { - temp[strlen(temp)-1] = 0; - } - - i = strrchr(temp, '/'); - - if (temp[(i-temp)-1] != '/') /* reached the scheme? 
*/ - temp[(i-temp)+1] = 0; - else { - reached_scheme = 1; - } - } - } while (reached_scheme == 0); - - free(temp); - return NULL; -} - -/** - * Remove a realm's login details from the list - */ -void login_list_remove(char *host) -{ - struct login *nli = login_list_get(host); - - if (nli != NULL) { - nli->prev->next = nli->next; - nli->next->prev = nli->prev; - free(nli->logindetails); - free(nli->host); - free(nli); - } - - LOG(("Removing %s", host)); -#ifndef NDEBUG - login_list_dump(); -#endif -} - -/** - * Dumps the list of login details (base paths only) - */ -void login_list_dump(void) -{ - struct login *nli; - - for (nli = loginlist->next; nli != loginlist; nli = nli->next) { - LOG(("%s", nli->host)); - } -} - -#endif diff --git a/makefile b/makefile --- a/makefile +++ b/makefile @@ -17,14 +17,15 @@ # "riscos", "riscos_small", "ncos", and "riscos_debug" can be compiled under # RISC OS, or cross-compiled using GCCSDK. -OBJECTS_COMMON = content.o fetch.o fetchcache.o url_store.o # content/ +OBJECTS_COMMON = authdb.o content.o fetch.o fetchcache.o \ + url_store.o # content/ OBJECTS_COMMON += css.o css_enum.o parser.o ruleset.o scanner.o # css/ OBJECTS_COMMON += box.o box_construct.o box_normalise.o form.o \ html.o html_redraw.o imagemap.o layout.o list.o \ table.o textplain.o # render/ OBJECTS_COMMON += messages.o talloc.o url.o utf8.o \ utils.o # utils/ -OBJECTS_COMMON += loginlist.o options.o tree.o # desktop/ +OBJECTS_COMMON += options.o tree.o # desktop/ OBJECTS_IMAGE = gif.o gifread.o jpeg.o mng.o # image/ diff --git a/riscos/401login.c b/riscos/401login.c index 7fc6e63c5..01479a069 100644 --- a/riscos/401login.c +++ b/riscos/401login.c @@ -8,9 +8,11 @@ #include <assert.h> #include <ctype.h> +#include <stdbool.h> #include <string.h> #include "oslib/wimp.h" #include "netsurf/utils/config.h" +#include "netsurf/content/authdb.h" #include "netsurf/content/content.h" #include "netsurf/desktop/browser.h" #include "netsurf/desktop/401login.h" @@ -33,12 +35,12 @@ static void ro_gui_401login_open(struct browser_window *bw, 
char *host, static wimp_window *dialog_401_template; struct session_401 { - char *host; - char *realm; - char uname[256]; - char *url; - char pwd[256]; - struct browser_window *bwin; + char *host; /**< Host for user display */ + char *realm; /**< Authentication realm */ + char uname[256]; /**< Buffer for username */ + char *url; /**< URL being fetched */ + char pwd[256]; /**< Buffer for password */ + struct browser_window *bwin; /**< Browser window handle */ }; @@ -52,7 +54,11 @@ void ro_gui_401login_init(void) } -void gui_401login_open(struct browser_window *bw, struct content *c, char *realm) +/** + * Open the login dialog + */ +void gui_401login_open(struct browser_window *bw, struct content *c, + char *realm) { char *murl, *host; url_func_result res; @@ -76,7 +82,7 @@ void ro_gui_401login_open(struct browser_window *bw, char *host, char *realm, { struct session_401 *session; wimp_w w; - + session = calloc(1, sizeof(struct session_401)); if (!session) { warn_user("NoMemory", 0); @@ -85,7 +91,7 @@ void ro_gui_401login_open(struct browser_window *bw, char *host, char *realm, session->url = strdup(fetchurl); if (!session->url) { - free(session); + free(session); warn_user("NoMemory", 0); return; } @@ -95,9 +101,11 @@ void ro_gui_401login_open(struct browser_window *bw, char *host, char *realm, session->realm = strdup(realm); session->bwin = bw; if ((!session->host) || (!session->realm)) { - free(session->host); - free(session->realm); - free(session); + free(session->host); + free(session->realm); + free(session); + warn_user("NoMemory", 0); + return; } /* fill in download window icons */ @@ -120,7 +128,7 @@ void ro_gui_401login_open(struct browser_window *bw, char *host, char *realm, /* create and open the window */ w = wimp_create_window(dialog_401_template); - + ro_gui_wimp_event_register_text_field(w, ICON_401LOGIN_USERNAME); ro_gui_wimp_event_register_text_field(w, ICON_401LOGIN_PASSWORD); ro_gui_wimp_event_register_cancel(w, ICON_401LOGIN_CANCEL); @@ -130,22 
+138,24 @@ void ro_gui_401login_open(struct browser_window *bw, char *host, char *realm, ro_gui_wimp_event_set_user_data(w, session); ro_gui_dialog_open_persistent(bw->window->window, w, false); - } - -void ro_gui_401login_close(wimp_w w) { +/** + * Handle closing of login dialog + */ +void ro_gui_401login_close(wimp_w w) +{ os_error *error; - struct session_401 *session; - - session = (struct session_401 *)ro_gui_wimp_event_get_user_data(w); - - assert(session); - - free(session->host); - free(session->realm); - free(session->url); - free(session); + struct session_401 *session; + + session = (struct session_401 *)ro_gui_wimp_event_get_user_data(w); + + assert(session); + + free(session->host); + free(session->realm); + free(session->url); + free(session); ro_gui_wimp_event_finalise(w); @@ -153,7 +163,6 @@ void ro_gui_401login_close(wimp_w w) { if (error) LOG(("xwimp_delete_window: 0x%x: %s", error->errnum, error->errmess)); - } @@ -163,23 +172,29 @@ void ro_gui_401login_close(wimp_w w) { bool ro_gui_401login_apply(wimp_w w) { struct session_401 *session; - char *lidets; + char *auth; - session = (struct session_401 *)ro_gui_wimp_event_get_user_data(w); - - assert(session); + session = (struct session_401 *)ro_gui_wimp_event_get_user_data(w); - lidets = calloc(strlen(session->uname) + strlen(session->pwd) + 2, - sizeof(char)); - if (!lidets) { - LOG(("Insufficient memory for calloc")); + assert(session); + + auth = malloc(strlen(session->uname) + strlen(session->pwd) + 2); + if (!auth) { + LOG(("calloc failed")); warn_user("NoMemory", 0); return false; } - sprintf(lidets, "%s:%s", session->uname, session->pwd); + sprintf(auth, "%s:%s", session->uname, session->pwd); + + if (!authdb_insert(session->url, session->realm, auth)) { + LOG(("failed")); + free(auth); + return false; + } + + free(auth); - login_list_add(session->url, lidets); browser_window_go(session->bwin, session->url, 0); return true; } diff --git a/utils/url.c b/utils/url.c index 1eb0f6f5e..c696659f4 
100644 --- a/utils/url.c +++ b/utils/url.c @@ -507,6 +507,117 @@ url_func_result url_scheme(const char *url, char **result) /** + * Return the canonical root of an URL + * + * \param url an absolute URL + * \param result pointer to pointer to buffer to hold canonical root URL + * \return URL_FUNC_OK on success + */ + +url_func_result url_canonical_root(const char *url, char **result) +{ + int m, scheme_len, authority_len; + regmatch_t match[10]; + + (*result) = 0; + + m = regexec(&url_re, url, 10, match, 0); + if (m) { + LOG(("url '%s' failed to match regex", url)); + return URL_FUNC_FAILED; + } + if (match[URL_RE_SCHEME].rm_so == -1 || + match[URL_RE_AUTHORITY].rm_so == -1) + return URL_FUNC_FAILED; + + scheme_len = match[URL_RE_SCHEME].rm_eo - match[URL_RE_SCHEME].rm_so; + authority_len = match[URL_RE_AUTHORITY].rm_eo - + match[URL_RE_AUTHORITY].rm_so; + + (*result) = malloc(scheme_len + 1 + 2 + authority_len + 1); + if (!(*result)) { + LOG(("malloc failed")); + return URL_FUNC_NOMEM; + } + + strncpy((*result), url + match[URL_RE_SCHEME].rm_so, scheme_len); + m = scheme_len; + (*result)[m++] = ':'; + (*result)[m++] = '/'; + (*result)[m++] = '/'; + strncpy((*result) + m, url + match[URL_RE_AUTHORITY].rm_so, + authority_len); + (*result)[m + authority_len] = '\0'; + + return URL_FUNC_OK; +} + + +/** + * Strip leafname, query and fragment segments from an URL + * + * \param url an absolute URL + * \param result pointer to pointer to buffer to hold result + * \return URL_FUNC_OK on success + */ + +url_func_result url_strip_lqf(const char *url, char **result) +{ + int m, scheme_len, authority_len, path_len = 0; + regmatch_t match[10]; + + (*result) = 0; + + m = regexec(&url_re, url, 10, match, 0); + if (m) { + LOG(("url '%s' failed to match regex", url)); + return URL_FUNC_FAILED; + } + if (match[URL_RE_SCHEME].rm_so == -1 || + match[URL_RE_AUTHORITY].rm_so == -1) + return URL_FUNC_FAILED; + + scheme_len = match[URL_RE_SCHEME].rm_eo - match[URL_RE_SCHEME].rm_so; + 
authority_len = match[URL_RE_AUTHORITY].rm_eo - + match[URL_RE_AUTHORITY].rm_so; + if (match[URL_RE_PATH].rm_so != -1) + path_len = match[URL_RE_PATH].rm_eo - + match[URL_RE_PATH].rm_so; + + (*result) = malloc(scheme_len + 1 + 2 + authority_len + + (path_len ? path_len : 1) + 1); + if (!(*result)) { + LOG(("malloc failed")); + return URL_FUNC_NOMEM; + } + + strncpy((*result), url + match[URL_RE_SCHEME].rm_so, scheme_len); + m = scheme_len; + (*result)[m++] = ':'; + (*result)[m++] = '/'; + (*result)[m++] = '/'; + strncpy((*result) + m, url + match[URL_RE_AUTHORITY].rm_so, + authority_len); + m += authority_len; + + if (path_len) { + strncpy((*result) + m, url + match[URL_RE_AUTHORITY].rm_so, + path_len); + for (; path_len != 0 && (*result)[m + path_len - 1] != '/'; + path_len--) + /* do nothing */; + m += path_len; + } + else + (*result)[m++] = '/'; + + (*result)[m] = '\0'; + + return URL_FUNC_OK; +} + + +/** * Attempt to find a nice filename for a URL. * * \param url an absolute URL @@ -611,8 +722,8 @@ no_path: (*result)[i] = '_'; return URL_FUNC_OK; - } - + } + return URL_FUNC_FAILED; } diff --git a/utils/url.h b/utils/url.h index 3dd4b9859..3bda22969 100644 --- a/utils/url.h +++ b/utils/url.h @@ -27,6 +27,8 @@ url_func_result url_scheme(const char *url, char **result); url_func_result url_nice(const char *url, char **result, bool remove_extensions); url_func_result url_escape(const char *unescaped, char **result); +url_func_result url_canonical_root(const char *url, char **result); +url_func_result url_strip_lqf(const char *url, char **result); char *path_to_url(const char *path); char *url_to_path(const char *url); |