diff options
Diffstat (limited to 'utils/nsurl/nsurl.c')
-rw-r--r-- | utils/nsurl/nsurl.c | 850 |
1 files changed, 850 insertions, 0 deletions
diff --git a/utils/nsurl/nsurl.c b/utils/nsurl/nsurl.c new file mode 100644 index 000000000..7166a2707 --- /dev/null +++ b/utils/nsurl/nsurl.c @@ -0,0 +1,850 @@ +/* + * Copyright 2011 Michael Drake <tlsa@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * \file + * NetSurf URL handling implementation. + * + * This is the common implementation of all URL handling within the + * browser. This implementation is based upon RFC3986 although this has + * been superceeded by https://url.spec.whatwg.org/ which is based on + * actual contemporary implementations. + * + * Care must be taken with character encodings within this module as + * the specifications work with specific ascii ranges and must not be + * affected by locale. Hence the c library character type functions + * are not used. + */ + +#include <assert.h> +#include <libwapcaplet/libwapcaplet.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> + +#include "utils/ascii.h" +#include "utils/corestrings.h" +#include "utils/errors.h" +#include "utils/idna.h" +#include "utils/log.h" +#include "utils/nsurl/private.h" +#include "utils/nsurl.h" +#include "utils/utils.h" + + +/** + * Compare two component values. + * + * Sets match to false if the components are not the same. + * Does nothing if the components are the same, so ensure match is + * preset to true. + */ +#define nsurl__component_compare(c1, c2, match) \ + if (c1 && c2 && lwc_error_ok == \ + lwc_string_isequal(c1, c2, match)) { \ + /* do nothing */ \ + } else if (c1 || c2) { \ + *match = false; \ + } + + + +/****************************************************************************** + * NetSurf URL Public API * + ******************************************************************************/ + +/* exported interface, documented in nsurl.h */ +nsurl *nsurl_ref(nsurl *url) +{ + assert(url != NULL); + + url->count++; + + return url; +} + + +/* exported interface, documented in nsurl.h */ +void nsurl_unref(nsurl *url) +{ + assert(url != NULL); + assert(url->count > 0); + + if (--url->count > 0) + return; + +#ifdef NSURL_DEBUG + nsurl__dump(url); +#endif + + /* Release lwc strings */ + nsurl__components_destroy(&url->components); + + /* Free the NetSurf URL */ + free(url); +} + + +/* exported interface, documented in nsurl.h */ +bool nsurl_compare(const nsurl *url1, const nsurl *url2, nsurl_component parts) +{ + bool match = true; + + assert(url1 != NULL); + assert(url2 != NULL); + + /* Compare URL components */ + + /* Path, host and query first, since they're most likely to differ */ + + if (parts & NSURL_PATH) { + nsurl__component_compare(url1->components.path, + url2->components.path, &match); + + if (match == false) + return false; + } + + if (parts & NSURL_HOST) { + nsurl__component_compare(url1->components.host, + url2->components.host, &match); + + if (match == false) + return false; + } + + if (parts & NSURL_QUERY) { + nsurl__component_compare(url1->components.query, + url2->components.query, &match); + + if (match == false) + return false; + } + + if (parts & NSURL_SCHEME) { + nsurl__component_compare(url1->components.scheme, + url2->components.scheme, &match); + + if (match == false) + return false; + } + + if (parts & NSURL_USERNAME) { + nsurl__component_compare(url1->components.username, + url2->components.username, &match); + + if (match == false) + return false; + } + + if (parts & NSURL_PASSWORD) { + nsurl__component_compare(url1->components.password, + url2->components.password, &match); + + if (match == false) + return false; + } + + if (parts & NSURL_PORT) { + nsurl__component_compare(url1->components.port, + url2->components.port, &match); + + if (match == false) + return false; + } + + if (parts & NSURL_FRAGMENT) { + nsurl__component_compare(url1->components.fragment, + url2->components.fragment, &match); + + if (match == false) + return false; + } + + return true; +} + + +/* exported interface, documented in nsurl.h */ +nserror nsurl_get(const nsurl *url, nsurl_component parts, + char **url_s, size_t *url_l) +{ + assert(url != NULL); + + return nsurl__components_to_string(&(url->components), parts, 0, + url_s, url_l); +} + + +/* exported interface, documented in nsurl.h */ +lwc_string *nsurl_get_component(const nsurl *url, nsurl_component part) +{ + assert(url != NULL); + + switch (part) { + case NSURL_SCHEME: + return (url->components.scheme != NULL) ? + lwc_string_ref(url->components.scheme) : NULL; + + case NSURL_USERNAME: + return (url->components.username != NULL) ? + lwc_string_ref(url->components.username) : NULL; + + case NSURL_PASSWORD: + return (url->components.password != NULL) ? + lwc_string_ref(url->components.password) : NULL; + + case NSURL_HOST: + return (url->components.host != NULL) ? + lwc_string_ref(url->components.host) : NULL; + + case NSURL_PORT: + return (url->components.port != NULL) ? + lwc_string_ref(url->components.port) : NULL; + + case NSURL_PATH: + return (url->components.path != NULL) ? + lwc_string_ref(url->components.path) : NULL; + + case NSURL_QUERY: + return (url->components.query != NULL) ? + lwc_string_ref(url->components.query) : NULL; + + case NSURL_FRAGMENT: + return (url->components.fragment != NULL) ? + lwc_string_ref(url->components.fragment) : NULL; + + default: + LOG("Unsupported value passed to part param."); + assert(0); + } + + return NULL; +} + + +/* exported interface, documented in nsurl.h */ +bool nsurl_has_component(const nsurl *url, nsurl_component part) +{ + assert(url != NULL); + + switch (part) { + case NSURL_SCHEME: + if (url->components.scheme != NULL) + return true; + else + return false; + + case NSURL_CREDENTIALS: + /* Only username required for credentials section */ + /* Fall through */ + case NSURL_USERNAME: + if (url->components.username != NULL) + return true; + else + return false; + + case NSURL_PASSWORD: + if (url->components.password != NULL) + return true; + else + return false; + + case NSURL_HOST: + if (url->components.host != NULL) + return true; + else + return false; + + case NSURL_PORT: + if (url->components.port != NULL) + return true; + else + return false; + + case NSURL_PATH: + if (url->components.path != NULL) + return true; + else + return false; + + case NSURL_QUERY: + if (url->components.query != NULL) + return true; + else + return false; + + case NSURL_FRAGMENT: + if (url->components.fragment != NULL) + return true; + else + return false; + + default: + LOG("Unsupported value passed to part param."); + assert(0); + } + + return false; +} + + +/* exported interface, documented in nsurl.h */ +const char *nsurl_access(const nsurl *url) +{ + assert(url != NULL); + + return url->string; +} + + +/* exported interface, documented in nsurl.h */ +nserror nsurl_get_utf8(const nsurl *url, char **url_s, size_t *url_l) +{ + nserror err; + lwc_string *host; + char *idna_host = NULL; + size_t idna_host_len; + char *scheme = NULL; + size_t scheme_len; + char *path = NULL; + size_t path_len; + + assert(url != NULL); + + if (url->components.host == NULL) { + return nsurl_get(url, NSURL_WITH_FRAGMENT, url_s, url_l); + } + + host = url->components.host; + err = idna_decode(lwc_string_data(host), lwc_string_length(host), + &idna_host, &idna_host_len); + if (err != NSERROR_OK) { + goto cleanup; + } + + err = nsurl_get(url, + NSURL_SCHEME | NSURL_CREDENTIALS, + &scheme, &scheme_len); + if (err != NSERROR_OK) { + goto cleanup; + } + + err = nsurl_get(url, + NSURL_PORT | NSURL_PATH | NSURL_QUERY | NSURL_FRAGMENT, + &path, &path_len); + if (err != NSERROR_OK) { + goto cleanup; + } + + *url_l = scheme_len + idna_host_len + path_len + 1; /* +1 for \0 */ + *url_s = malloc(*url_l); + + if (*url_s == NULL) { + err = NSERROR_NOMEM; + goto cleanup; + } + + snprintf(*url_s, *url_l, "%s%s%s", scheme, idna_host, path); + + err = NSERROR_OK; + +cleanup: + free(idna_host); + free(scheme); + free(path); + + return err; +} + + +/* exported interface, documented in nsurl.h */ +const char *nsurl_access_leaf(const nsurl *url) +{ + size_t path_len; + const char *path; + const char *leaf; + + assert(url != NULL); + + if (url->components.path == NULL) + return ""; + + path = lwc_string_data(url->components.path); + path_len = lwc_string_length(url->components.path); + + if (path_len == 0) + return ""; + + if (path_len == 1 && *path == '/') + return "/"; + + leaf = path + path_len; + + do { + leaf--; + } while ((leaf != path) && (*leaf != '/')); + + if (*leaf == '/') + leaf++; + + return leaf; +} + + +/* exported interface, documented in nsurl.h */ +size_t nsurl_length(const nsurl *url) +{ + assert(url != NULL); + + return url->length; +} + + +/* exported interface, documented in nsurl.h */ +uint32_t nsurl_hash(const nsurl *url) +{ + assert(url != NULL); + + return url->hash; +} + + +/* exported interface, documented in nsurl.h */ +nserror nsurl_defragment(const nsurl *url, nsurl **no_frag) +{ + size_t length; + char *pos; + + assert(url != NULL); + + /* check for source url having no fragment already */ + if (url->components.fragment == NULL) { + *no_frag = (nsurl *)url; + + (*no_frag)->count++; + + return NSERROR_OK; + } + + /* Find the change in length from url to new_url */ + length = url->length; + if (url->components.fragment != NULL) { + length -= 1 + lwc_string_length(url->components.fragment); + } + + /* Create NetSurf URL object */ + *no_frag = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */ + if (*no_frag == NULL) { + return NSERROR_NOMEM; + } + + /* Copy components */ + (*no_frag)->components.scheme = + nsurl__component_copy(url->components.scheme); + (*no_frag)->components.username = + nsurl__component_copy(url->components.username); + (*no_frag)->components.password = + nsurl__component_copy(url->components.password); + (*no_frag)->components.host = + nsurl__component_copy(url->components.host); + (*no_frag)->components.port = + nsurl__component_copy(url->components.port); + (*no_frag)->components.path = + nsurl__component_copy(url->components.path); + (*no_frag)->components.query = + nsurl__component_copy(url->components.query); + (*no_frag)->components.fragment = NULL; + + (*no_frag)->components.scheme_type = url->components.scheme_type; + + (*no_frag)->length = length; + + /* Fill out the url string */ + pos = (*no_frag)->string; + memcpy(pos, url->string, length); + pos += length; + *pos = '\0'; + + /* Get the nsurl's hash */ + nsurl__calc_hash(*no_frag); + + /* Give the URL a reference */ + (*no_frag)->count = 1; + + return NSERROR_OK; +} + + +/* exported interface, documented in nsurl.h */ +nserror nsurl_refragment(const nsurl *url, lwc_string *frag, nsurl **new_url) +{ + int frag_len; + int base_len; + char *pos; + size_t len; + + assert(url != NULL); + assert(frag != NULL); + + /* Find the change in length from url to new_url */ + base_len = url->length; + if (url->components.fragment != NULL) { + base_len -= 1 + lwc_string_length(url->components.fragment); + } + frag_len = lwc_string_length(frag); + + /* Set new_url's length */ + len = base_len + 1 /* # */ + frag_len; + + /* Create NetSurf URL object */ + *new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */ + if (*new_url == NULL) { + return NSERROR_NOMEM; + } + + (*new_url)->length = len; + + /* Set string */ + pos = (*new_url)->string; + memcpy(pos, url->string, base_len); + pos += base_len; + *pos = '#'; + memcpy(++pos, lwc_string_data(frag), frag_len); + pos += frag_len; + *pos = '\0'; + + /* Copy components */ + (*new_url)->components.scheme = + nsurl__component_copy(url->components.scheme); + (*new_url)->components.username = + nsurl__component_copy(url->components.username); + (*new_url)->components.password = + nsurl__component_copy(url->components.password); + (*new_url)->components.host = + nsurl__component_copy(url->components.host); + (*new_url)->components.port = + nsurl__component_copy(url->components.port); + (*new_url)->components.path = + nsurl__component_copy(url->components.path); + (*new_url)->components.query = + nsurl__component_copy(url->components.query); + (*new_url)->components.fragment = + lwc_string_ref(frag); + + (*new_url)->components.scheme_type = url->components.scheme_type; + + /* Get the nsurl's hash */ + nsurl__calc_hash(*new_url); + + /* Give the URL a reference */ + (*new_url)->count = 1; + + return NSERROR_OK; +} + + +/* exported interface, documented in nsurl.h */ +nserror nsurl_replace_query(const nsurl *url, const char *query, + nsurl **new_url) +{ + int query_len; /* Length of new query string, including '?' */ + int frag_len = 0; /* Length of fragment, including '#' */ + int base_len; /* Length of URL up to start of query */ + char *pos; + size_t len; + lwc_string *lwc_query; + + assert(url != NULL); + assert(query != NULL); + assert(query[0] == '?'); + + /* Get the length of the new query */ + query_len = strlen(query); + + /* Find the change in length from url to new_url */ + base_len = url->length; + if (url->components.query != NULL) { + base_len -= lwc_string_length(url->components.query); + } + if (url->components.fragment != NULL) { + frag_len = 1 + lwc_string_length(url->components.fragment); + base_len -= frag_len; + } + + /* Set new_url's length */ + len = base_len + query_len + frag_len; + + /* Create NetSurf URL object */ + *new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */ + if (*new_url == NULL) { + return NSERROR_NOMEM; + } + + if (lwc_intern_string(query, query_len, &lwc_query) != lwc_error_ok) { + free(*new_url); + return NSERROR_NOMEM; + } + + (*new_url)->length = len; + + /* Set string */ + pos = (*new_url)->string; + memcpy(pos, url->string, base_len); + pos += base_len; + memcpy(pos, query, query_len); + pos += query_len; + if (url->components.fragment != NULL) { + const char *frag = lwc_string_data(url->components.fragment); + *pos = '#'; + memcpy(++pos, frag, frag_len - 1); + pos += frag_len - 1; + } + *pos = '\0'; + + /* Copy components */ + (*new_url)->components.scheme = + nsurl__component_copy(url->components.scheme); + (*new_url)->components.username = + nsurl__component_copy(url->components.username); + (*new_url)->components.password = + nsurl__component_copy(url->components.password); + (*new_url)->components.host = + nsurl__component_copy(url->components.host); + (*new_url)->components.port = + nsurl__component_copy(url->components.port); + (*new_url)->components.path = + nsurl__component_copy(url->components.path); + (*new_url)->components.query = lwc_query; + (*new_url)->components.fragment = + nsurl__component_copy(url->components.fragment); + + (*new_url)->components.scheme_type = url->components.scheme_type; + + /* Get the nsurl's hash */ + nsurl__calc_hash(*new_url); + + /* Give the URL a reference */ + (*new_url)->count = 1; + + return NSERROR_OK; +} + + +/* exported interface documented in utils/nsurl.h */ +nserror nsurl_nice(const nsurl *url, char **result, bool remove_extensions) +{ + const char *data; + size_t len; + size_t pos; + bool match; + char *name; + + assert(url != NULL); + + *result = 0; + + /* extract the last component of the path, if possible */ + if ((url->components.path != NULL) && + (lwc_string_length(url->components.path) != 0) && + (lwc_string_isequal(url->components.path, + corestring_lwc_slash_, &match) == lwc_error_ok) && + (match == false)) { + bool first = true; + bool keep_looking; + + /* Get hold of the string data we're examining */ + data = lwc_string_data(url->components.path); + len = lwc_string_length(url->components.path); + pos = len; + + do { + keep_looking = false; + pos--; + + /* Find last '/' with stuff after it */ + while (pos != 0) { + if (data[pos] == '/' && pos < len - 1) { + break; + } + pos--; + } + + if (pos == 0) { + break; + } + + if (first) { + if (strncasecmp("/default.", data + pos, + SLEN("/default.")) == 0) { + keep_looking = true; + + } else if (strncasecmp("/index.", + data + pos, + 6) == 0) { + keep_looking = true; + + } + first = false; + } + + } while (keep_looking); + + if (data[pos] == '/') + pos++; + + if (strncasecmp("default.", data + pos, 8) != 0 && + strncasecmp("index.", data + pos, 6) != 0) { + size_t end = pos; + while (data[end] != '\0' && data[end] != '/') { + end++; + } + if (end - pos != 0) { + name = malloc(end - pos + 1); + if (name == NULL) { + return NSERROR_NOMEM; + } + memcpy(name, data + pos, end - pos); + name[end - pos] = '\0'; + if (remove_extensions) { + /* strip any extenstion */ + char *dot = strchr(name, '.'); + if (dot && dot != name) { + *dot = '\0'; + } + } + *result = name; + return NSERROR_OK; + } + } + } + + if (url->components.host != NULL) { + name = strdup(lwc_string_data(url->components.host)); + + for (pos = 0; name[pos] != '\0'; pos++) { + if (name[pos] == '.') { + name[pos] = '_'; + } + } + + *result = name; + return NSERROR_OK; + } + + return NSERROR_NOT_FOUND; +} + + +/* exported interface, documented in nsurl.h */ +nserror nsurl_parent(const nsurl *url, nsurl **new_url) +{ + lwc_string *lwc_path; + size_t old_path_len, new_path_len; + size_t len; + const char* path = NULL; + char *pos; + + assert(url != NULL); + + old_path_len = (url->components.path == NULL) ? 0 : + lwc_string_length(url->components.path); + + /* Find new path length */ + if (old_path_len == 0) { + new_path_len = old_path_len; + } else { + path = lwc_string_data(url->components.path); + + new_path_len = old_path_len; + if (old_path_len > 1) { + /* Skip over any trailing / */ + if (path[new_path_len - 1] == '/') + new_path_len--; + + /* Work back to next / */ + while (new_path_len > 0 && + path[new_path_len - 1] != '/') + new_path_len--; + } + } + + /* Find the length of new_url */ + len = url->length; + if (url->components.query != NULL) { + len -= lwc_string_length(url->components.query); + } + if (url->components.fragment != NULL) { + len -= 1; /* # */ + len -= lwc_string_length(url->components.fragment); + } + len -= old_path_len - new_path_len; + + /* Create NetSurf URL object */ + *new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */ + if (*new_url == NULL) { + return NSERROR_NOMEM; + } + + /* Make new path */ + if (old_path_len == 0) { + lwc_path = NULL; + } else if (old_path_len == new_path_len) { + lwc_path = lwc_string_ref(url->components.path); + } else { + if (lwc_intern_string(path, old_path_len - new_path_len, + &lwc_path) != lwc_error_ok) { + free(*new_url); + return NSERROR_NOMEM; + } + } + + (*new_url)->length = len; + + /* Set string */ + pos = (*new_url)->string; + memcpy(pos, url->string, len); + pos += len; + *pos = '\0'; + + /* Copy components */ + (*new_url)->components.scheme = + nsurl__component_copy(url->components.scheme); + (*new_url)->components.username = + nsurl__component_copy(url->components.username); + (*new_url)->components.password = + nsurl__component_copy(url->components.password); + (*new_url)->components.host = + nsurl__component_copy(url->components.host); + (*new_url)->components.port = + nsurl__component_copy(url->components.port); + (*new_url)->components.path = lwc_path; + (*new_url)->components.query = NULL; + (*new_url)->components.fragment = NULL; + + (*new_url)->components.scheme_type = url->components.scheme_type; + + /* Get the nsurl's hash */ + nsurl__calc_hash(*new_url); + + /* Give the URL a reference */ + (*new_url)->count = 1; + + return NSERROR_OK; +} + |