From 3fde9589c1ba89d16ec4d294fb308631f0257e3a Mon Sep 17 00:00:00 2001 From: Michael Drake Date: Thu, 6 Oct 2011 12:38:47 +0000 Subject: Remove unused url_normalise() and don't include regex.h. svn path=/trunk/netsurf/; revision=12971 --- utils/url.c | 199 +----------------------------------------------------------- utils/url.h | 1 - 2 files changed, 1 insertion(+), 199 deletions(-) diff --git a/utils/url.c b/utils/url.c index 831f16761..a44bd8f3e 100644 --- a/utils/url.c +++ b/utils/url.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include "curl/curl.h" @@ -168,208 +167,12 @@ out_true: return true; } -/** - * Normalize a URL. - * - * \param url an absolute URL - * \param result pointer to pointer to buffer to hold cleaned up url. Caller - * gets ownership of pointer to buffer value. On failure the - * pointer to buffer value will be NULL. - * \return URL_FUNC_OK on success - * - * If there is no scheme, http:// is added. The scheme and host are - * lower-cased. Default ports are removed (http only). An empty path is - * replaced with "/". Characters are unescaped if safe. - */ - -url_func_result url_normalize(const char *url, char **result) -{ - char c; - int m; - size_t i; - size_t len; - size_t bufsize; - char* norm; - bool http = false; - regmatch_t match[10]; - - *result = NULL; - - /* skip past any leading whitespace (likely if URL was copy-pasted) */ - while (isspace(*url)) - url++; - - /* allocate sufficiently large buffer for new URL */ - len = strlen(url); - /* "+ 1" for the terminating NUL character. */ - bufsize = len + 1 + SLEN("http://") + SLEN("/"); - /* work out how much extra to leave for internal whitespace */ - for(i = 0; i < len; i++) { - if(isspace(url[i])) bufsize += 2; /* ' ' -> '%20' */ - } - if ((norm = malloc(bufsize)) == NULL) { - LOG(("malloc failed")); - return URL_FUNC_NOMEM; - } - *result = norm; - strcpy(norm, url); - - /* truncate trailing whitespace (significant should be uriencoded) */ - for (i = len - 1; (i > 0) && isspace(norm[i]); i--) { - norm[i] = '\0'; - len--; - } - - /* encode any remaining (internal) whitespace */ - for (i = 0; i < len; i++) { - if(isspace(norm[i])) { - char space = norm[i]; - memmove(norm + i + 2, norm + i, 1 + len - i); - len += 2; - norm[ i] = '%'; - norm[++i] = digit2lowcase_hex(space >> 4); - norm[++i] = digit2lowcase_hex(space & 0xf); - } - } - - /* finally verify that it's actually an URL we're working on - * (RFC regex too fussy to tolerate above WSP problems) */ - if (regexec(&url_re, norm, 10, match, 0)) { - LOG(("url '%s' failed to match regex", url)); - free(norm); - *result = NULL; - return URL_FUNC_FAILED; - } - - if (match[URL_RE_SCHEME].rm_so == -1) { - /* scheme missing: add http:// and reparse */ - memmove(norm + SLEN("http://"), norm, len + 1); - memcpy(norm, "http://", SLEN("http://")); /* do NOT copy NUL */ - len += SLEN("http://"); - if (regexec(&url_re, norm, 10, match, 0)) { - LOG(("url '%s' failed to match regex", norm)); - free(norm); - *result = NULL; - return URL_FUNC_FAILED; - } - } - - /*for (unsigned int i = 0; i != 10; i++) { - if (match[i].rm_so == -1) - continue; - fprintf(stderr, "%i: '%.*s'\n", i, - match[i].rm_eo - match[i].rm_so, - res + match[i].rm_so); - }*/ - - /* see RFC 2616 section 3.2.3 */ - /* make scheme lower-case */ - if (match[URL_RE_SCHEME].rm_so != -1) { - for (i = match[URL_RE_SCHEME].rm_so; - (regoff_t) i != match[URL_RE_SCHEME].rm_eo; i++) - norm[i] = tolower(norm[i]); - if (match[URL_RE_SCHEME].rm_eo == 4 - && norm[0] == 'h' - && norm[1] == 't' - && norm[2] == 't' - && norm[3] == 'p') - http = true; - } - - /* make empty path into "/" */ - if (match[URL_RE_PATH].rm_so != -1 && - match[URL_RE_PATH].rm_so == match[URL_RE_PATH].rm_eo) { - memmove(norm + match[URL_RE_PATH].rm_so + 1, - norm + match[URL_RE_PATH].rm_so, - len - match[URL_RE_PATH].rm_so + 1); - norm[match[URL_RE_PATH].rm_so] = '/'; - len++; - } - - /* make host lower-case */ - if (match[URL_RE_AUTHORITY].rm_so != -1) { - /* Find @ delimiting credentials from host, if any */ - for (i = match[URL_RE_AUTHORITY].rm_so; - (regoff_t) i != match[URL_RE_AUTHORITY].rm_eo; - i++) { - if (norm[i] == '@') { - i++; - break; - } - } - - /* No credentials; transform entire host */ - if ((regoff_t) i == match[URL_RE_AUTHORITY].rm_eo) - i = match[URL_RE_AUTHORITY].rm_so; - - for (; (regoff_t) i != match[URL_RE_AUTHORITY].rm_eo; i++) { - if (norm[i] == ':' && (i + 3) < len) { - if (http && norm[i + 1] == '8' && - norm[i + 2] == '0' && - (regoff_t) i + 3 == - match[URL_RE_AUTHORITY].rm_eo) { - memmove(norm + i, - norm + i + 3, - len - - match[URL_RE_AUTHORITY]. - rm_eo); - len -= 3; - norm[len] = '\0'; - } else if ((regoff_t) i + 1 == match[4].rm_eo) { - memmove(norm + i, - norm + i + 1, - len - - match[URL_RE_AUTHORITY]. - rm_eo); - len--; - norm[len] = '\0'; - } - break; - } - norm[i] = tolower(norm[i]); - } - } - - /* unescape non-"reserved" escaped characters */ - for (i = 0; i + 2 < len; i++) { - if (norm[i] != '%') - continue; - c = tolower(norm[i + 1]); - if ('0' <= c && c <= '9') - m = 16 * (c - '0'); - else if ('a' <= c && c <= 'f') - m = 16 * (c - 'a' + 10); - else - continue; - c = tolower(norm[i + 2]); - if ('0' <= c && c <= '9') - m += c - '0'; - else if ('a' <= c && c <= 'f') - m += c - 'a' + 10; - else - continue; - - if (m <= 0x20 || strchr(";/?:@&=+$," "<>#%\"{}|\\^[]`", m) || - m >= 0x7f) { - i += 2; - continue; - } - - norm[i] = m; - memmove(norm + i + 1, norm + i + 3, len - i - 2); - len -= 2; - } - - /* norm and *result point to same memory, so just return ok */ - return URL_FUNC_OK; -} - /** * Resolve a relative URL to absolute form. * * \param rel relative URL - * \param base base URL, must be absolute and cleaned as by url_normalize() + * \param base base URL, must be absolute and cleaned as by nsurl_create() * \param result pointer to pointer to buffer to hold absolute url * \return URL_FUNC_OK on success */ diff --git a/utils/url.h b/utils/url.h index 8b5da77f2..8d41b139c 100644 --- a/utils/url.h +++ b/utils/url.h @@ -45,7 +45,6 @@ struct url_components { void url_init(void); bool url_host_is_ip_address(const char *host); -url_func_result url_normalize(const char *url, char **result); url_func_result url_join(const char *rel, const char *base, char **result); url_func_result url_host(const char *url, char **result); url_func_result url_scheme(const char *url, char **result); -- cgit v1.2.3