From 3fde9589c1ba89d16ec4d294fb308631f0257e3a Mon Sep 17 00:00:00 2001
From: Michael Drake <tlsa@netsurf-browser.org>
Date: Thu, 6 Oct 2011 12:38:47 +0000
Subject: Remove unused url_normalize() and don't include regex.h.

svn path=/trunk/netsurf/; revision=12971
---
 utils/url.c | 199 +-----------------------------------------------------------
 utils/url.h |   1 -
 2 files changed, 1 insertion(+), 199 deletions(-)

diff --git a/utils/url.c b/utils/url.c
index 831f16761..a44bd8f3e 100644
--- a/utils/url.c
+++ b/utils/url.c
@@ -28,7 +28,6 @@
 #include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
-#include <regex.h>
 #include <unistd.h>
 
 #include "curl/curl.h"
@@ -168,208 +167,12 @@ out_true:
 	return true;
 }
 
-/**
- * Normalize a URL.
- *
- * \param  url	   an absolute URL
- * \param  result  pointer to pointer to buffer to hold cleaned up url. Caller
- *                 gets ownership of pointer to buffer value. On failure the
- *                 pointer to buffer value will be NULL.
- * \return  URL_FUNC_OK on success
- *
- * If there is no scheme, http:// is added. The scheme and host are
- * lower-cased. Default ports are removed (http only). An empty path is
- * replaced with "/". Characters are unescaped if safe.
- */
-
-url_func_result url_normalize(const char *url, char **result)
-{
-	char c;
-	int m;
-	size_t i;
-	size_t len;
-	size_t bufsize;
-	char* norm;
-	bool http = false;
-	regmatch_t match[10];
-
-	*result = NULL;
-
-	/* skip past any leading whitespace (likely if URL was copy-pasted) */
-	while (isspace(*url))
-		url++;
-
-	/* allocate sufficiently large buffer for new URL */
-	len = strlen(url);
-	/* "+ 1" for the terminating NUL character.  */
-	bufsize = len + 1 + SLEN("http://") + SLEN("/");
-	/* work out how much extra to leave for internal whitespace */
-	for(i = 0; i < len; i++) {
-		if(isspace(url[i])) bufsize += 2; /* ' ' -> '%20' */
-	}
-	if ((norm = malloc(bufsize)) == NULL) {
-		LOG(("malloc failed"));
-		return URL_FUNC_NOMEM;
-	}
-	*result = norm;
-	strcpy(norm, url);
-
-	/* truncate trailing whitespace (significant should be uriencoded) */
-	for (i = len - 1; (i > 0) && isspace(norm[i]); i--) {
-		norm[i] = '\0';
-		len--;
-	}
-
-	/* encode any remaining (internal) whitespace */
-	for (i = 0; i < len; i++) {
-		if(isspace(norm[i])) {
-			char space = norm[i];
-			memmove(norm + i + 2, norm + i, 1 + len - i);
-			len += 2;
-			norm[  i] = '%';
-			norm[++i] = digit2lowcase_hex(space >> 4);
-			norm[++i] = digit2lowcase_hex(space & 0xf);
-		}
-	}
-
-	/* finally verify that it's actually an URL we're working on
-	 * (RFC regex too fussy to tolerate above WSP problems) */
-	if (regexec(&url_re, norm, 10, match, 0)) {
-		LOG(("url '%s' failed to match regex", url));
-		free(norm);
-		*result = NULL;
-		return URL_FUNC_FAILED;
-	}
-
-	if (match[URL_RE_SCHEME].rm_so == -1) {
-		/* scheme missing: add http:// and reparse */
-		memmove(norm + SLEN("http://"), norm, len + 1);
-		memcpy(norm, "http://", SLEN("http://")); /* do NOT copy NUL */
-		len += SLEN("http://");
-		if (regexec(&url_re, norm, 10, match, 0)) {
-			LOG(("url '%s' failed to match regex", norm));
-			free(norm);
-			*result = NULL;
-			return URL_FUNC_FAILED;
-		}
-	}
-
-	/*for (unsigned int i = 0; i != 10; i++) {
-		if (match[i].rm_so == -1)
-			continue;
-		fprintf(stderr, "%i: '%.*s'\n", i,
-				match[i].rm_eo - match[i].rm_so,
-				res + match[i].rm_so);
-	}*/
-
-	/* see RFC 2616 section 3.2.3 */
-	/* make scheme lower-case */
-	if (match[URL_RE_SCHEME].rm_so != -1) {
-		for (i = match[URL_RE_SCHEME].rm_so;
-				(regoff_t) i != match[URL_RE_SCHEME].rm_eo; i++)
-			norm[i] = tolower(norm[i]);
-		if (match[URL_RE_SCHEME].rm_eo == 4
-				&& norm[0] == 'h'
-				&& norm[1] == 't'
-				&& norm[2] == 't'
-				&& norm[3] == 'p')
-			http = true;
-	}
-
-	/* make empty path into "/" */
-	if (match[URL_RE_PATH].rm_so != -1 &&
-			match[URL_RE_PATH].rm_so == match[URL_RE_PATH].rm_eo) {
-		memmove(norm + match[URL_RE_PATH].rm_so + 1,
-				norm + match[URL_RE_PATH].rm_so,
-				len - match[URL_RE_PATH].rm_so + 1);
-		norm[match[URL_RE_PATH].rm_so] = '/';
-		len++;
-	}
-
-	/* make host lower-case */
-	if (match[URL_RE_AUTHORITY].rm_so != -1) {
-		/* Find @ delimiting credentials from host, if any */
-		for (i = match[URL_RE_AUTHORITY].rm_so;
-				(regoff_t) i != match[URL_RE_AUTHORITY].rm_eo;
-				i++) {
-			if (norm[i] == '@') {
-				i++;
-				break;
-			}
-		}
-
-		/* No credentials; transform entire host */
-		if ((regoff_t) i == match[URL_RE_AUTHORITY].rm_eo)
-			i = match[URL_RE_AUTHORITY].rm_so;
-
-		for (; (regoff_t) i != match[URL_RE_AUTHORITY].rm_eo; i++) {
-			if (norm[i] == ':' && (i + 3) < len) {
-				if (http && norm[i + 1] == '8' &&
-						norm[i + 2] == '0' &&
-						(regoff_t) i + 3 ==
-						match[URL_RE_AUTHORITY].rm_eo) {
-					memmove(norm + i,
-							norm + i + 3,
-							len -
-							match[URL_RE_AUTHORITY].
-							rm_eo);
-					len -= 3;
-					norm[len] = '\0';
-				} else if ((regoff_t) i + 1 == match[4].rm_eo) {
-					memmove(norm + i,
-							norm + i + 1,
-							len -
-							match[URL_RE_AUTHORITY].
-							rm_eo);
-					len--;
-					norm[len] = '\0';
-				}
-				break;
-			}
-			norm[i] = tolower(norm[i]);
-		}
-	}
-
-	/* unescape non-"reserved" escaped characters */
-	for (i = 0; i + 2 < len; i++) {
-		if (norm[i] != '%')
-			continue;
-		c = tolower(norm[i + 1]);
-		if ('0' <= c && c <= '9')
-			m = 16 * (c - '0');
-		else if ('a' <= c && c <= 'f')
-			m = 16 * (c - 'a' + 10);
-		else
-			continue;
-		c = tolower(norm[i + 2]);
-		if ('0' <= c && c <= '9')
-			m += c - '0';
-		else if ('a' <= c && c <= 'f')
-			m += c - 'a' + 10;
-		else
-			continue;
-
-		if (m <= 0x20 || strchr(";/?:@&=+$," "<>#%\"{}|\\^[]`", m) ||
-				m >= 0x7f) {
-			i += 2;
-			continue;
-		}
-
-		norm[i] = m;
-		memmove(norm + i + 1, norm + i + 3, len - i - 2);
-		len -= 2;
-	}
-
-	/* norm and *result point to same memory, so just return ok */
-	return URL_FUNC_OK;
-}
-
 
 /**
  * Resolve a relative URL to absolute form.
  *
  * \param  rel	   relative URL
- * \param  base	   base URL, must be absolute and cleaned as by url_normalize()
+ * \param  base	   base URL, must be absolute and cleaned as by nsurl_create()
  * \param  result  pointer to pointer to buffer to hold absolute url
  * \return  URL_FUNC_OK on success
  */
diff --git a/utils/url.h b/utils/url.h
index 8b5da77f2..8d41b139c 100644
--- a/utils/url.h
+++ b/utils/url.h
@@ -45,7 +45,6 @@ struct url_components {
 
 void url_init(void);
 bool url_host_is_ip_address(const char *host);
-url_func_result url_normalize(const char *url, char **result);
 url_func_result url_join(const char *rel, const char *base, char **result);
 url_func_result url_host(const char *url, char **result);
 url_func_result url_scheme(const char *url, char **result);
-- 
cgit v1.2.3