From d1b79f3e414b8dd770f109730d8de77d9d1b13a1 Mon Sep 17 00:00:00 2001
From: John Mark Bell
Date: Sun, 11 Feb 2007 22:28:00 +0000
Subject: Bring percent-encoding closer to what other browsers do.

svn path=/trunk/netsurf/; revision=3179
---
 utils/url.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'utils')

diff --git a/utils/url.c b/utils/url.c
index f6f7fdc9d..8edd373b6 100644
--- a/utils/url.c
+++ b/utils/url.c
@@ -74,7 +74,7 @@ bool url_host_is_ip_address(const char *host) {
 	bool n;
 
 	assert(host);
-	
+
 	/* an IP address is of the format XXX.XXX.XXX.XXX, ie totally
 	 * numeric with 3 full stops between the numbers */
 	b = 0; // number of breaks
@@ -821,19 +821,24 @@ url_func_result url_escape(const char *unescaped, bool sptoplus,
 		return URL_FUNC_NOMEM;
 
 	for (c = unescaped, d = escaped; *c; c++) {
-		if (!isascii(*c) ||
-				strchr(";/?:@&=+$," "<>#%\"{}|\\^[]`", *c) ||
+		/* Check if we should escape this byte.
+		 * '~' is unreserved and should not be percent encoded, if
+		 * you believe the spec; however, leaving it unescaped
+		 * breaks a bunch of websites, so we escape it anyway. */
+		if (!isascii(*c) || strchr(":/?#[]@" /* gen-delims */
+				"!$&'()*+,;=" /* sub-delims */
+				"<>%\"{}|\\^`~", /* others */
+				*c) ||
 				*c <= 0x20 || *c == 0x7f) {
-			if (*c == 0x20 && sptoplus)
+			if (*c == 0x20 && sptoplus) {
 				*d++ = '+';
-			else {
+			} else {
 				*d++ = '%';
 				*d++ = "0123456789ABCDEF"[((*c >> 4) & 0xf)];
 				*d++ = "0123456789ABCDEF"[(*c & 0xf)];
 			}
-		}
-		else {
-			/* unreserved characters: [a-zA-Z0-9-_.!~*'()] */
+		} else {
+			/* unreserved characters: [a-zA-Z0-9-._] */
 			*d++ = *c;
 		}
 	}
-- 
cgit v1.2.3