From a25f87a739943312b0a0ffa3943b584fd8265102 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Tue, 4 Oct 2011 22:32:04 +0000 Subject: make nsurl__create_from_section correctly escape characters as per RFC3986 svn path=/trunk/netsurf/; revision=12948 --- utils/nsurl.c | 33 ++++++++++++++++++++++++--------- utils/utils.h | 10 ++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/utils/nsurl.c b/utils/nsurl.c index ff7a8d71d..642d8b79f 100644 --- a/utils/nsurl.c +++ b/utils/nsurl.c @@ -35,6 +35,23 @@ /* Define to enable NSURL debugging */ #undef NSURL_DEBUG +/* From RFC3986 section 2.2 (reserved characters) + * reserved = gen-delims / sub-delims + * + * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + * + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + * / "*" / "+" / "," / ";" / "=" + */ +#define URL_RESERVED_S ":/?#[]@!$&'()*+,;=" + +/* From RFC3986 section 2.3 (unreserved characters) + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + */ +#define URL_UNRESERVED_S "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" + +/* The characters which should not be percent escaped */ +#define URL_NO_ESCAPE URL_RESERVED_S URL_UNRESERVED_S /** * NetSurf URL object @@ -528,10 +545,8 @@ static nserror nsurl__create_from_section(const char const *url_s, ascii_offset = nsurl__get_ascii_offset(*(pos + 1), *(pos + 2)); - if (ascii_offset <= 0x20 || - strchr(";/?:@&=+$,<>#%\"{}|\\^[]`", - ascii_offset) || - ascii_offset >= 0x7f) { + /* NB: strchr() also finds the NUL terminator, so test 0 first */ + if (ascii_offset == 0 || strchr(URL_UNRESERVED_S, ascii_offset) == NULL) { /* This character should be escaped after all, * just let it get copied */ copy_len += 3; @@ -553,20 +568,20 @@ static nserror nsurl__create_from_section(const char const *url_s, length -= 2; - } else if (isspace(*pos)) { - /* This whitespace needs to be escaped */ + } else if (strchr(URL_NO_ESCAPE, (*pos)) == NULL) { + /* This needs to be escaped */ if (copy_len > 0) { /* Copy up to here */ memcpy(pos_norm, pos_url_s, copy_len); pos_norm += 
copy_len; copy_len = 0; } - /* escape */ + /* escape */ *(pos_norm++) = '%'; - *(pos_norm++) = digit2lowcase_hex(*pos >> 4); - *(pos_norm++) = digit2lowcase_hex(*pos & 0xf); + *(pos_norm++) = digit2uppercase_hex(((unsigned char)*pos) >> 4); + *(pos_norm++) = digit2uppercase_hex(((unsigned char)*pos) & 0xf); pos_url_s = pos + 1; length += 2; diff --git a/utils/utils.h b/utils/utils.h index 079708843..ffd4f2977 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -151,6 +151,16 @@ inline static char digit2lowcase_hex(unsigned char digit) { return "0123456789abcdef"[digit]; } +/** + * Return a hex digit for the given numerical value. + * \param digit value to convert, must be less than 16 (asserted) + * \return character in range 0-9A-F + */ +inline static char digit2uppercase_hex(unsigned char digit) { + assert(digit < 16); + return "0123456789ABCDEF"[digit]; +} + /* Platform specific functions */ void die(const char * const error); -- cgit v1.2.3