summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--utils/Makefile3
-rw-r--r--utils/idna.c625
-rw-r--r--utils/idna.h54
-rw-r--r--utils/nsurl.c21
4 files changed, 697 insertions, 6 deletions
diff --git a/utils/Makefile b/utils/Makefile
index c808b91ba..b1a6df4fc 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -2,6 +2,7 @@
S_UTILS := base64.c corestrings.c filename.c filepath.c hashtable.c \
libdom.c locale.c log.c messages.c nsurl.c talloc.c url.c \
- utf8.c utils.c useragent.c bloom.c nsoption.c file.c
+ utf8.c utils.c useragent.c bloom.c nsoption.c file.c idna.c \
+ punycode.c utf8proc.c
S_UTILS := $(addprefix utils/,$(S_UTILS))
diff --git a/utils/idna.c b/utils/idna.c
new file mode 100644
index 000000000..826d041fb
--- /dev/null
+++ b/utils/idna.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright 2014 Chris Young <chris@unsatisfactorysoftware.co.uk>
+ *
+ * This file is part of NetSurf, http://www.netsurf-browser.org/
+ *
+ * NetSurf is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * NetSurf is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/** \file
+ * NetSurf international domain name handling (implementation).
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "utils/errors.h"
+#include "utils/idna.h"
+#include "utils/idna_props.h"
+#include "utils/log.h"
+#include "utils/punycode.h"
+#include "utils/utf8.h"
+#include "utils/utf8proc.h"
+#include "utils/utils.h"
+
+
+int32_t idna_contexto[] = {
+ /* CONTEXTO codepoints which have a rule defined */
+ 0x00b7, 0x0375, 0x05f3, 0x05f4, 0x30fb, 0x0660, 0x0661,
+ 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668,
+ 0x0669, 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5,
+ 0x06f6, 0x06f7, 0x06f8, 0x06f9, 0
+};
+
+
+/**
+ * Find the IDNA property of a UCS-4 codepoint
+ *
+ * \param cp Unicode codepoint
+ * \return IDNA property
+ */
+static idna_property idna__cp_property(int32_t cp)
+{
+ const idna_table *t;
+
+ t = idna_derived;
+ while (t->p.property) {
+ if ((cp >= t->start) && (cp <= t->end))
+ return t->p.property;
+ t++;
+ };
+
+ return IDNA_P_DISALLOWED;
+}
+
+
+/**
+ * Find the Joining_Type property of a UCS-4 codepoint
+ *
+ * \param cp Unicode codepoint
+ * \return JT property
+ */
+static idna_unicode_jt idna__jt_property(int32_t cp)
+{
+ const idna_table *t;
+
+ t = idna_joiningtype;
+ while (t->p.jt) {
+ if ((cp >= t->start) && (cp <= t->end))
+ return t->p.jt;
+ t++;
+ };
+
+ return IDNA_UNICODE_JT_U;
+}
+
+
+/**
+ * Check if a CONTEXTO codepoint has a rule defined
+ *
+ * \param cp Unicode codepoint
+ * \return true if a rule is defined
+ */
+static bool idna__contexto_rule(int32_t cp)
+{
+ bool rule = false;
+ int32_t *t = idna_contexto;
+ while (*t) {
+ if (*t == cp) rule = true;
+ };
+
+ return rule;
+}
+
+
+/**
+ * Check if a CONTEXTJ codepoint has a rule defined,
+ * and conforms to that rule.
+ *
+ * \param string UCS-4 string
+ * \param index character in the string which is CONTEXTJ
+ * \return true if conforming
+ */
+static bool idna__contextj_rule(int32_t *label, int index, size_t len)
+{
+ const utf8proc_property_t *unicode_props;
+ idna_unicode_jt joining_type;
+ int i;
+ bool match;
+
+ /* These CONTEXTJ rules are defined at
+ * http://www.iana.org/assignments/idna-tables-5.2.0/idna-tables-5.2.0.xml
+ */
+
+ if (label[index] == 0x200c) {
+ if (index == 0) return false; /* No previous character */
+ unicode_props = utf8proc_get_property(label[index - 1]);
+ if (unicode_props->combining_class == UTF8PROC_CCC_VIRAMA)
+ return true;
+
+ match = false;
+ for (i = 0; i < (index - 1); i++) {
+ joining_type = idna__jt_property(label[i]);
+ if (((joining_type == IDNA_UNICODE_JT_L) ||
+ (joining_type == IDNA_UNICODE_JT_D)) &&
+ (idna__jt_property(label[i+1]) == IDNA_UNICODE_JT_T)) {
+ match = true;
+ break;
+ }
+ }
+
+ if (match == false) return false;
+
+ if (idna__jt_property(label[index+1]) != IDNA_UNICODE_JT_T)
+ return false;
+
+ for (i = (index + 1); i < (int)len; i++) {
+ joining_type = idna__jt_property(label[i]);
+ if ((joining_type == IDNA_UNICODE_JT_R) ||
+ (joining_type == IDNA_UNICODE_JT_D)) {
+ return true;
+ break;
+ }
+ }
+
+ return false;
+
+ } else if (label[index] == 0x200d) {
+ if (index == 0) return false; /* No previous character */
+ unicode_props = utf8proc_get_property(label[index - 1]);
+ if (unicode_props->combining_class == UTF8PROC_CCC_VIRAMA)
+ return true;
+ return false;
+ }
+
+ /* No rule defined */
+ return false;
+}
+
+
+/**
+ * Convert a UTF-8 string to UCS-4
+ *
+ * \param utf8_label UTF-8 string containing host label
+ * \param len Length of host label (in bytes)
+ * \param ucs4_label Pointer to update with the output
+ * \param ucs4_len Pointer to update with the length
+ * \return NSERROR_OK on success, appropriate error otherwise
+ *
+ * If return value != NSERROR_OK, output will be left untouched.
+ */
+static nserror idna__utf8_to_ucs4(const char *utf8_label, size_t len, int32_t **ucs4_label, size_t *ucs4_len)
+{
+ int32_t *nfc_label;
+ ssize_t nfc_size;
+
+ nfc_label = malloc(len * 4);
+ if (nfc_label == NULL) return NSERROR_NOMEM;
+
+ nfc_size = utf8proc_decompose((const uint8_t *)utf8_label, len,
+ nfc_label, len * 4, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
+ if(nfc_size < 0) return NSERROR_NOMEM;
+
+ nfc_size = utf8proc_normalise(nfc_label, nfc_size,
+ UTF8PROC_STABLE | UTF8PROC_COMPOSE);
+ if(nfc_size < 0) return NSERROR_NOMEM;
+
+ *ucs4_label = nfc_label;
+ *ucs4_len = nfc_size;
+ return NSERROR_OK;
+}
+
+
+/**
+ * Convert a UCS-4 string to UTF-8
+ *
+ * \param ucs4_label UCS-4 string containing host label
+ * \param ucs4_len Length of host label (in bytes)
+ * \param utf8_label Pointer to update with the output
+ * \param utf8_len Pointer to update with the length
+ * \return NSERROR_OK on success, appropriate error otherwise
+ *
+ * If return value != NSERROR_OK, output will be left untouched.
+ */
+static nserror idna__ucs4_to_utf8(const int32_t *ucs4_label, size_t ucs4_len, char **utf8_label, size_t *utf8_len)
+{
+ int32_t *nfc_label;
+ ssize_t nfc_size = ucs4_len;
+
+ nfc_label = malloc(1 + ucs4_len * 4);
+ if (nfc_label == NULL) return NSERROR_NOMEM;
+ memcpy(nfc_label, ucs4_label, ucs4_len * 4);
+
+ nfc_size = utf8proc_reencode(nfc_label, ucs4_len,
+ UTF8PROC_STABLE | UTF8PROC_COMPOSE);
+ if(nfc_size < 0) return NSERROR_NOMEM;
+
+ *utf8_label = (char *)nfc_label;
+ *utf8_len = nfc_size;
+
+ return NSERROR_OK;
+}
+
+
+/**
+ * Convert a host label in UCS-4 to an ACE version
+ *
+ * \param ucs4_label UCS-4 NFC string containing host label
+ * \param len Length of host label (in characters/codepoints)
+ * \param ace_label ASCII-compatible encoded version
+ * \param out_len Length of ace_label
+ * \return NSERROR_OK on success, appropriate error otherwise
+ *
+ * If return value != NSERROR_OK, output will be left untouched.
+ */
+static nserror idna__ucs4_to_ace(int32_t *ucs4_label, size_t len, char **ace_label, size_t *out_len)
+{
+ char punycode[65]; /* max length of host label + NULL */
+ enum punycode_status status;
+ size_t output_length = 60; /* punycode length - 4 - 1 */
+
+ punycode[0] = 'x';
+ punycode[1] = 'n';
+ punycode[2] = '-';
+ punycode[3] = '-';
+
+ status = punycode_encode(len, (const punycode_uint *)ucs4_label,
+ NULL, &output_length, punycode + 4);
+ if (status == punycode_bad_input) {
+ LOG(("Bad input"));
+ return NSERROR_BAD_ENCODING;
+ } else if (status == punycode_big_output) {
+ LOG(("Output too big"));
+ return NSERROR_NOMEM;
+ } else if (status == punycode_overflow) {
+ LOG(("Overflow"));
+ return NSERROR_NOMEM;
+ }
+
+ output_length += SLEN("xn--");
+ punycode[output_length] = '\0';
+
+ *ace_label = strdup(punycode);
+ *out_len = output_length;
+
+ return NSERROR_OK;
+}
+
+
+/**
+ * Convert a host label in ACE format to UCS-4
+ *
+ * \param ace_label ASCII string containing host label
+ * \param ace_len Length of host label
+ * \param ucs4_label Pointer to hold UCS4 decoded version
+ * \param ucs4_len Pointer to hold length of ucs4_label
+ * \return NSERROR_OK on success, appropriate error otherwise
+ *
+ * If return value != NSERROR_OK, output will be left untouched.
+ */
+static nserror idna__ace_to_ucs4(const char *ace_label, size_t ace_len, int32_t **ucs4_label, size_t *ucs4_len)
+{
+ int32_t *ucs4;
+ enum punycode_status status;
+ size_t output_length = ace_len; /* never exceeds input length */
+
+ /* The header should always have been checked before calling */
+ assert((ace_label[0] == 'x') && (ace_label[1] == 'n') &&
+ (ace_label[2] == '-') && (ace_label[3] == '-'));
+
+ ucs4 = malloc(output_length * 4);
+ if (ucs4 == NULL) return NSERROR_NOMEM;
+
+ status = punycode_decode(ace_len - 4, ace_label + 4,
+ &output_length, (punycode_uint *)ucs4, NULL);
+
+ if (status == punycode_bad_input) {
+ LOG(("Bad input"));
+ return NSERROR_BAD_ENCODING;
+ } else if (status == punycode_big_output) {
+ LOG(("Output too big"));
+ return NSERROR_NOMEM;
+ } else if (status == punycode_overflow) {
+ LOG(("Overflow"));
+ return NSERROR_NOMEM;
+ }
+
+ ucs4[output_length] = '\0';
+
+ *ucs4_label = ucs4;
+ *ucs4_len = output_length;
+
+ return NSERROR_OK;
+}
+
+
+/**
+ * Find the length of a host label
+ *
+ * \param host String containing a host or FQDN
+ * \param max_length Length of host string to search (in bytes)
+ * \return Distance to next separator character or end of string
+ */
+static size_t idna__host_label_length(const char *host, size_t max_length)
+{
+ const char *p = host;
+ size_t length = 0;
+
+ while (length < max_length) {
+ if ((*p == '.') || (*p == ':') || (*p == '\0')) break;
+ length++;
+ p++;
+ }
+
+ return length;
+}
+
+
+/**
+ * Check if a host label is valid for IDNA2008
+ *
+ * \param label Host label to check (UCS-4)
+ * \param len Length of host label (in characters/codepoints)
+ * \return true if compliant, false otherwise
+ */
+static bool idna__is_valid(int32_t *label, size_t len)
+{
+ const utf8proc_property_t *unicode_props;
+ idna_property idna_prop;
+ size_t i = 0;
+
+ /* 1. Check that the string is NFC.
+ * This check is skipped as the conversion to Unicode
+ * does normalisation as part of the conversion.
+ */
+
+ /* 2. Check characters 3 and 4 are not '--'. */
+ if ((label[2] == 0x002d) && (label[3] == 0x002d)) {
+ LOG(("Check failed: characters 2 and 3 are '--'"));
+ return false;
+ }
+
+ /* 3. Check the first character is not a combining mark */
+ unicode_props = utf8proc_get_property(label[0]);
+
+ if ((unicode_props->category == UTF8PROC_CATEGORY_MN) ||
+ (unicode_props->category == UTF8PROC_CATEGORY_MC) ||
+ (unicode_props->category == UTF8PROC_CATEGORY_ME)) {
+ LOG(("Check failed: character 0 is a combining mark"));
+ return false;
+ }
+
+ for (i = 0; i < len; i++) {
+ idna_prop = idna__cp_property(label[i]);
+
+ /* 4. Check characters not DISALLOWED by RFC5892 */
+ if (idna_prop == IDNA_P_DISALLOWED) {
+ LOG(("Check failed: character %d (%x) is DISALLOWED", i, label[i]));
+ return false;
+ }
+
+ /* 5. Check CONTEXTJ characters conform to defined rules */
+ if (idna_prop == IDNA_P_CONTEXTJ) {
+ if (idna__contextj_rule(label, i, len) == false) {
+ LOG(("Check failed: character %d (%x) does not conform to CONTEXTJ rule", i, label[i]));
+ return false;
+ }
+ }
+
+ /* 6. Check CONTEXTO characters have a rule defined */
+ /*\todo optionally we can check conformance to this rule */
+ if (idna_prop == IDNA_P_CONTEXTO) {
+ if (idna__contexto_rule(label[i]) == false) {
+ LOG(("Check failed: character %d (%x) has no CONTEXTO rule defined", i, label[i]));
+ return false;
+ }
+ }
+
+ /* 7. Check characters are not UNASSIGNED */
+ if (idna_prop == IDNA_P_UNASSIGNED) {
+ LOG(("Check failed: character %d (%x) is UNASSIGNED", i, label[i]));
+ return false;
+ }
+
+ /*\todo 8. (optionally) check Bidi compliance */
+ }
+
+ return true;
+}
+
+
+/**
+ * Check if a host label is LDH
+ *
+ * \param label Host label to check
+ * \param len Length of host label
+ * \return true if LDH compliant, false otherwise
+ */
+static bool idna__is_ldh(const char *label, size_t len)
+{
+ const char *p = label;
+ size_t i = 0;
+
+ /* Check for leading or trailing hyphens */
+ if ((p[0] == '-') || (p[len - 1] == '-'))
+ return false;
+
+ /* Check for non-alphanumeric, non-hyphen characters */
+ for (i = 0; i < len; p++) {
+ i++;
+ if (*p == '-') continue;
+ if ((*p >= '0') && (*p <= '9')) continue;
+ if ((*p >= 'a') && (*p <= 'z')) continue;
+ if ((*p >= 'A') && (*p <= 'Z')) continue;
+
+ return false;
+ }
+
+ return true;
+}
+
+
+/**
+ * Check if a host label appears to be ACE
+ *
+ * \param label Host label to check
+ * \param len Length of host label
+ * \return true if ACE compliant, false otherwise
+ */
+static bool idna__is_ace(const char *label, size_t len)
+{
+ /* Check it is a valid DNS string */
+ if (idna__is_ldh(label, len) == false)
+ return false;
+
+ /* Check the ACE prefix is present */
+ if ((label[0] == 'x') && (label[1] == 'n') &&
+ (label[2] == '-') && (label[3] == '-'))
+ return true;
+
+ return false;
+}
+
+
+/**
+ * Verify an ACE label is valid
+ *
+ * \param label Host label to check
+ * \param len Length of label
+ * \return true if valid, false otherwise
+ */
+static bool idna__verify(const char *label, size_t len)
+{
+ nserror error;
+ int32_t *ucs4;
+ char *ace;
+ size_t ucs4_len, ace_len;
+
+ error = idna__ace_to_ucs4(label, len,
+ &ucs4, &ucs4_len);
+ if (error != NSERROR_OK) return false;
+
+ error = idna__ucs4_to_ace(ucs4, ucs4_len,
+ &ace, &ace_len);
+ free(ucs4);
+ if (error != NSERROR_OK) return false;
+
+ if ((len == ace_len) && (strncmp(label, ace, len) == 0)) {
+ free(ace);
+ return true;
+ }
+
+ LOG(("Re-encoded ACE label %s does not match input", ace));
+ free(ace);
+
+ return false;
+}
+
+
+/* exported interface documented in idna.h */
+nserror idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len)
+{
+ nserror error;
+ int32_t *ucs4_host;
+ size_t label_len, output_len, ucs4_len, fqdn_len = 0;
+ char fqdn[256];
+ char *output, *fqdn_p = fqdn;
+
+ while ((label_len = idna__host_label_length(host, len)) != 0) {
+ if (idna__is_ldh(host, label_len) == false) {
+ /* This string is IDN or invalid */
+
+ /* Convert to Unicode */
+ if ((error = idna__utf8_to_ucs4(host, label_len,
+ &ucs4_host, &ucs4_len)) != NSERROR_OK)
+ return error;
+
+ /* Check this is valid for conversion */
+ if (idna__is_valid(ucs4_host, ucs4_len) == false)
+ return NSERROR_BAD_URL;
+
+ /* Convert to ACE */
+ error = idna__ucs4_to_ace(ucs4_host, ucs4_len,
+ &output, &output_len);
+ free(ucs4_host);
+ if (error != NSERROR_OK) return error;
+ strncpy(fqdn_p, output, output_len);
+ free(output);
+ fqdn_p += output_len;
+ fqdn_len += output_len;
+ } else {
+ /* This is already a DNS-valid ASCII string */
+ if ((idna__is_ace(host, label_len) == true) &&
+ (idna__verify(host, label_len) == false)) {
+ LOG(("Cannot verify ACE label %s", host));
+ return NSERROR_BAD_URL;
+ }
+ strncpy(fqdn_p, host, label_len);
+ fqdn_p += label_len;
+ fqdn_len += label_len;
+ }
+
+ *fqdn_p = '.';
+ fqdn_p++;
+ fqdn_len++;
+
+ host += label_len;
+ if ((*host == '\0') || (*host == ':')) break;
+ host++;
+ len = len - label_len - 1;
+ }
+
+ fqdn_p--;
+ *fqdn_p = '\0';
+ *ace_host = strdup(fqdn);
+ *ace_len = fqdn_len - 1; /* last character is NULL */
+
+ return NSERROR_OK;
+}
+
+
+/* exported interface documented in idna.h */
+nserror idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *host_len)
+{
+ nserror error;
+ int32_t *ucs4_host;
+ size_t label_len, output_len, ucs4_len, fqdn_len = 0;
+ char fqdn[256];
+ char *output, *fqdn_p = fqdn;
+
+ while ((label_len = idna__host_label_length(ace_host, ace_len)) != 0) {
+ if (idna__is_ace(ace_host, label_len) == true) {
+ /* This string is DNS-valid and (probably) encoded */
+
+ /* Decode to Unicode */
+ error = idna__ace_to_ucs4(ace_host, label_len,
+ &ucs4_host, &ucs4_len);
+ if (error != NSERROR_OK) return error;
+
+ /* Convert to UTF-8 */
+ if ((error = idna__ucs4_to_utf8(ucs4_host, ucs4_len,
+ &output, &output_len)) != NSERROR_OK)
+ return error;
+
+ free(ucs4_host);
+ memcpy(fqdn_p, output, output_len * 4);
+ free(output);
+ fqdn_p += output_len;
+ fqdn_len += output_len;
+ } else {
+ /* Not ACE */
+ memcpy(fqdn_p, ace_host, label_len);
+ fqdn_p += label_len;
+ fqdn_len += label_len;
+ }
+
+ *fqdn_p = '.';
+ fqdn_p++;
+ fqdn_len++;
+
+ ace_host += label_len;
+ if ((*ace_host == '\0') || (*ace_host == ':')) break;
+ ace_host++;
+ ace_len = ace_len - label_len - 1;
+ }
+
+ fqdn_p--;
+ *fqdn_p = '\0';
+ *host = strdup(fqdn);
+ *host_len = fqdn_len - 1; /* last character is NULL */
+
+ return NSERROR_OK;
+}
+
diff --git a/utils/idna.h b/utils/idna.h
new file mode 100644
index 000000000..c59a22763
--- /dev/null
+++ b/utils/idna.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2014 Chris Young <chris@unsatisfactorysoftware.co.uk>
+ *
+ * This file is part of NetSurf, http://www.netsurf-browser.org/
+ *
+ * NetSurf is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * NetSurf is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/** \file
+ * NetSurf international domain name handling (interface).
+ */
+
+#ifndef _NETSURF_UTILS_IDNA_H_
+#define _NETSURF_UTILS_IDNA_H_
+
+/**
+ * Convert a hostname to an ACE version suitable for DNS lookup
+ *
+ * \param host String containing host
+ * \param len Length of host string
+ * \param ace_host Pointer to update with the output
+ * \param ace_len Pointer to update with length of ace_host
+ * \return NSERROR_OK on success, appropriate error otherwise
+ *
+ * If return value != NSERROR_OK, output will be left untouched.
+ */
+nserror idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len);
+
+
+/**
+ * Convert a hostname from ACE to UTF-8 suitable for display
+ *
+ * \param ace_host String containing host
+ * \param ace_len Length of host string
+ * \param host Pointer to update with the output
+ * \param host_len Pointer to update with length of host
+ * \return NSERROR_OK on success, appropriate error otherwise
+ *
+ * If return value != NSERROR_OK, output will be left untouched.
+ */
+nserror idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *host_len);
+
+#endif
+
diff --git a/utils/nsurl.c b/utils/nsurl.c
index 5c0a48511..7e8792cff 100644
--- a/utils/nsurl.c
+++ b/utils/nsurl.c
@@ -28,6 +28,7 @@
#include "utils/corestrings.h"
#include "utils/errors.h"
+#include "utils/idna.h"
#include "utils/log.h"
#include "utils/nsurl.h"
#include "utils/utils.h"
@@ -690,8 +691,10 @@ static nserror nsurl__create_from_section(const char * const url_s,
const char *pos;
const char *pos_url_s;
char *norm_start = pos_norm;
+ char *host;
size_t copy_len;
size_t length;
+ size_t host_len;
enum {
NSURL_F_NO_PORT = (1 << 0)
} flags = 0;
@@ -756,7 +759,8 @@ static nserror nsurl__create_from_section(const char * const url_s,
continue;
}
- if (nsurl__is_unreserved(ascii_offset) == false) {
+ if ((section != URL_SCHEME && section != URL_HOST) &&
+ (nsurl__is_unreserved(ascii_offset) == false)) {
/* This character should be escaped after all,
* just let it get copied */
copy_len += 3;
@@ -778,7 +782,8 @@ static nserror nsurl__create_from_section(const char * const url_s,
length -= 2;
- } else if (nsurl__is_no_escape(*pos) == false) {
+ } else if ((section != URL_SCHEME && section != URL_HOST) &&
+ (nsurl__is_no_escape(*pos) == false)) {
/* This needs to be escaped */
if (copy_len > 0) {
@@ -955,9 +960,15 @@ static nserror nsurl__create_from_section(const char * const url_s,
}
/* host */
- if (lwc_intern_string(norm_start, length,
- &url->host) != lwc_error_ok) {
- return NSERROR_NOMEM;
+ /* Encode host according to IDNA2008 */
+ if (idna_encode(norm_start, length, &host, &host_len) == NSERROR_OK) {
+ if (lwc_intern_string(host, host_len,
+ &url->host) != lwc_error_ok) {
+ return NSERROR_NOMEM;
+ }
+ free(host);
+ } else {
+ return NSERROR_BAD_URL;
}
}