summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- desktop/browser.c 9
-rw-r--r-- render/form.c 17
-rw-r--r-- riscos/menus.c 19
-rw-r--r-- riscos/window.c 18
-rw-r--r-- utils/utf8.c 67
-rw-r--r-- utils/utf8.h 12
-rw-r--r-- utils/utils.c 34
-rw-r--r-- utils/utils.h 3
8 files changed, 89 insertions, 90 deletions
diff --git a/desktop/browser.c b/desktop/browser.c
index 49d81dadd..13ca3cab9 100644
--- a/desktop/browser.c
+++ b/desktop/browser.c
@@ -41,6 +41,7 @@
#include "netsurf/utils/talloc.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
+#include "netsurf/utils/utf8.h"
/** browser window which is being redrawn. Valid only during redraw. */
@@ -480,12 +481,16 @@ void browser_window_update(struct browser_window *bw,
char *title_local_enc;
struct box *pos;
int x, y;
+ utf8_convert_ret err;
if (!bw->current_content)
return;
- if (bw->current_content->title != NULL
- && (title_local_enc = cnv_str_local_enc(bw->current_content->title)) != NULL) {
+ if (bw->current_content->title != NULL) {
+ err = utf8_to_enc(bw->current_content->title,
+ local_encoding_name(), 0, &title_local_enc);
+ /* this should never fail */
+ assert(err == UTF8_CONVERT_OK);
gui_window_set_title(bw->window, title_local_enc);
free(title_local_enc);
} else
diff --git a/render/form.c b/render/form.c
index 40c761623..6e213cb7c 100644
--- a/render/form.c
+++ b/render/form.c
@@ -486,19 +486,28 @@ char *form_url_encode(struct form *form,
char *name, *value, *n_temp, *v_temp;
char *s = malloc(1), *s2;
unsigned int len = 0, len1;
+ utf8_convert_ret err;
if (!s)
return 0;
s[0] = 0;
for (; control; control = control->next) {
- n_temp = utf8_to_enc(control->name, form->charset, 0);
- if (!n_temp) {
+ /** \todo fallback to document encoding or 8859-1 as
+ * last resort.
+ * What would also be an improvement would be to choose
+ * an encoding acceptable by the server which covers as much
+ * of the input values as possible. Additionally, we need to
+ * handle the case where none of the acceptable encodings
+ * cover all the textual input values.
+ */
+ err = utf8_to_enc(control->name, form->charset, 0, &n_temp);
+ if (err != UTF8_CONVERT_OK) {
free(s);
return 0;
}
- v_temp = utf8_to_enc(control->value, form->charset, 0);
- if (!v_temp) {
+ err = utf8_to_enc(control->value, form->charset, 0, &v_temp);
+ if (err != UTF8_CONVERT_OK) {
free(n_temp);
free(s);
return 0;
diff --git a/riscos/menus.c b/riscos/menus.c
index 0fb7cec1f..e15c638d5 100644
--- a/riscos/menus.c
+++ b/riscos/menus.c
@@ -32,6 +32,7 @@
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/utils.h"
+#include "netsurf/utils/utf8.h"
struct ns_menu_entry {
@@ -534,7 +535,7 @@ void ro_gui_menu_closed(void) {
void ro_gui_menu_objects_moved(void) {
gui_form_select_control = NULL;
current_menu_object_box = NULL;
-
+
ro_gui_menu_prepare_action(0, BROWSER_OBJECT, false);
if ((current_menu) && (current_menu == gui_form_select_menu))
ro_gui_menu_closed();
@@ -566,6 +567,7 @@ void ro_gui_menu_selection(wimp_selection *selection) {
entries[selection->items[i]];
action = ro_gui_menu_find_action(current_menu, menu_entry);
+
/* perform menu action */
if (action != NO_ACTION)
ro_gui_menu_handle_action(current_menu_window, action, false);
@@ -904,6 +906,7 @@ void gui_create_form_select_menu(struct browser_window *bw,
wimp_pointer pointer;
os_error *error;
bool reopen = true;
+ utf8_convert_ret err;
assert(control);
@@ -959,13 +962,11 @@ void gui_create_form_select_menu(struct browser_window *bw,
wimp_ICON_FG_COLOUR_SHIFT) |
(wimp_COLOUR_WHITE <<
wimp_ICON_BG_COLOUR_SHIFT);
- text_convert = cnv_str_local_enc(option->text);
- if (!text_convert) {
- LOG(("cnv_str_local_enc failed."));
- warn_user("NoMemory", 0);
- ro_gui_menu_closed();
- return;
- }
+ err = utf8_to_enc(option->text,
+ local_encoding_name(), 0, &text_convert);
+ /* this should never fail */
+ assert(err == UTF8_CONVERT_OK);
+
gui_form_select_menu->entries[i].data.indirected_text.text =
text_convert;
/* convert spaces to hard spaces to stop things like 'Go Home'
@@ -2004,7 +2005,7 @@ int ro_gui_menu_get_checksum(void) {
int i = 0, j, checksum = 0;
os_error *error;
wimp_menu *menu;
-
+
if (!current_menu_open)
return 0;
diff --git a/riscos/window.c b/riscos/window.c
index 88e847bd8..01c7ce8c2 100644
--- a/riscos/window.c
+++ b/riscos/window.c
@@ -884,23 +884,19 @@ void gui_window_set_extent(struct gui_window *g, int width, int height)
void gui_window_set_status(struct gui_window *g, const char *text)
{
char *local_text;
+ utf8_convert_ret err;
if ((!g->toolbar) || (!g->toolbar->status_handle))
return;
/* convert text to local encoding */
- local_text = cnv_str_local_enc(text);
- if (!local_text) {
- LOG(("failed converting '%s' to local encoding", text));
- /* just use the UTF-8 text */
- ro_gui_set_icon_string(g->toolbar->status_handle,
- ICON_STATUS_TEXT, text);
- }
- else {
- ro_gui_set_icon_string(g->toolbar->status_handle,
+ err = utf8_to_enc(text, local_encoding_name(), 0, &local_text);
+ /* this should never fail */
+ assert(err == UTF8_CONVERT_OK);
+
+ ro_gui_set_icon_string(g->toolbar->status_handle,
ICON_STATUS_TEXT, local_text);
- free(local_text);
- }
+ free(local_text);
}
diff --git a/utils/utf8.c b/utils/utf8.c
index b2a219ced..3763b3af0 100644
--- a/utils/utf8.c
+++ b/utils/utf8.c
@@ -10,23 +10,23 @@
*/
#include <assert.h>
+#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
+#include "netsurf/utils/log.h"
#include "netsurf/utils/utf8.h"
-static char *utf8_convert(const char *string, size_t len, const char *from,
- const char *to);
+static utf8_convert_ret utf8_convert(const char *string, size_t len,
+ const char *from, const char *to, char **result);
/**
* Convert a UTF-8 multibyte sequence into a single UCS4 character
*
* Encoding of UCS values outside the UTF-16 plane has been removed from
- * RFC3629. This function conforms to RFC2279, however, as it is possible
- * that the platform specific keyboard input handler will generate a UCS4
- * value outside the UTF-16 plane.
+ * RFC3629. This function conforms to RFC2279, however.
*
* \param s The sequence to process
* \param l Length of sequence
@@ -72,9 +72,7 @@ size_t utf8_to_ucs4(const char *s, size_t l)
* Convert a single UCS4 character into a UTF-8 multibyte sequence
*
* Encoding of UCS values outside the UTF-16 plane has been removed from
- * RFC3629. This function conforms to RFC2279, however, as it is possible
- * that the platform specific keyboard input handler will generate a UCS4
- * value outside the UTF-16 plane.
+ * RFC3629. This function conforms to RFC2279, however.
*
* \param c The character to process (0 <= c <= 0x7FFFFFFF)
* \param s Pointer to 6 byte long output buffer
@@ -207,24 +205,28 @@ size_t utf8_next(const char *s, size_t l, size_t o)
* \param string The NULL-terminated string to convert
* \param encname The encoding name (suitable for passing to iconv)
* \param len Length of input string to consider (in bytes), or 0
- * \return Pointer to converted string (on heap) or NULL on error
+ * \param result Pointer to location to store result (allocated on heap)
+ * \return Appropriate utf8_convert_ret value
*/
-char *utf8_to_enc(const char *string, const char *encname, size_t len)
+utf8_convert_ret utf8_to_enc(const char *string, const char *encname,
+ size_t len, char **result)
{
- return utf8_convert(string, len, "UTF-8", encname);
+ return utf8_convert(string, len, "UTF-8", encname, result);
}
/**
- * Convert a UTF8 string into the named encoding
+ * Convert a string in the named encoding into a UTF-8 string
*
* \param string The NULL-terminated string to convert
* \param encname The encoding name (suitable for passing to iconv)
* \param len Length of input string to consider (in bytes), or 0
- * \return Pointer to converted string (on heap) or NULL on error
+ * \param result Pointer to location to store result (allocated on heap)
+ * \return Appropriate utf8_convert_ret value
*/
-char *utf8_from_enc(const char *string, const char *encname, size_t len)
+utf8_convert_ret utf8_from_enc(const char *string, const char *encname,
+ size_t len, char **result)
{
- return utf8_convert(string, len, encname, "UTF-8");
+ return utf8_convert(string, len, encname, "UTF-8", result);
}
/**
@@ -234,23 +236,27 @@ char *utf8_from_enc(const char *string, const char *encname, size_t len)
* \param len Length of input string to consider (in bytes)
* \param from The encoding name to convert from
* \param to The encoding name to convert to
- * \return Pointer to converted string (on heap) or NULL on error
+ * \param result Pointer to location in which to store result
+ * \return Appropriate utf8_convert_ret value
*/
-char *utf8_convert(const char *string, size_t len, const char *from,
- const char *to)
+utf8_convert_ret utf8_convert(const char *string, size_t len,
+ const char *from, const char *to, char **result)
{
iconv_t cd;
char *ret, *temp, *out, *in;
size_t slen, rlen;
- if (!string || !from || !to)
- return NULL;
+ assert(string && from && to && result);
in = (char *)string;
cd = iconv_open(to, from);
- if (cd == (iconv_t)-1)
- return NULL;
+ if (cd == (iconv_t)-1) {
+ if (errno == EINVAL)
+ return UTF8_CONVERT_BADENC;
+ /* default to no memory */
+ return UTF8_CONVERT_NOMEM;
+ }
slen = len ? len : strlen(string);
/* Worst case = ACSII -> UCS4, so allocate an output buffer
@@ -262,14 +268,19 @@ char *utf8_convert(const char *string, size_t len, const char *from,
temp = out = calloc(rlen, sizeof(char));
if (!out) {
iconv_close(cd);
- return NULL;
+ return UTF8_CONVERT_NOMEM;
}
/* perform conversion */
if (iconv(cd, &in, &slen, &out, &rlen) == (size_t)-1) {
free(temp);
iconv_close(cd);
- return NULL;
+ /** \todo handle the various cases properly
+ * There are 3 possible error cases:
+ * a) Insufficiently large output buffer
+ * b) Invalid input byte sequence
+ * c) Incomplete input sequence */
+ return UTF8_CONVERT_NOMEM;
}
iconv_close(cd);
@@ -277,12 +288,18 @@ char *utf8_convert(const char *string, size_t len, const char *from,
if (rlen > 64 /* allow 64bytes wasted space */) {
/* and allocate a more sensibly sized output buffer */
ret = calloc(out - temp + 4, sizeof(char));
+ if (!ret) {
+ free(temp);
+ return UTF8_CONVERT_NOMEM;
+ }
memcpy(ret, temp, out - temp);
free(temp);
}
else
ret = temp;
- return ret;
+ *result = ret;
+
+ return UTF8_CONVERT_OK;
}
diff --git a/utils/utf8.h b/utils/utf8.h
index 02ff0322d..56d2534a5 100644
--- a/utils/utf8.h
+++ b/utils/utf8.h
@@ -12,6 +12,12 @@
#ifndef _NETSURF_UTILS_UTF8_H_
#define _NETSURF_UTILS_UTF8_H_
+typedef enum {
+ UTF8_CONVERT_OK,
+ UTF8_CONVERT_NOMEM,
+ UTF8_CONVERT_BADENC
+} utf8_convert_ret;
+
size_t utf8_to_ucs4(const char *s, size_t l);
size_t utf8_from_ucs4(size_t c, char *s);
@@ -20,7 +26,9 @@ size_t utf8_length(const char *s);
size_t utf8_prev(const char *s, size_t o);
size_t utf8_next(const char *s, size_t l, size_t o);
-char *utf8_to_enc(const char *string, const char *encname, size_t len);
-char *utf8_from_enc(const char *string, const char *encname, size_t len);
+utf8_convert_ret utf8_to_enc(const char *string, const char *encname,
+ size_t len, char **result);
+utf8_convert_ret utf8_from_enc(const char *string, const char *encname,
+ size_t len, char **result);
#endif
diff --git a/utils/utils.c b/utils/utils.c
index 03145df2d..0632b2318 100644
--- a/utils/utils.c
+++ b/utils/utils.c
@@ -104,40 +104,6 @@ char *cnv_space2nbsp(const char *s)
}
/**
- * Convert local encoding to NUL terminated UTF-8 string.
- * Caller needs to free return value.
- *
- * \param s string in local machine encoding. NUL or length terminated (which comes first).
- * \param length maximum number of bytes to consider at s.
- * \return malloc()'ed NUL termined string in UTF-8 encoding.
- */
-char *cnv_local_enc_str(const char *s, size_t length)
-{
- return utf8_from_enc(s, local_encoding_name(), length);
-}
-
-
-/**
- * Converts NUL terminated UTF-8 string <s> to the machine local encoding.
- * Caller needs to free return value.
- */
-char *cnv_str_local_enc(const char *s)
-{
- return cnv_strn_local_enc(s, 0);
-}
-
-
-/**
- * Converts UTF-8 string <s> of <length> bytes to the machine local encoding.
- * Caller needs to free return value.
- */
-char *cnv_strn_local_enc(const char *s, int length)
-{
- return utf8_to_enc(s, local_encoding_name(), length);
-}
-
-
-/**
* Check if a directory exists.
*/
diff --git a/utils/utils.h b/utils/utils.h
index 6f085af73..27c4be7d6 100644
--- a/utils/utils.h
+++ b/utils/utils.h
@@ -51,9 +51,6 @@ char * strip(char * const s);
int whitespace(const char * str);
char * squash_whitespace(const char * s);
char *cnv_space2nbsp(const char *s);
-char *cnv_local_enc_str(const char *s, size_t length);
-char *cnv_str_local_enc(const char *s);
-char *cnv_strn_local_enc(const char *s, int length);
bool is_dir(const char *path);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void clean_cookiejar(void);