From 0bcfdbeb50b2435b848ea1fd50ccc79ba64fd129 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Tue, 7 Jun 2005 21:29:26 +0000 Subject: [project @ 2005-06-07 21:29:26 by jmb] Lose cnv_str_local_enc and friends. UTF-8 conversion functions now return an enumerated type allowing for fallbacks, if appropriate. svn path=/import/netsurf/; revision=1744 --- desktop/browser.c | 9 ++++++-- render/form.c | 17 ++++++++++---- riscos/menus.c | 19 ++++++++-------- riscos/window.c | 18 ++++++--------- utils/utf8.c | 67 ++++++++++++++++++++++++++++++++++--------------------- utils/utf8.h | 12 ++++++++-- utils/utils.c | 34 ---------------------------- utils/utils.h | 3 --- 8 files changed, 89 insertions(+), 90 deletions(-) diff --git a/desktop/browser.c b/desktop/browser.c index 49d81dadd..13ca3cab9 100644 --- a/desktop/browser.c +++ b/desktop/browser.c @@ -41,6 +41,7 @@ #include "netsurf/utils/talloc.h" #include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" +#include "netsurf/utils/utf8.h" /** browser window which is being redrawn. Valid only during redraw. */ @@ -480,12 +481,16 @@ void browser_window_update(struct browser_window *bw, char *title_local_enc; struct box *pos; int x, y; + utf8_convert_ret err; if (!bw->current_content) return; - if (bw->current_content->title != NULL - && (title_local_enc = cnv_str_local_enc(bw->current_content->title)) != NULL) { + if (bw->current_content->title != NULL) { + err = utf8_to_enc(bw->current_content->title, + local_encoding_name(), 0, &title_local_enc); + /* this should never fail */ + assert(err == UTF8_CONVERT_OK); gui_window_set_title(bw->window, title_local_enc); free(title_local_enc); } else diff --git a/render/form.c b/render/form.c index 40c761623..6e213cb7c 100644 --- a/render/form.c +++ b/render/form.c @@ -486,19 +486,28 @@ char *form_url_encode(struct form *form, char *name, *value, *n_temp, *v_temp; char *s = malloc(1), *s2; unsigned int len = 0, len1; + utf8_convert_ret err; if (!s) return 0; s[0] = 0; for (; control; control = control->next) { - n_temp = utf8_to_enc(control->name, form->charset, 0); - if (!n_temp) { + /** \todo fallback to document encoding or 8859-1 as + * last resort. + * What would also be an improvement would be to choose + * an encoding acceptable by the server which covers as much + * of the input values as possible. Additionally, we need to + * handle the case where none of the acceptable encodings + * cover all the textual input values. + */ + err = utf8_to_enc(control->name, form->charset, 0, &n_temp); + if (err != UTF8_CONVERT_OK) { free(s); return 0; } - v_temp = utf8_to_enc(control->value, form->charset, 0); - if (!v_temp) { + err = utf8_to_enc(control->value, form->charset, 0, &v_temp); + if (err != UTF8_CONVERT_OK) { free(n_temp); free(s); return 0; diff --git a/riscos/menus.c b/riscos/menus.c index 0fb7cec1f..e15c638d5 100644 --- a/riscos/menus.c +++ b/riscos/menus.c @@ -32,6 +32,7 @@ #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" #include "netsurf/utils/utils.h" +#include "netsurf/utils/utf8.h" struct ns_menu_entry { @@ -534,7 +535,7 @@ void ro_gui_menu_closed(void) { void ro_gui_menu_objects_moved(void) { gui_form_select_control = NULL; current_menu_object_box = NULL; - + ro_gui_menu_prepare_action(0, BROWSER_OBJECT, false); if ((current_menu) && (current_menu == gui_form_select_menu)) ro_gui_menu_closed(); @@ -566,6 +567,7 @@ void ro_gui_menu_selection(wimp_selection *selection) { entries[selection->items[i]]; action = ro_gui_menu_find_action(current_menu, menu_entry); + /* perform menu action */ if (action != NO_ACTION) ro_gui_menu_handle_action(current_menu_window, action, false); @@ -904,6 +906,7 @@ void gui_create_form_select_menu(struct browser_window *bw, wimp_pointer pointer; os_error *error; bool reopen = true; + utf8_convert_ret err; assert(control); @@ -959,13 +962,11 @@ void gui_create_form_select_menu(struct browser_window *bw, wimp_ICON_FG_COLOUR_SHIFT) | (wimp_COLOUR_WHITE << wimp_ICON_BG_COLOUR_SHIFT); - text_convert = cnv_str_local_enc(option->text); - if (!text_convert) { - LOG(("cnv_str_local_enc failed.")); - warn_user("NoMemory", 0); - ro_gui_menu_closed(); - return; - } + err = utf8_to_enc(option->text, + local_encoding_name(), 0, &text_convert); + /* this should never fail */ + assert(err == UTF8_CONVERT_OK); + gui_form_select_menu->entries[i].data.indirected_text.text = text_convert; /* convert spaces to hard spaces to stop things like 'Go Home' @@ -2004,7 +2005,7 @@ int ro_gui_menu_get_checksum(void) { int i = 0, j, checksum = 0; os_error *error; wimp_menu *menu; - + if (!current_menu_open) return 0; diff --git a/riscos/window.c b/riscos/window.c index 88e847bd8..01c7ce8c2 100644 --- a/riscos/window.c +++ b/riscos/window.c @@ -884,23 +884,19 @@ void gui_window_set_extent(struct gui_window *g, int width, int height) void gui_window_set_status(struct gui_window *g, const char *text) { char *local_text; + utf8_convert_ret err; if ((!g->toolbar) || (!g->toolbar->status_handle)) return; /* convert text to local encoding */ - local_text = cnv_str_local_enc(text); - if (!local_text) { - LOG(("failed converting '%s' to local encoding", text)); - /* just use the UTF-8 text */ - ro_gui_set_icon_string(g->toolbar->status_handle, - ICON_STATUS_TEXT, text); - } - else { - ro_gui_set_icon_string(g->toolbar->status_handle, + err = utf8_to_enc(text, local_encoding_name(), 0, &local_text); + /* this should never fail */ + assert(err == UTF8_CONVERT_OK); + + ro_gui_set_icon_string(g->toolbar->status_handle, ICON_STATUS_TEXT, local_text); - free(local_text); - } + free(local_text); } diff --git a/utils/utf8.c b/utils/utf8.c index b2a219ced..3763b3af0 100644 --- a/utils/utf8.c +++ b/utils/utf8.c @@ -10,23 +10,23 @@ */ #include +#include #include #include #include +#include "netsurf/utils/log.h" #include "netsurf/utils/utf8.h" -static char *utf8_convert(const char *string, size_t len, const char *from, - const char *to); +static utf8_convert_ret utf8_convert(const char *string, size_t len, + const char *from, const char *to, char **result); /** * Convert a UTF-8 multibyte sequence into a single UCS4 character * * Encoding of UCS values outside the UTF-16 plane has been removed from - * RFC3629. This function conforms to RFC2279, however, as it is possible - * that the platform specific keyboard input handler will generate a UCS4 - * value outside the UTF-16 plane. + * RFC3629. This function conforms to RFC2279, however. * * \param s The sequence to process * \param l Length of sequence @@ -72,9 +72,7 @@ size_t utf8_to_ucs4(const char *s, size_t l) * Convert a single UCS4 character into a UTF-8 multibyte sequence * * Encoding of UCS values outside the UTF-16 plane has been removed from - * RFC3629. This function conforms to RFC2279, however, as it is possible - * that the platform specific keyboard input handler will generate a UCS4 - * value outside the UTF-16 plane. + * RFC3629. This function conforms to RFC2279, however. * * \param c The character to process (0 <= c <= 0x7FFFFFFF) * \param s Pointer to 6 byte long output buffer @@ -207,24 +205,28 @@ size_t utf8_next(const char *s, size_t l, size_t o) * \param string The NULL-terminated string to convert * \param encname The encoding name (suitable for passing to iconv) * \param len Length of input string to consider (in bytes), or 0 - * \return Pointer to converted string (on heap) or NULL on error + * \param result Pointer to location to store result (allocated on heap) + * \return Appropriate utf8_convert_ret value */ -char *utf8_to_enc(const char *string, const char *encname, size_t len) +utf8_convert_ret utf8_to_enc(const char *string, const char *encname, + size_t len, char **result) { - return utf8_convert(string, len, "UTF-8", encname); + return utf8_convert(string, len, "UTF-8", encname, result); } /** - * Convert a UTF8 string into the named encoding + * Convert a string in the named encoding into a UTF-8 string * * \param string The NULL-terminated string to convert * \param encname The encoding name (suitable for passing to iconv) * \param len Length of input string to consider (in bytes), or 0 - * \return Pointer to converted string (on heap) or NULL on error + * \param result Pointer to location to store result (allocated on heap) + * \return Appropriate utf8_convert_ret value */ -char *utf8_from_enc(const char *string, const char *encname, size_t len) +utf8_convert_ret utf8_from_enc(const char *string, const char *encname, + size_t len, char **result) { - return utf8_convert(string, len, encname, "UTF-8"); + return utf8_convert(string, len, encname, "UTF-8", result); } /** @@ -234,23 +236,27 @@ char *utf8_from_enc(const char *string, const char *encname, size_t len) * \param len Length of input string to consider (in bytes) * \param from The encoding name to convert from * \param to The encoding name to convert to - * \return Pointer to converted string (on heap) or NULL on error + * \param result Pointer to location in which to store result + * \return Appropriate utf8_convert_ret value */ -char *utf8_convert(const char *string, size_t len, const char *from, - const char *to) +utf8_convert_ret utf8_convert(const char *string, size_t len, + const char *from, const char *to, char **result) { iconv_t cd; char *ret, *temp, *out, *in; size_t slen, rlen; - if (!string || !from || !to) - return NULL; + assert(string && from && to && result); in = (char *)string; cd = iconv_open(to, from); - if (cd == (iconv_t)-1) - return NULL; + if (cd == (iconv_t)-1) { + if (errno == EINVAL) + return UTF8_CONVERT_BADENC; + /* default to no memory */ + return UTF8_CONVERT_NOMEM; + } slen = len ? len : strlen(string); /* Worst case = ACSII -> UCS4, so allocate an output buffer @@ -262,14 +268,19 @@ char *utf8_convert(const char *string, size_t len, const char *from, temp = out = calloc(rlen, sizeof(char)); if (!out) { iconv_close(cd); - return NULL; + return UTF8_CONVERT_NOMEM; } /* perform conversion */ if (iconv(cd, &in, &slen, &out, &rlen) == (size_t)-1) { free(temp); iconv_close(cd); - return NULL; + /** \todo handle the various cases properly + * There are 3 possible error cases: + * a) Insufficiently large output buffer + * b) Invalid input byte sequence + * c) Incomplete input sequence */ + return UTF8_CONVERT_NOMEM; } iconv_close(cd); @@ -277,12 +288,18 @@ char *utf8_convert(const char *string, size_t len, const char *from, if (rlen > 64 /* allow 64bytes wasted space */) { /* and allocate a more sensibly sized output buffer */ ret = calloc(out - temp + 4, sizeof(char)); + if (!ret) { + free(temp); + return UTF8_CONVERT_NOMEM; + } memcpy(ret, temp, out - temp); free(temp); } else ret = temp; - return ret; + *result = ret; + + return UTF8_CONVERT_OK; } diff --git a/utils/utf8.h b/utils/utf8.h index 02ff0322d..56d2534a5 100644 --- a/utils/utf8.h +++ b/utils/utf8.h @@ -12,6 +12,12 @@ #ifndef _NETSURF_UTILS_UTF8_H_ #define _NETSURF_UTILS_UTF8_H_ +typedef enum { + UTF8_CONVERT_OK, + UTF8_CONVERT_NOMEM, + UTF8_CONVERT_BADENC +} utf8_convert_ret; + size_t utf8_to_ucs4(const char *s, size_t l); size_t utf8_from_ucs4(size_t c, char *s); @@ -20,7 +26,9 @@ size_t utf8_length(const char *s); size_t utf8_prev(const char *s, size_t o); size_t utf8_next(const char *s, size_t l, size_t o); -char *utf8_to_enc(const char *string, const char *encname, size_t len); -char *utf8_from_enc(const char *string, const char *encname, size_t len); +utf8_convert_ret utf8_to_enc(const char *string, const char *encname, + size_t len, char **result); +utf8_convert_ret utf8_from_enc(const char *string, const char *encname, + size_t len, char **result); #endif diff --git a/utils/utils.c b/utils/utils.c index 03145df2d..0632b2318 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -103,40 +103,6 @@ char *cnv_space2nbsp(const char *s) return d; } -/** - * Convert local encoding to NUL terminated UTF-8 string. - * Caller needs to free return value. - * - * \param s string in local machine encoding. NUL or length terminated (which comes first). - * \param length maximum number of bytes to consider at s. - * \return malloc()'ed NUL termined string in UTF-8 encoding. - */ -char *cnv_local_enc_str(const char *s, size_t length) -{ - return utf8_from_enc(s, local_encoding_name(), length); -} - - -/** - * Converts NUL terminated UTF-8 string to the machine local encoding. - * Caller needs to free return value. - */ -char *cnv_str_local_enc(const char *s) -{ - return cnv_strn_local_enc(s, 0); -} - - -/** - * Converts UTF-8 string of bytes to the machine local encoding. - * Caller needs to free return value. - */ -char *cnv_strn_local_enc(const char *s, int length) -{ - return utf8_to_enc(s, local_encoding_name(), length); -} - - /** * Check if a directory exists. */ diff --git a/utils/utils.h b/utils/utils.h index 6f085af73..27c4be7d6 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -51,9 +51,6 @@ char * strip(char * const s); int whitespace(const char * str); char * squash_whitespace(const char * s); char *cnv_space2nbsp(const char *s); -char *cnv_local_enc_str(const char *s, size_t length); -char *cnv_str_local_enc(const char *s); -char *cnv_strn_local_enc(const char *s, int length); bool is_dir(const char *path); void regcomp_wrapper(regex_t *preg, const char *regex, int cflags); void clean_cookiejar(void); -- cgit v1.2.3