From ce0d5294d5898b6100269bd39d38c0884d5fd4b4 Mon Sep 17 00:00:00 2001 From: James Bursa Date: Sat, 27 Dec 2003 20:15:23 +0000 Subject: [project @ 2003-12-27 20:15:22 by bursa] Use charset from Content-Type header. svn path=/import/netsurf/; revision=460 --- content/content.c | 12 ++++++--- content/content.h | 3 ++- content/fetchcache.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++----- content/fetchcache.h | 1 + content/other.c | 2 +- content/other.h | 2 +- css/css.c | 2 +- css/css.h | 44 +++++++++++++++++++----------- debug/netsurfd.c | 3 ++- desktop/netsurf.c | 1 + render/html.c | 19 ++++++++++--- render/html.h | 2 +- render/textplain.c | 4 +-- render/textplain.h | 2 +- riscos/draw.c | 2 +- riscos/draw.h | 2 +- riscos/gif.c | 2 +- riscos/gif.h | 2 +- riscos/jpeg.c | 2 +- riscos/jpeg.h | 2 +- riscos/png.c | 2 +- riscos/png.h | 2 +- riscos/sprite.c | 2 +- riscos/sprite.h | 2 +- utils/utils.c | 22 +++++++++++++++ utils/utils.h | 3 +++ 26 files changed, 169 insertions(+), 48 deletions(-) diff --git a/content/content.c b/content/content.c index 280364b9d..a3ff9f777 100644 --- a/content/content.c +++ b/content/content.c @@ -57,7 +57,7 @@ static const struct mime_entry mime_map[] = { /** An entry in handler_map. */ struct handler_entry { - void (*create)(struct content *c); + void (*create)(struct content *c, const char *params[]); void (*process_data)(struct content *c, char *data, unsigned long size); int (*convert)(struct content *c, unsigned int width, unsigned int height); void (*revive)(struct content *c, unsigned int width, unsigned int height); @@ -169,9 +169,15 @@ struct content * content_create(char *url) * status is changed to CONTENT_STATUS_LOADING. CONTENT_MSG_LOADING is sent to * all users. The create function for the type is called to initialise the type * specific parts of the content structure. 
+ * + * \param c content structure + * \param type content_type to initialise to + * \param mime_type MIME-type string for this content + * \param params array of strings, ordered attribute, value, attribute, ..., 0 */ -void content_set_type(struct content *c, content_type type, char* mime_type) +void content_set_type(struct content *c, content_type type, char* mime_type, + const char *params[]) { assert(c != 0); assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN); @@ -180,7 +186,7 @@ void content_set_type(struct content *c, content_type type, char* mime_type) c->type = type; c->mime_type = xstrdup(mime_type); c->status = CONTENT_STATUS_LOADING; - handler_map[type].create(c); + handler_map[type].create(c, params); content_broadcast(c, CONTENT_MSG_LOADING, 0); /* c may be destroyed at this point as a result of * CONTENT_MSG_LOADING, so must not be accessed */ diff --git a/content/content.h b/content/content.h index 72034b316..2e4a36f31 100644 --- a/content/content.h +++ b/content/content.h @@ -139,7 +139,8 @@ struct browser_window; content_type content_lookup(const char *mime_type); struct content * content_create(char *url); -void content_set_type(struct content *c, content_type type, char *mime_type); +void content_set_type(struct content *c, content_type type, char *mime_type, + const char *params[]); void content_process_data(struct content *c, char *data, unsigned long size); void content_convert(struct content *c, unsigned long width, unsigned long height); void content_revive(struct content *c, unsigned long width, unsigned long height); diff --git a/content/fetchcache.c b/content/fetchcache.c index 9e89f5876..cc755e73c 100644 --- a/content/fetchcache.c +++ b/content/fetchcache.c @@ -15,6 +15,8 @@ #include #include +#include +#include #include "netsurf/content/cache.h" #include "netsurf/content/content.h" #include "netsurf/content/fetchcache.h" @@ -23,7 +25,9 @@ #include "netsurf/utils/utils.h" +static regex_t re_content_type; static void 
fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size); +static char *fetchcache_parse_type(char *s, char **params[]); /** @@ -95,20 +99,21 @@ void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size) { struct content *c = p; content_type type; - char *mime_type; - char *semic; - char *url; + char *mime_type, *url; + char **params; + unsigned int i; switch (msg) { case FETCH_TYPE: c->total_size = size; - mime_type = xstrdup(data); - if ((semic = strchr(mime_type, ';')) != 0) - *semic = 0; /* remove "; charset=..." */ + mime_type = fetchcache_parse_type(data, &params); type = content_lookup(mime_type); LOG(("FETCH_TYPE, type %u", type)); - content_set_type(c, type, mime_type); + content_set_type(c, type, mime_type, params); free(mime_type); + for (i = 0; params[i]; i++) + free(params[i]); + free(params); break; case FETCH_DATA: @@ -172,6 +177,62 @@ void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size) } +/** + * Initialise the fetchcache module. + */ + +void fetchcache_init(void) +{ + regcomp_wrapper(&re_content_type, + "^([-0-9a-zA-Z_.]+/[-0-9a-zA-Z_.]+)[ \t]*" + "(;[ \t]*([-0-9a-zA-Z_.]+)=" + "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", + REG_EXTENDED); +} + + +/** + * Parse a Content-Type header. 
+ * + * \param s a Content-Type header + * \param params updated to point to an array of strings, ordered attribute, + * value, attribute, ..., 0 + * \return a new string containing the MIME-type + */ + +#define MAX_ATTRS 10 + +char *fetchcache_parse_type(char *s, char **params[]) +{ + char *type; + unsigned int i; + int r; + regmatch_t pmatch[2 + MAX_ATTRS * 3]; + *params = xcalloc(MAX_ATTRS * 2 + 2, sizeof (*params)[0]); + + r = regexec(&re_content_type, s, 2 + MAX_ATTRS * 3, pmatch, 0); + if (r) { + LOG(("failed to parse content-type '%s'", s)); + return xstrdup(s); + } + + type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so); + assert(type); + + /* parameters */ + for (i = 0; i != MAX_ATTRS && pmatch[2 + 3 * i].rm_so != -1; i++) { + (*params)[2 * i] = strndup(s + pmatch[2 + 3 * i + 1].rm_so, + pmatch[2 + 3 * i + 1].rm_eo - pmatch[2 + 3 * i + 1].rm_so); + (*params)[2 * i + 1] = strndup(s + pmatch[2 + 3 * i + 2].rm_so, + pmatch[2 + 3 * i + 2].rm_eo - pmatch[2 + 3 * i + 2].rm_so); + assert((*params)[2 * i] && (*params)[2 * i + 1]); + } + (*params)[2 * i] = 0; + + return type; +} + + #ifdef TEST #include diff --git a/content/fetchcache.h b/content/fetchcache.h index 36d40e098..12326970b 100644 --- a/content/fetchcache.h +++ b/content/fetchcache.h @@ -26,5 +26,6 @@ struct content * fetchcache(const char *url, char *referer, void *p1, void *p2, unsigned long width, unsigned long height, bool only_2xx, char *post_urlenc, struct form_successful_control *post_multipart, bool cookies); +void fetchcache_init(void); #endif diff --git a/content/other.c b/content/other.c index a974bd8ed..a4ef7bec7 100644 --- a/content/other.c +++ b/content/other.c @@ -17,7 +17,7 @@ #include "netsurf/utils/utils.h" -void other_create(struct content *c) +void other_create(struct content *c, char *params[]) { c->data.other.data = xcalloc(0, 1); c->data.other.length = 0; diff --git a/content/other.h b/content/other.h index 96326a159..71fd4aab4 100644 --- a/content/other.h +++ 
b/content/other.h @@ -24,7 +24,7 @@ struct content_other_data { unsigned long length; /**< Current length of stored data. */ }; -void other_create(struct content *c); +void other_create(struct content *c, char *params[]); void other_process_data(struct content *c, char *data, unsigned long size); int other_convert(struct content *c, unsigned int width, unsigned int height); void other_revive(struct content *c, unsigned int width, unsigned int height); diff --git a/css/css.c b/css/css.c index 1909bd536..45b3485d8 100644 --- a/css/css.c +++ b/css/css.c @@ -91,7 +91,7 @@ const struct css_style css_blank_style = { -void css_create(struct content *c) +void css_create(struct content *c, const char *params[]) { unsigned int i; LOG(("content %p", c)); diff --git a/css/css.h b/css/css.h index bdb13167a..640c0c041 100644 --- a/css/css.h +++ b/css/css.h @@ -5,6 +5,21 @@ * Copyright 2003 James Bursa */ +/** \file + * CSS handling (interface). + * + * This module aims to implement CSS 2.1. + * + * CSS stylesheets are held in a struct ::content with type CONTENT_CSS. + * Creation and parsing should be carried out via the content_* functions. + * + * Styles are stored in a struct ::css_style, which can be retrieved from a + * content using css_get_style(). + * + * css_parse_property_list() constructs a struct ::css_style from a CSS + * property list, as found in HTML style attributes. + */ + #ifndef _NETSURF_CSS_CSS_H_ #define _NETSURF_CSS_CSS_H_ @@ -12,7 +27,7 @@ #include "libxml/HTMLparser.h" #include "css_enum.h" -/** +/* * structures and typedefs */ @@ -21,6 +36,7 @@ typedef unsigned long colour; /* 0xbbggrr */ #define CSS_COLOR_INHERIT 0x2000000 #define CSS_COLOR_NONE 0x3000000 +/** Representation of a CSS 2 length. */ struct css_length { float value; css_unit unit; @@ -36,6 +52,7 @@ typedef enum { CSS_TEXT_DECORATION_UNKNOWN = 0x1000 } css_text_decoration; +/** Representation of a complete CSS 2 style. 
*/ struct css_style { colour background_color; css_clear clear; @@ -98,19 +115,14 @@ struct css_style { struct css_stylesheet; -struct css_selector { - const char *element; - char *class; - char *id; -}; - +/** Data specific to CONTENT_CSS. */ struct content_css_data { - struct css_stylesheet *css; - unsigned int import_count; - char **import_url; - struct content **import_content; - char *data; - unsigned int length; + struct css_stylesheet *css; /**< Opaque stylesheet data. */ + unsigned int import_count; /**< Number of entries in import_url. */ + char **import_url; /**< Imported stylesheet urls. */ + struct content **import_content; /**< Imported stylesheet contents. */ + char *data; /**< Source data. */ + unsigned int length; /**< Current length of data. */ }; @@ -191,13 +203,13 @@ struct parse_params { #endif -/** +/* * interface */ struct content; -void css_create(struct content *c); +void css_create(struct content *c, const char *params[]); void css_process_data(struct content *c, char *data, unsigned long size); int css_convert(struct content *c, unsigned int width, unsigned int height); void css_revive(struct content *c, unsigned int width, unsigned int height); @@ -218,7 +230,7 @@ void css_add_declarations(struct css_style *style, struct css_node *declaration) unsigned int css_hash(const char *s); void css_parser_Trace(FILE *TraceFILE, char *zTracePrompt); -void *css_parser_Alloc(void *(*mallocProc)(int)); +void *css_parser_alloc(void *(*mallocProc)(size_t)); void css_parser_Free(void *p, void (*freeProc)(void*)); void css_parser_(void *yyp, int yymajor, char* yyminor, struct parse_params *param); diff --git a/debug/netsurfd.c b/debug/netsurfd.c index 75417d87b..3bf178ee0 100644 --- a/debug/netsurfd.c +++ b/debug/netsurfd.c @@ -39,6 +39,7 @@ int main(int argc, char *argv[]) fetch_init(); cache_init(); + fetchcache_init(); while (1) { puts("=== URL:"); @@ -46,7 +47,7 @@ int main(int argc, char *argv[]) return 0; url[strlen(url) - 1] = 0; destroyed = 0; - c 
= fetchcache(url, 0, callback, 0, 0, 100, 1000, false, 0, 0); + c = fetchcache(url, 0, callback, 0, 0, 100, 1000, false, 0, 0, true); if (c) { done = c->status == CONTENT_STATUS_DONE; while (!done) diff --git a/desktop/netsurf.c b/desktop/netsurf.c index 61b4568a9..607f931a8 100644 --- a/desktop/netsurf.c +++ b/desktop/netsurf.c @@ -52,6 +52,7 @@ void netsurf_init(int argc, char** argv) gui_init(argc, argv); fetch_init(); cache_init(); + fetchcache_init(); nspng_init(); nsgif_init(); } diff --git a/render/html.c b/render/html.c index 7345214d5..769610798 100644 --- a/render/html.c +++ b/render/html.c @@ -29,9 +29,21 @@ static void html_object_callback(content_msg msg, struct content *object, void *p1, void *p2, const char *error); -void html_create(struct content *c) +void html_create(struct content *c, const char *params[]) { - c->data.html.parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0, XML_CHAR_ENCODING_8859_1); + unsigned int i; + xmlCharEncoding encoding = XML_CHAR_ENCODING_8859_1; + + for (i = 0; params[i]; i += 2) { + if (strcasecmp(params[i], "charset") == 0) { + encoding = xmlParseCharEncoding(params[i + 1]); + if (encoding == XML_CHAR_ENCODING_ERROR) + encoding = XML_CHAR_ENCODING_8859_1; + break; + } + } + + c->data.html.parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0, encoding); c->data.html.layout = NULL; c->data.html.style = NULL; c->data.html.fonts = NULL; @@ -341,9 +353,10 @@ void html_find_stylesheets(struct content *c, xmlNode *head) /* create stylesheet */ LOG(("style element")); if (c->data.html.stylesheet_content[1] == 0) { + const char *params[] = { 0 }; c->data.html.stylesheet_content[1] = content_create(c->data.html.base_url); - content_set_type(c->data.html.stylesheet_content[1], CONTENT_CSS, "text/css"); + content_set_type(c->data.html.stylesheet_content[1], CONTENT_CSS, "text/css", params); } /* can't just use xmlNodeGetContent(node), because that won't give diff --git a/render/html.h b/render/html.h index 8a5047cc2..b48c35929 100644 
--- a/render/html.h +++ b/render/html.h @@ -51,7 +51,7 @@ struct content_html_data { } *object; }; -void html_create(struct content *c); +void html_create(struct content *c, const char *params[]); void html_process_data(struct content *c, char *data, unsigned long size); int html_convert(struct content *c, unsigned int width, unsigned int height); void html_revive(struct content *c, unsigned int width, unsigned int height); diff --git a/render/textplain.c b/render/textplain.c index d0b0a981b..524865846 100644 --- a/render/textplain.c +++ b/render/textplain.c @@ -18,9 +18,9 @@ static const char header[] = "<html><body><pre>";
 static const char footer[] = "</pre></body></html>
"; -void textplain_create(struct content *c) +void textplain_create(struct content *c, const char *params[]) { - html_create(c); + html_create(c, params); htmlParseChunk(c->data.html.parser, header, sizeof(header) - 1, 0); } diff --git a/render/textplain.h b/render/textplain.h index 80f428e86..75b52dd23 100644 --- a/render/textplain.h +++ b/render/textplain.h @@ -10,7 +10,7 @@ #include "netsurf/content/content.h" -void textplain_create(struct content *c); +void textplain_create(struct content *c, const char *params[]); void textplain_process_data(struct content *c, char *data, unsigned long size); int textplain_convert(struct content *c, unsigned int width, unsigned int height); void textplain_revive(struct content *c, unsigned int width, unsigned int height); diff --git a/riscos/draw.c b/riscos/draw.c index 3068649c1..c215b38f6 100644 --- a/riscos/draw.c +++ b/riscos/draw.c @@ -14,7 +14,7 @@ #include "netsurf/utils/log.h" #include "oslib/drawfile.h" -void draw_create(struct content *c) +void draw_create(struct content *c, const char *params[]) { c->data.draw.data = xcalloc(0, 1); c->data.draw.length = 0; diff --git a/riscos/draw.h b/riscos/draw.h index b7eef6db3..e16734b97 100644 --- a/riscos/draw.h +++ b/riscos/draw.h @@ -16,7 +16,7 @@ struct content_draw_data { }; void draw_init(void); -void draw_create(struct content *c); +void draw_create(struct content *c, const char *params[]); void draw_process_data(struct content *c, char *data, unsigned long size); int draw_convert(struct content *c, unsigned int width, unsigned int height); void draw_revive(struct content *c, unsigned int width, unsigned int height); diff --git a/riscos/gif.c b/riscos/gif.c index 97ed89e76..7f2acb40b 100644 --- a/riscos/gif.c +++ b/riscos/gif.c @@ -26,7 +26,7 @@ void nsgif_init(void) { } -void nsgif_create(struct content*c) +void nsgif_create(struct content *c, const char *params[]) { c->data.gif.sprite_area = 0; c->data.gif.data = xcalloc(0, 1); diff --git a/riscos/gif.h b/riscos/gif.h 
index 4b59e533e..f0b2bad43 100644 --- a/riscos/gif.h +++ b/riscos/gif.h @@ -21,7 +21,7 @@ struct content_gif_data { }; void nsgif_init(void); -void nsgif_create(struct content *c); +void nsgif_create(struct content *c, const char *params[]); void nsgif_process_data(struct content *c, char *data, unsigned long size); int nsgif_convert(struct content *c, unsigned int width, unsigned int height); void nsgif_revive(struct content *c, unsigned int width, unsigned int height); diff --git a/riscos/jpeg.c b/riscos/jpeg.c index b1f62c169..db119484d 100644 --- a/riscos/jpeg.c +++ b/riscos/jpeg.c @@ -19,7 +19,7 @@ #include "oslib/jpeg.h" -void jpeg_create(struct content *c) +void jpeg_create(struct content *c, const char *params[]) { c->data.jpeg.data = xcalloc(0, 1); c->data.jpeg.length = 0; diff --git a/riscos/jpeg.h b/riscos/jpeg.h index 12a567755..26bf1d74a 100644 --- a/riscos/jpeg.h +++ b/riscos/jpeg.h @@ -15,7 +15,7 @@ struct content_jpeg_data { unsigned long length; }; -void jpeg_create(struct content *c); +void jpeg_create(struct content *c, const char *params[]); void jpeg_process_data(struct content *c, char *data, unsigned long size); int jpeg_convert(struct content *c, unsigned int width, unsigned int height); void jpeg_revive(struct content *c, unsigned int width, unsigned int height); diff --git a/riscos/png.c b/riscos/png.c index c6fbb3cfc..d56c5d827 100644 --- a/riscos/png.c +++ b/riscos/png.c @@ -64,7 +64,7 @@ void nspng_init(void) } -void nspng_create(struct content *c) +void nspng_create(struct content *c, const char *params[]) { #ifndef NO_IFC if (imagefileconvert) { diff --git a/riscos/png.h b/riscos/png.h index a64ddef2d..f52514b73 100644 --- a/riscos/png.h +++ b/riscos/png.h @@ -27,7 +27,7 @@ struct content_png_data { }; void nspng_init(void); -void nspng_create(struct content *c); +void nspng_create(struct content *c, const char *params[]); void nspng_process_data(struct content *c, char *data, unsigned long size); int nspng_convert(struct content *c, 
unsigned int width, unsigned int height); void nspng_revive(struct content *c, unsigned int width, unsigned int height); diff --git a/riscos/sprite.c b/riscos/sprite.c index 7470ea0b1..51c3a057c 100644 --- a/riscos/sprite.c +++ b/riscos/sprite.c @@ -15,7 +15,7 @@ #include "oslib/colourtrans.h" #include "oslib/osspriteop.h" -void sprite_create(struct content *c) +void sprite_create(struct content *c, const char *params[]) { c->data.sprite.data = xcalloc(4, 1); c->data.sprite.length = 4; diff --git a/riscos/sprite.h b/riscos/sprite.h index bef511ad0..5096081f7 100644 --- a/riscos/sprite.h +++ b/riscos/sprite.h @@ -18,7 +18,7 @@ struct content_sprite_data { }; void sprite_init(void); -void sprite_create(struct content *c); +void sprite_create(struct content *c, const char *params[]); void sprite_process_data(struct content *c, char *data, unsigned long size); int sprite_convert(struct content *c, unsigned int width, unsigned int height); void sprite_revive(struct content *c, unsigned int width, unsigned int height); diff --git a/utils/utils.c b/utils/utils.c index 2ebdf9e5d..cb331a55f 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "libxml/encoding.h" #include "libxml/uri.h" #include "netsurf/utils/log.h" @@ -256,3 +258,23 @@ bool is_dir(const char *path) return S_ISDIR(s.st_mode) ? true : false; } + + +/** + * Compile a regular expression, handling errors. + * + * Parameters as for regcomp(), see man regex. 
+ */ + +void regcomp_wrapper(regex_t *preg, const char *regex, int cflags) +{ + char errbuf[200]; + int r; + r = regcomp(preg, regex, cflags); + if (r) { + regerror(r, preg, errbuf, sizeof errbuf); + fprintf(stderr, "Failed to compile regexp '%s'\n", regex); + die(errbuf); + } +} + diff --git a/utils/utils.h b/utils/utils.h index 3ca3072af..e3a210352 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -10,6 +10,8 @@ #include #include +#include +#include #include "libxml/encoding.h" void die(const char * const error); @@ -27,5 +29,6 @@ char *squash_tolat1(xmlChar *s); char *url_join(char *rel_url, char *base_url); char *get_host_from_url(char* url); bool is_dir(const char *path); +void regcomp_wrapper(regex_t *preg, const char *regex, int cflags); #endif -- cgit v1.2.3