diff options
Diffstat (limited to 'render/html.c')
-rw-r--r-- | render/html.c | 513 |
1 files changed, 108 insertions, 405 deletions
diff --git a/render/html.c b/render/html.c index 996bfc3a5..8c11f35b9 100644 --- a/render/html.c +++ b/render/html.c @@ -28,8 +28,6 @@ #include <strings.h> #include <stdlib.h> -#include <dom/dom.h> - #include "utils/config.h" #include "content/content_protected.h" #include "content/fetch.h" @@ -38,7 +36,6 @@ #include "desktop/options.h" #include "desktop/selection.h" #include "desktop/scrollbar.h" -#include "javascript/js.h" #include "image/bitmap.h" #include "render/box.h" #include "render/font.h" @@ -86,15 +83,15 @@ static dom_string *html_dom_string_head; static dom_string *html_dom_string_rel; dom_string *html_dom_string_href; static dom_string *html_dom_string_hreflang; -static dom_string *html_dom_string_type; +dom_string *html_dom_string_type; static dom_string *html_dom_string_media; static dom_string *html_dom_string_sizes; static dom_string *html_dom_string_title; static dom_string *html_dom_string_base; static dom_string *html_dom_string_link; static dom_string *html_dom_string_script; -static dom_string *html_dom_string_text_javascript; -static dom_string *html_dom_string_src; +dom_string *html_dom_string_text_javascript; +dom_string *html_dom_string_src; dom_string *html_dom_string_target; static dom_string *html_dom_string__parent; static dom_string *html_dom_string__self; @@ -117,107 +114,6 @@ dom_string *html_dom_string_circle; dom_string *html_dom_string_poly; dom_string *html_dom_string_polygon; -typedef bool (script_handler_t)(struct jscontext *jscontext, const char *data, size_t size) ; - - -static script_handler_t *select_script_handler(content_type ctype) -{ - if (ctype == CONTENT_JS) { - return js_exec; - } - return NULL; -} - - -/* attempt to progress script execution - * - * execute scripts using algorithm found in: - * http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#the-script-element - * - */ -static bool html_scripts_exec(html_content *c) -{ - unsigned int i; - struct html_script *s; - script_handler_t *script_handler; - - if (c->jscontext == NULL) - return false; - - for (i = 0, s = c->scripts; i != c->scripts_count; i++, s++) { - if (s->already_started) { - continue; - } - - assert((s->type == HTML_SCRIPT_EXTERNAL) || - (s->type == HTML_SCRIPT_INTERNAL)); - - if (s->type == HTML_SCRIPT_EXTERNAL) { - /* ensure script content is present */ - if (s->data.external == NULL) - continue; - - /* ensure script content fetch status is not an error */ - if (content_get_status(s->data.external) == CONTENT_STATUS_ERROR) - continue; - - /* ensure script handler for content type */ - script_handler = select_script_handler(content_get_type(s->data.external)); - if (script_handler == NULL) - continue; /* unsupported type */ - - if (content_get_status(s->data.external) == CONTENT_STATUS_DONE) { - /* external script is now available */ - const char *data; - unsigned long size; - data = content_get_source_data(s->data.external, &size ); - script_handler(c->jscontext, data, size); - - s->already_started = true; - - } else { - /* script not yet available */ - - /* check if deferable or asynchronous */ - if (!s->defer && !s->async) { - break; - } - } - } - } - - return true; -} - -/* create new html script entry */ -static struct html_script * -html_process_new_script(html_content *c, enum html_script_type type) -{ - struct html_script *nscript; - /* add space for new script entry */ - nscript = realloc(c->scripts, - sizeof(struct html_script) * (c->scripts_count + 1)); - if (nscript == NULL) { - return NULL; - } - - c->scripts = nscript; - - /* increment script entry count */ - nscript = &c->scripts[c->scripts_count]; - c->scripts_count++; - - nscript->already_started = false; - nscript->parser_inserted = false; - nscript->force_async = true; - nscript->ready_exec = false; - nscript->async = false; - nscript->defer = false; - - nscript->type = type; - - return nscript; -} static void html_destroy_objects(html_content *html) { @@ -293,8 +189,8 @@ static void html_box_convert_done(html_content *c, bool success) /*imagemap_dump(c);*/ /* Destroy the parser binding */ - binding_destroy_tree(c->parser_binding); - c->parser_binding = NULL; + dom_hubbub_parser_destroy(c->parser); + c->parser = NULL; content_set_ready(&c->base); @@ -309,7 +205,7 @@ static void html_box_convert_done(html_content *c, bool success) * * \param c Content to convert */ -static void html_finish_conversion(html_content *c) +void html_finish_conversion(html_content *c) { union content_msg_data msg_data; dom_exception exc; /* returned by libdom functions */ @@ -400,217 +296,17 @@ static void html_finish_conversion(html_content *c) } -/** - * Callback for fetchcache() for linked stylesheets. - */ - -static nserror -html_convert_script_callback(hlcache_handle *script, - const hlcache_event *event, - void *pw) -{ - html_content *parent = pw; - unsigned int i; - struct html_script *s; - - /* Find script */ - for (i = 0, s = parent->scripts; i != parent->scripts_count; i++, s++) { - if (s->type == HTML_SCRIPT_EXTERNAL && - s->data.external == script) - break; - } - - assert(i != parent->scripts_count); - - switch (event->type) { - case CONTENT_MSG_LOADING: - break; - - case CONTENT_MSG_READY: - break; - - case CONTENT_MSG_DONE: - LOG(("script %d done '%s'", i, - nsurl_access(hlcache_handle_get_url(script)))); - parent->base.active--; - LOG(("%d fetches active", parent->base.active)); - - /* script finished loading so try and continue execution */ - html_scripts_exec(parent); - break; - - case CONTENT_MSG_ERROR: - LOG(("script %s failed: %s", - nsurl_access(hlcache_handle_get_url(script)), - event->data.error)); - hlcache_handle_release(script); - s->data.external = NULL; - parent->base.active--; - LOG(("%d fetches active", parent->base.active)); - content_add_error(&parent->base, "?", 0); - - /* script failed loading so try and continue execution */ - html_scripts_exec(parent); - - break; - - case CONTENT_MSG_STATUS: - html_set_status(parent, content_get_status_message(script)); - content_broadcast(&parent->base, CONTENT_MSG_STATUS, - event->data); - break; - - default: - assert(0); - } - - if (parent->base.active == 0) - html_finish_conversion(parent); - - return NSERROR_OK; -} - -/** process script node - * - * - */ -static dom_hubbub_error -html_process_script(void *ctx, dom_node *node) -{ - html_content *c = (html_content *)ctx; - dom_exception exc; /* returned by libdom functions */ - dom_string *src, *script, *mimetype; - struct html_script *nscript; - union content_msg_data msg_data; - - /* ensure javascript context is available */ - if (c->jscontext == NULL) { - union content_msg_data msg_data; - - msg_data.jscontext = &c->jscontext; - content_broadcast(&c->base, CONTENT_MSG_GETCTX, msg_data); - LOG(("javascript context %p ", c->jscontext)); - if (c->jscontext == NULL) { - /* no context and it could not be created, abort */ - return DOM_HUBBUB_OK; - } - } - - LOG(("content %p parser %p node %p",c,c->parser_binding, node)); - - exc = dom_element_get_attribute(node, html_dom_string_type, &mimetype); - if (exc != DOM_NO_ERR || mimetype == NULL) { - mimetype = dom_string_ref(html_dom_string_text_javascript); - } - - exc = dom_element_get_attribute(node, html_dom_string_src, &src); - if (exc != DOM_NO_ERR || src == NULL) { - struct lwc_string_s *lwcmimetype; - script_handler_t *script_handler; - - /* does not appear to be a src so script is inline content */ - exc = dom_node_get_text_content(node, &script); - if ((exc != DOM_NO_ERR) || (script == NULL)) { - dom_string_unref(mimetype); - return DOM_HUBBUB_OK; /* no contents, skip */ - } - - nscript = html_process_new_script(c, HTML_STYLESHEET_INTERNAL); - if (nscript == NULL) { - dom_string_unref(mimetype); - dom_string_unref(script); - goto html_process_script_no_memory; - } - - nscript->data.internal = script; - nscript->mimetype = mimetype; - nscript->already_started = true; - - /* charset (encoding) */ - - /* ensure script handler for content type */ - dom_string_intern(mimetype, &lwcmimetype); - script_handler = select_script_handler(content_factory_type_from_mime_type(lwcmimetype)); - lwc_string_unref(lwcmimetype); - - if (script_handler != NULL) { - script_handler(c->jscontext, - dom_string_data(script), - dom_string_byte_length(script)); - } - - - } else { - /* script with a src tag */ - nserror ns_error; - nsurl *joined; - hlcache_child_context child; - - - nscript = html_process_new_script(c, HTML_STYLESHEET_EXTERNAL); - if (nscript == NULL) { - dom_string_unref(src); - dom_string_unref(mimetype); - goto html_process_script_no_memory; - } - - /* charset (encoding) */ - - ns_error = nsurl_join(c->base_url, dom_string_data(src), &joined); - dom_string_unref(src); - if (ns_error != NSERROR_OK) { - dom_string_unref(mimetype); - goto html_process_script_no_memory; - } - - nscript->mimetype = mimetype; /* keep reference to mimetype */ - - LOG(("script %i '%s'", c->scripts_count, nsurl_access(joined))); - - child.charset = c->encoding; - child.quirks = c->base.quirks; - - ns_error = hlcache_handle_retrieve(joined, - 0, - content_get_url(&c->base), - NULL, - html_convert_script_callback, - c, - &child, - CONTENT_SCRIPT, - &nscript->data.external); - - nsurl_unref(joined); - - if (ns_error != NSERROR_OK) { - goto html_process_script_no_memory; - } - - c->base.active++; /* ensure base content knows the fetch is active */ - LOG(("%d fetches active", c->base.active)); - - } - html_scripts_exec(c); - - return DOM_HUBBUB_OK; - -html_process_script_no_memory: - msg_data.error = messages_get("NoMemory"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - return DOM_HUBBUB_NOMEM; -} static nserror html_create_html_data(html_content *c, const http_parameter *params) { lwc_string *charset; union content_msg_data msg_data; - binding_error error; nserror nerror; - c->parser_binding = NULL; + c->parser = NULL; c->document = NULL; - c->quirks = BINDING_QUIRKS_MODE_NONE; + c->quirks = DOM_DOCUMENT_QUIRKS_MODE_NONE; c->encoding = NULL; c->base_url = nsurl_ref(content_get_url(&c->base)); c->base_target = NULL; @@ -637,8 +333,10 @@ html_create_html_data(html_content *c, const http_parameter *params) c->jscontext = NULL; if (lwc_intern_string("*", SLEN("*"), &c->universal) != lwc_error_ok) { - error = BINDING_NOMEM; - goto error; + msg_data.error = messages_get("NoMemory"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + + return NSERROR_NOMEM; } selection_prepare(&c->sel, (struct content *)c, true); @@ -650,60 +348,56 @@ html_create_html_data(html_content *c, const http_parameter *params) lwc_string_unref(charset); if (c->encoding == NULL) { - error = BINDING_NOMEM; - goto error; + lwc_string_unref(c->universal); + c->universal = NULL; + + msg_data.error = messages_get("NoMemory"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + + return NSERROR_NOMEM; + } - c->encoding_source = ENCODING_SOURCE_HEADER; + c->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_HEADER; } /* Create the parser binding */ - error = binding_create_tree(&c->parser_binding, - c->encoding, - nsoption_bool(enable_javascript), - html_process_script, - c); - if (error == BINDING_BADENCODING && c->encoding != NULL) { + c->parser = dom_hubbub_parser_create(c->encoding, + true, + nsoption_bool(enable_javascript), + NULL, + html_process_script, + c); + if ((c->parser == NULL) && (c->encoding != NULL)) { /* Ok, we don't support the declared encoding. Bailing out * isn't exactly user-friendly, so fall back to autodetect */ talloc_free(c->encoding); c->encoding = NULL; - error = binding_create_tree(&c->parser_binding, - c->encoding, - nsoption_bool(enable_javascript), - html_process_script, - c); + c->parser = dom_hubbub_parser_create(c->encoding, + true, + nsoption_bool(enable_javascript), + NULL, + html_process_script, + c); - } - - if (error != BINDING_OK) - goto error; - return NSERROR_OK; - -error: - if (error == BINDING_BADENCODING) { - LOG(("Bad encoding: %s", c->encoding ? c->encoding : "")); - msg_data.error = messages_get("ParsingFail"); - nerror = NSERROR_BAD_ENCODING; - } else { - msg_data.error = messages_get("NoMemory"); - nerror = NSERROR_NOMEM; } - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + if (c->parser == NULL) { + nsurl_unref(c->base_url); + c->base_url = NULL; - if (c->universal != NULL) { lwc_string_unref(c->universal); c->universal = NULL; - } - if (c->base_url != NULL) { - nsurl_unref(c->base_url); - c->base_url = NULL; + msg_data.error = messages_get("NoMemory"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + + return NSERROR_NOMEM; } - return nerror; + return NSERROR_OK; + } /** @@ -757,14 +451,16 @@ static bool html_process_data(struct content *c, const char *data, unsigned int size) { html_content *html = (html_content *) c; - binding_error err; + dom_hubbub_error error; const char *encoding; + const char *source_data; + unsigned long source_size; - err = binding_parse_chunk(html->parser_binding, - (const uint8_t *) data, size); - if (err == BINDING_ENCODINGCHANGE) { + error = dom_hubbub_parser_parse_chunk(html->parser, (const uint8_t *) data, size); + + if (error == (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_ENCODINGCHANGE)) { goto encoding_change; - } else if (err != BINDING_OK) { + } else if (error != DOM_HUBBUB_OK) { union content_msg_data msg_data; msg_data.error = messages_get("NoMemory"); @@ -778,9 +474,8 @@ html_process_data(struct content *c, const char *data, unsigned int size) encoding_change: /* Retrieve new encoding */ - encoding = binding_get_encoding( - html->parser_binding, - &html->encoding_source); + encoding = dom_hubbub_parser_get_encoding(html->parser, + &html->encoding_source); if (html->encoding != NULL) talloc_free(html->encoding); @@ -795,16 +490,17 @@ encoding_change: } /* Destroy binding */ - binding_destroy_tree(html->parser_binding); - html->parser_binding = NULL; + dom_hubbub_parser_destroy(html->parser); + html->parser = NULL; /* Create new binding, using the new encoding */ - err = binding_create_tree(&html->parser_binding, - html->encoding, - nsoption_bool(enable_javascript), - html_process_script, - html); - if (err == BINDING_BADENCODING) { + html->parser = dom_hubbub_parser_create(html->encoding, + true, + nsoption_bool(enable_javascript), + NULL, + html_process_script, + html); + if (html->parser == NULL) { /* Ok, we don't support the declared encoding. Bailing out * isn't exactly user-friendly, so fall back to Windows-1252 */ talloc_free(html->encoding); @@ -817,37 +513,35 @@ encoding_change: return false; } - err = binding_create_tree(&html->parser_binding, - html->encoding, - nsoption_bool(enable_javascript), - html_process_script, - html); - } + html->parser = dom_hubbub_parser_create(html->encoding, + true, + nsoption_bool(enable_javascript), + NULL, + html_process_script, + html); - if (err != BINDING_OK) { - union content_msg_data msg_data; + if (html->parser == NULL) { + union content_msg_data msg_data; + + /** @todo add a message callback function and pass the + * parser errors back instead of everything being + * OOM + */ - if (err == BINDING_BADENCODING) { - LOG(("Bad encoding: %s", html->encoding - ? html->encoding : "")); - msg_data.error = messages_get("ParsingFail"); - } else msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - return false; - } + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + return false; + } - { - const char *source_data; - unsigned long source_size; + } - source_data = content__get_source_data(c, &source_size); + source_data = content__get_source_data(c, &source_size); - /* Recurse to reprocess all the data. This is safe because - * the encoding is now specified at parser start which means - * it cannot be changed again. */ - return html_process_data(c, source_data, source_size); - } + /* Recurse to reprocess all the data. This is safe because + * the encoding is now specified at parser start which means + * it cannot be changed again. */ + return html_process_data(c, source_data, source_size); + } @@ -2156,7 +1850,7 @@ static bool html_find_stylesheets(html_content *c, dom_node *html) c->base.active++; LOG(("%d fetches active", c->base.active)); - if (c->quirks == BINDING_QUIRKS_MODE_FULL) { + if (c->quirks == DOM_DOCUMENT_QUIRKS_MODE_FULL) { ns_error = hlcache_handle_retrieve(html_quirks_stylesheet_url, 0, content_get_url(&c->base), NULL, html_convert_css_callback, c, &child, @@ -2224,7 +1918,7 @@ html_find_stylesheets_no_memory: static bool html_convert(struct content *c) { html_content *htmlc = (html_content *) c; - binding_error err; + dom_hubbub_error err; dom_node *html, *head; union content_msg_data msg_data; unsigned long size; @@ -2235,18 +1929,18 @@ static bool html_convert(struct content *c) /* finish parsing */ content__get_source_data(c, &size); - err = binding_parse_completed(htmlc->parser_binding); - if (err != BINDING_OK) { + err = dom_hubbub_parser_completed(htmlc->parser); + if (err != DOM_HUBBUB_OK) { union content_msg_data msg_data; + /** @todo Improve precessing of errors */ msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } - htmlc->document = binding_get_document(htmlc->parser_binding, - &htmlc->quirks); + htmlc->document = dom_hubbub_parser_get_document(htmlc->parser); if (htmlc->document == NULL) { LOG(("Parsing failed")); @@ -2255,10 +1949,18 @@ static bool html_convert(struct content *c) return false; } + exc = dom_document_get_quirks_mode(htmlc->document, &htmlc->quirks); + if (exc != DOM_NO_ERR) { + LOG(("error retrieving quirks")); + } + + LOG(("quirks set to %d", htmlc->quirks)); + + if (htmlc->encoding == NULL) { - const char *encoding = binding_get_encoding( - htmlc->parser_binding, - &htmlc->encoding_source); + const char *encoding; + encoding = dom_hubbub_parser_get_encoding(htmlc->parser, + &htmlc->encoding_source); htmlc->encoding = talloc_strdup(c, encoding); if (htmlc->encoding == NULL) { @@ -2667,13 +2369,14 @@ static void html_destroy(struct content *c) if (html->base_url) nsurl_unref(html->base_url); - if (html->parser_binding != NULL) { - binding_destroy_tree(html->parser_binding); - html->parser_binding = NULL; + if (html->parser != NULL) { + dom_hubbub_parser_destroy(html->parser); + html->parser = NULL; } - if (html->document != NULL) - binding_destroy_document(html->document); + if (html->document != NULL) { + dom_node_unref(html->document); + } /* Free base target */ if (html->base_target != NULL) { @@ -3244,7 +2947,7 @@ const char *html_get_encoding(hlcache_handle *h) * \param h Content to retrieve charset from * \return Pointer to charset, or NULL */ -binding_encoding_source html_get_encoding_source(hlcache_handle *h) +dom_hubbub_encoding_source html_get_encoding_source(hlcache_handle *h) { html_content *c = (html_content *) hlcache_handle_get_content(h); |