From cc93afcfda94ada42b86227b3acd15706ee4ed4f Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Fri, 6 Jul 2012 20:00:32 +0100 Subject: alter usage of libdom hubbub binding --- render/html.c | 2988 +++++++++++++++++++++++------------------------ render/libdom_binding.c | 4 +- render/parser_binding.h | 3 +- 3 files changed, 1495 insertions(+), 1500 deletions(-) (limited to 'render') diff --git a/render/html.c b/render/html.c index 36a2c0201..ede8343ac 100644 --- a/render/html.c +++ b/render/html.c @@ -117,1862 +117,1864 @@ dom_string *html_dom_string_circle; dom_string *html_dom_string_poly; dom_string *html_dom_string_polygon; -static nserror -html_create_html_data(html_content *c, const http_parameter *params) -{ - lwc_string *charset; - union content_msg_data msg_data; - binding_error error; - nserror nerror; +typedef bool (script_handler_t)(struct jscontext *jscontext, const char *data, size_t size) ; - c->parser_binding = NULL; - c->document = NULL; - c->quirks = BINDING_QUIRKS_MODE_NONE; - c->encoding = NULL; - c->base_url = nsurl_ref(content_get_url(&c->base)); - c->base_target = NULL; - c->aborted = false; - c->layout = NULL; - c->background_colour = NS_TRANSPARENT; - c->stylesheet_count = 0; - c->stylesheets = NULL; - c->select_ctx = NULL; - c->universal = NULL; - c->num_objects = 0; - c->object_list = NULL; - c->forms = NULL; - c->imagemaps = NULL; - c->bw = NULL; - c->frameset = NULL; - c->iframe = NULL; - c->page = NULL; - c->box = NULL; - c->font_func = &nsfont; - c->scrollbar = NULL; - c->scripts_count = 0; - c->scripts = NULL; - c->jscontext = NULL; - if (lwc_intern_string("*", SLEN("*"), &c->universal) != lwc_error_ok) { - error = BINDING_NOMEM; - goto error; +static script_handler_t *select_script_handler(content_type ctype) +{ + if (ctype == CONTENT_JS) { + return js_exec; } + return NULL; +} - selection_prepare(&c->sel, (struct content *)c, true); - nerror = http_parameter_list_find_item(params, html_charset, &charset); - if (nerror == 
NSERROR_OK) { - c->encoding = talloc_strdup(c, lwc_string_data(charset)); +/* attempt to progress script execution + * + * execute scripts using algorithm found in: + * http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#the-script-element + * + */ +static bool html_scripts_exec(html_content *c) +{ + unsigned int i; + struct html_script *s; + script_handler_t *script_handler; - lwc_string_unref(charset); + if (c->jscontext == NULL) + return false; - if (c->encoding == NULL) { - error = BINDING_NOMEM; - goto error; + for (i = 0, s = c->scripts; i != c->scripts_count; i++, s++) { + if (s->already_started) { + continue; } - c->encoding_source = ENCODING_SOURCE_HEADER; - } - /* Create the parser binding */ - error = binding_create_tree(c, c->encoding, &c->parser_binding); - if (error == BINDING_BADENCODING && c->encoding != NULL) { - /* Ok, we don't support the declared encoding. Bailing out - * isn't exactly user-friendly, so fall back to autodetect */ - talloc_free(c->encoding); - c->encoding = NULL; + assert((s->type == HTML_SCRIPT_EXTERNAL) || + (s->type == HTML_SCRIPT_INTERNAL)); - error = binding_create_tree(c, c->encoding, &c->parser_binding); - } + if (s->type == HTML_SCRIPT_EXTERNAL) { + /* ensure script content is present */ + if (s->data.external == NULL) + continue; - if (error != BINDING_OK) - goto error; + /* ensure script content fetch status is not an error */ + if (content_get_status(s->data.external) == CONTENT_STATUS_ERROR) + continue; - return NSERROR_OK; + /* ensure script handler for content type */ + script_handler = select_script_handler(content_get_type(s->data.external)); + if (script_handler == NULL) + continue; /* unsupported type */ -error: - if (error == BINDING_BADENCODING) { - LOG(("Bad encoding: %s", c->encoding ? 
c->encoding : "")); - msg_data.error = messages_get("ParsingFail"); - nerror = NSERROR_BAD_ENCODING; - } else { - msg_data.error = messages_get("NoMemory"); - nerror = NSERROR_NOMEM; - } + if (content_get_status(s->data.external) == CONTENT_STATUS_DONE) { + /* external script is now available */ + const char *data; + unsigned long size; + data = content_get_source_data(s->data.external, &size ); + script_handler(c->jscontext, data, size); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + s->already_started = true; - if (c->universal != NULL) { - lwc_string_unref(c->universal); - c->universal = NULL; - } + } else { + /* script not yet available */ - if (c->base_url != NULL) { - nsurl_unref(c->base_url); - c->base_url = NULL; + /* check if deferable or asynchronous */ + if (!s->defer && !s->async) { + break; + } + } + } else { + struct lwc_string_s *lwcmimetype; + dom_string_intern(s->mimetype, &lwcmimetype); + + /* ensure script handler for content type */ + script_handler = select_script_handler(content_factory_type_from_mime_type(lwcmimetype)); + lwc_string_unref(lwcmimetype); + + if (script_handler == NULL) + continue; /* unsupported type */ + + script_handler(c->jscontext, + dom_string_data(s->data.internal), + dom_string_byte_length(s->data.internal)); + s->already_started = true; + } } - return nerror; + return true; } -/** - * Create a CONTENT_HTML. - * - * The content_html_data structure is initialized and the HTML parser is - * created. 
- */ - -static nserror -html_create(const content_handler *handler, - lwc_string *imime_type, - const http_parameter *params, - llcache_handle *llcache, - const char *fallback_charset, - bool quirks, - struct content **c) +/* create new html script entry */ +static struct html_script * +html_process_new_script(html_content *c, enum html_script_type type) { - html_content *html; - nserror error; + struct html_script *nscript; + /* add space for new script entry */ + nscript = realloc(c->scripts, + sizeof(struct html_script) * (c->scripts_count + 1)); + if (nscript == NULL) { + return NULL; + } - html = talloc_zero(0, html_content); - if (html == NULL) - return NSERROR_NOMEM; + c->scripts = nscript; - error = content__init(&html->base, handler, imime_type, params, - llcache, fallback_charset, quirks); - if (error != NSERROR_OK) { - talloc_free(html); - return error; - } + /* increment script entry count */ + nscript = &c->scripts[c->scripts_count]; + c->scripts_count++; - error = html_create_html_data(html, params); - if (error != NSERROR_OK) { - talloc_free(html); - return error; - } + nscript->already_started = false; + nscript->parser_inserted = false; + nscript->force_async = true; + nscript->ready_exec = false; + nscript->async = false; + nscript->defer = false; - *c = (struct content *) html; + nscript->type = type; - return NSERROR_OK; + return nscript; } +static void html_destroy_objects(html_content *html) +{ + while (html->object_list != NULL) { + struct content_html_object *victim = html->object_list; + + if (victim->content != NULL) { + LOG(("object %p", victim->content)); + + if (content_get_type(victim->content) == CONTENT_HTML) + schedule_remove(html_object_refresh, victim); + + hlcache_handle_release(victim->content); + } + html->object_list = victim->next; + talloc_free(victim); + } +} /** - * Process data for CONTENT_HTML. 
+ * Perform post-box-creation conversion of a document + * + * \param c HTML content to complete conversion of + * \param success Whether box tree construction was successful */ - -static bool -html_process_data(struct content *c, const char *data, unsigned int size) +static void html_box_convert_done(html_content *c, bool success) { - html_content *html = (html_content *) c; - binding_error err; - const char *encoding; - - err = binding_parse_chunk(html->parser_binding, - (const uint8_t *) data, size); - if (err == BINDING_ENCODINGCHANGE) { - goto encoding_change; - } else if (err != BINDING_OK) { - union content_msg_data msg_data; + union content_msg_data msg_data; + dom_exception exc; /* returned by libdom functions */ + dom_node *html; - msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + LOG(("Done XML to box (%p)", c)); - return false; + /* Clean up and report error if unsuccessful or aborted */ + if ((success == false) || c->aborted) { + html_destroy_objects(c); + if (success == false) + msg_data.error = messages_get("NoMemory"); + else + msg_data.error = messages_get("Stopped"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + content_set_error(&c->base); + return; } - return true; +#if ALWAYS_DUMP_BOX + box_dump(stderr, c->layout->children, 0); +#endif +#if ALWAYS_DUMP_FRAMESET + if (c->frameset) + html_dump_frameset(c->frameset, 0); +#endif -encoding_change: - - /* Retrieve new encoding */ - encoding = binding_get_encoding( - html->parser_binding, - &html->encoding_source); - - if (html->encoding != NULL) - talloc_free(html->encoding); - - html->encoding = talloc_strdup(c, encoding); - if (html->encoding == NULL) { - union content_msg_data msg_data; + exc = dom_document_get_document_element(c->document, (void *) &html); + if ((exc != DOM_NO_ERR) || (html == NULL)) { + LOG(("error retrieving html element from dom")); + msg_data.error = messages_get("ParsingFail"); + content_broadcast(&c->base, 
CONTENT_MSG_ERROR, msg_data); + content_set_error(&c->base); + return; + } + /* extract image maps - can't do this sensibly in xml_to_box */ + if (imagemap_extract(c) == false) { + LOG(("imagemap extraction failed")); + html_destroy_objects(c); msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - return false; + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + content_set_error(&c->base); + return; } + /*imagemap_dump(c);*/ - /* Destroy binding */ - binding_destroy_tree(html->parser_binding); + /* Destroy the parser binding */ + binding_destroy_tree(c->parser_binding); + c->parser_binding = NULL; - /* Create new binding, using the new encoding */ - err = binding_create_tree(html, html->encoding, &html->parser_binding); - if (err == BINDING_BADENCODING) { - /* Ok, we don't support the declared encoding. Bailing out - * isn't exactly user-friendly, so fall back to Windows-1252 */ - talloc_free(html->encoding); - html->encoding = talloc_strdup(c, "Windows-1252"); - if (html->encoding == NULL) { - union content_msg_data msg_data; + content_set_ready(&c->base); - msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - return false; - } + if (c->base.active == 0) + content_set_done(&c->base); - err = binding_create_tree(html, html->encoding, - &html->parser_binding); - } + html_set_status(c, ""); +} - if (err != BINDING_OK) { - union content_msg_data msg_data; +/** + * Complete conversion of an HTML document + * + * \param c Content to convert + */ +static void html_finish_conversion(html_content *c) +{ + union content_msg_data msg_data; + dom_exception exc; /* returned by libdom functions */ + dom_node *html; + uint32_t i; + css_error error; - if (err == BINDING_BADENCODING) { - LOG(("Bad encoding: %s", html->encoding - ? 
html->encoding : "")); - msg_data.error = messages_get("ParsingFail"); - } else - msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - return false; + /* Bail out if we've been aborted */ + if (c->aborted) { + msg_data.error = messages_get("Stopped"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + content_set_error(&c->base); + return; } - { - const char *source_data; - unsigned long source_size; - - source_data = content__get_source_data(c, &source_size); + /* check that the base stylesheet loaded; layout fails without it */ + if (c->stylesheets[STYLESHEET_BASE].data.external == NULL) { + msg_data.error = "Base stylesheet failed to load"; + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + content_set_error(&c->base); + return; + } - /* Recurse to reprocess all the data. This is safe because - * the encoding is now specified at parser start which means - * it cannot be changed again. */ - return html_process_data(c, source_data, source_size); + /* Create selection context */ + error = css_select_ctx_create(ns_realloc, c, &c->select_ctx); + if (error != CSS_OK) { + msg_data.error = messages_get("NoMemory"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + content_set_error(&c->base); + return; } -} + /* Add sheets to it */ + for (i = STYLESHEET_BASE; i != c->stylesheet_count; i++) { + const struct html_stylesheet *hsheet = &c->stylesheets[i]; + css_stylesheet *sheet; + css_origin origin = CSS_ORIGIN_AUTHOR; -/** process link node */ -static bool html_process_link(html_content *c, dom_node *node) -{ - struct content_rfc5988_link link; /* the link added to the content */ - dom_exception exc; /* returned by libdom functions */ - dom_string *atr_string; - nserror error; + if (i < STYLESHEET_USER) + origin = CSS_ORIGIN_UA; + else if (i < STYLESHEET_START) + origin = CSS_ORIGIN_USER; - memset(&link, 0, sizeof(struct content_rfc5988_link)); + if (hsheet->type == HTML_STYLESHEET_EXTERNAL && 
+ hsheet->data.external != NULL) { + sheet = nscss_get_stylesheet(hsheet->data.external); + } else if (hsheet->type == HTML_STYLESHEET_INTERNAL) { + sheet = hsheet->data.internal->sheet; + } else { + sheet = NULL; + } - /* check that the relation exists - w3c spec says must be present */ - exc = dom_element_get_attribute(node, html_dom_string_rel, &atr_string); - if ((exc != DOM_NO_ERR) || (atr_string == NULL)) { - return false; - } - /* get a lwc string containing the link relation */ - exc = dom_string_intern(atr_string, &link.rel); - dom_string_unref(atr_string); - if (exc != DOM_NO_ERR) { - return false; + if (sheet != NULL) { + error = css_select_ctx_append_sheet( + c->select_ctx, sheet, + origin, CSS_MEDIA_SCREEN); + if (error != CSS_OK) { + msg_data.error = messages_get("NoMemory"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, + msg_data); + content_set_error(&c->base); + return; + } + } } - - /* check that the href exists - w3c spec says must be present */ - exc = dom_element_get_attribute(node, html_dom_string_href, &atr_string); - if ((exc != DOM_NO_ERR) || (atr_string == NULL)) { - lwc_string_unref(link.rel); - return false; + + /* convert xml tree to box tree */ + LOG(("XML to box (%p)", c)); + content_set_status(&c->base, messages_get("Processing")); + content_broadcast(&c->base, CONTENT_MSG_STATUS, msg_data); + + exc = dom_document_get_document_element(c->document, (void *) &html); + if ((exc != DOM_NO_ERR) || (html == NULL)) { + LOG(("error retrieving html element from dom")); + msg_data.error = messages_get("ParsingFail"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + content_set_error(&c->base); + return; } - /* get nsurl */ - error = nsurl_join(c->base_url, dom_string_data(atr_string), &link.href); - dom_string_unref(atr_string); - if (error != NSERROR_OK) { - lwc_string_unref(link.rel); - return false; + if (xml_to_box(html, c, html_box_convert_done) == false) { + html_destroy_objects(c); + msg_data.error = 
messages_get("NoMemory"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + content_set_error(&c->base); + return; } +} - /* look for optional properties -- we don't care if internment fails */ - exc = dom_element_get_attribute(node, html_dom_string_hreflang, &atr_string); - if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { - /* get a lwc string containing the href lang */ - exc = dom_string_intern(atr_string, &link.hreflang); - dom_string_unref(atr_string); - } +/** + * Callback for hlcache_handle_retrieve() for linked (external) scripts. + */ - exc = dom_element_get_attribute(node, html_dom_string_type, &atr_string); - if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { - /* get a lwc string containing the type */ - exc = dom_string_intern(atr_string, &link.type); - dom_string_unref(atr_string); - } +static nserror +html_convert_script_callback(hlcache_handle *script, + const hlcache_event *event, + void *pw) +{ + html_content *parent = pw; + unsigned int i; + struct html_script *s; - exc = dom_element_get_attribute(node, html_dom_string_media, &atr_string); - if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { - /* get a lwc string containing the media */ - exc = dom_string_intern(atr_string, &link.media); - dom_string_unref(atr_string); + /* Find script */ + for (i = 0, s = parent->scripts; i != parent->scripts_count; i++, s++) { + if (s->type == HTML_SCRIPT_EXTERNAL && + s->data.external == script) + break; } - exc = dom_element_get_attribute(node, html_dom_string_sizes, &atr_string); - if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { - /* get a lwc string containing the sizes */ - exc = dom_string_intern(atr_string, &link.sizes); - dom_string_unref(atr_string); - } + assert(i != parent->scripts_count); - /* add to content */ - content__add_rfc5988_link(&c->base, &link); + switch (event->type) { + case CONTENT_MSG_LOADING: + break; - if (link.sizes != NULL) - lwc_string_unref(link.sizes); - if (link.media != NULL) - lwc_string_unref(link.media); - if (link.type != 
NULL) - lwc_string_unref(link.type); - if (link.hreflang != NULL) - lwc_string_unref(link.hreflang); + case CONTENT_MSG_READY: + break; - nsurl_unref(link.href); - lwc_string_unref(link.rel); + case CONTENT_MSG_DONE: + LOG(("script %d done '%s'", i, + nsurl_access(hlcache_handle_get_url(script)))); + parent->base.active--; + LOG(("%d fetches active", parent->base.active)); - return true; -} + /* script finished loading so try and continue execution */ + html_scripts_exec(parent); + break; -/** process title node */ -static bool html_process_title(html_content *c, dom_node *node) -{ - dom_exception exc; /* returned by libdom functions */ - dom_string *title; - char *title_str; - bool success; + case CONTENT_MSG_ERROR: + LOG(("script %s failed: %s", + nsurl_access(hlcache_handle_get_url(script)), + event->data.error)); + hlcache_handle_release(script); + s->data.external = NULL; + parent->base.active--; + LOG(("%d fetches active", parent->base.active)); + content_add_error(&parent->base, "?", 0); - if (c->base.title != NULL) - return true; + /* script failed loading so try and continue execution */ + html_scripts_exec(parent); - exc = dom_node_get_text_content(node, &title); - if ((exc != DOM_NO_ERR) || (title == NULL)) { - return false; - } + break; - title_str = squash_whitespace(dom_string_data(title)); - dom_string_unref(title); + case CONTENT_MSG_STATUS: + html_set_status(parent, content_get_status_message(script)); + content_broadcast(&parent->base, CONTENT_MSG_STATUS, + event->data); + break; - if (title_str == NULL) { - return false; + default: + assert(0); } - success = content__set_title(&c->base, title_str); - - free(title_str); + if (parent->base.active == 0) + html_finish_conversion(parent); - return success; + return NSERROR_OK; } -static bool html_process_base(html_content *c, dom_node *node) +/** process script node + * + * + */ +static dom_hubbub_error +html_process_script(void *ctx, dom_node *node) { + html_content *c = (html_content *)ctx; 
dom_exception exc; /* returned by libdom functions */ - dom_string *atr_string; + dom_string *src, *script, *mimetype; + struct html_script *nscript; + union content_msg_data msg_data; - /* get href attribute if present */ - exc = dom_element_get_attribute(node, html_dom_string_href, &atr_string); - if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { - nsurl *url; - nserror error; + /* ensure javascript context is available */ + if (c->jscontext == NULL) { + union content_msg_data msg_data; - /* get url from string */ - error = nsurl_create(dom_string_data(atr_string), &url); - dom_string_unref(atr_string); - if (error == NSERROR_OK) { - if (c->base_url != NULL) - nsurl_unref(c->base_url); - c->base_url = url; + msg_data.jscontext = &c->jscontext; + content_broadcast(&c->base, CONTENT_MSG_GETCTX, msg_data); + LOG(("javascript context %p ", c->jscontext)); + if (c->jscontext == NULL) { + /* no context and it could not be created, abort */ + return DOM_HUBBUB_OK; } } - - /* get target attribute if present and not already set */ - if (c->base_target != NULL) { - return true; + exc = dom_element_get_attribute(node, html_dom_string_type, &mimetype); + if (exc != DOM_NO_ERR || mimetype == NULL) { + mimetype = dom_string_ref(html_dom_string_text_javascript); } - exc = dom_element_get_attribute(node, html_dom_string_target, &atr_string); - if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { - /* Validation rules from the HTML5 spec for the base element: - * The target must be one of _blank, _self, _parent, or - * _top or any identifier which does not begin with an - * underscore - */ - if (*dom_string_data(atr_string) != '_' || - dom_string_caseless_isequal(atr_string, html_dom_string__blank) || - dom_string_caseless_isequal(atr_string, html_dom_string__self) || - dom_string_caseless_isequal(atr_string, html_dom_string__parent) || - dom_string_caseless_isequal(atr_string, html_dom_string__top)) { - c->base_target = strdup(dom_string_data(atr_string)); + exc = 
dom_element_get_attribute(node, html_dom_string_src, &src); + if (exc != DOM_NO_ERR || src == NULL) { + /* does not appear to be a src so script is inline content */ + exc = dom_node_get_text_content(node, &script); + if ((exc != DOM_NO_ERR) || (script == NULL)) { + dom_string_unref(mimetype); + return DOM_HUBBUB_OK; /* no contents, skip */ } - dom_string_unref(atr_string); - } - return true; -} + nscript = html_process_new_script(c, HTML_STYLESHEET_INTERNAL); + if (nscript == NULL) { + dom_string_unref(mimetype); + dom_string_unref(script); + goto html_process_script_no_memory; + } -/** - * Process elements in . - * - * \param c content structure - * \param head xml node of head element - * \return true on success, false on memory exhaustion - * - * The title and base href are extracted if present. - */ + nscript->data.internal = script; + nscript->mimetype = mimetype; -static bool html_head(html_content *c, dom_node *head) -{ - dom_node *node; - dom_exception exc; /* returned by libdom functions */ - dom_string *node_name; - dom_node_type node_type; - dom_node *next_node; + /* charset (encoding) */ + } else { + /* script with a src tag */ + nserror ns_error; + nsurl *joined; + hlcache_child_context child; - exc = dom_node_get_first_child(head, &node); - if (exc != DOM_NO_ERR) { - return false; - } - while (node != NULL) { - exc = dom_node_get_node_type(node, &node_type); + nscript = html_process_new_script(c, HTML_STYLESHEET_EXTERNAL); + if (nscript == NULL) { + dom_string_unref(src); + dom_string_unref(mimetype); + goto html_process_script_no_memory; + } - if ((exc == DOM_NO_ERR) && (node_type == DOM_ELEMENT_NODE)) { - exc = dom_node_get_node_name(node, &node_name); + /* charset (encoding) */ - if ((exc == DOM_NO_ERR) || (node_name != NULL)) { - if (dom_string_caseless_isequal(node_name, - html_dom_string_title)) { - html_process_title(c, node); - } else if (dom_string_caseless_isequal(node_name, - html_dom_string_base)) { - html_process_base(c, node); - } else 
if (dom_string_caseless_isequal(node_name, - html_dom_string_link)) { - html_process_link(c, node); - } - } + ns_error = nsurl_join(c->base_url, dom_string_data(src), &joined); + dom_string_unref(src); + if (ns_error != NSERROR_OK) { + dom_string_unref(mimetype); + goto html_process_script_no_memory; } - /* move to next node */ - exc = dom_node_get_next_sibling(node, &next_node); - dom_node_unref(node); - if (exc == DOM_NO_ERR) { - node = next_node; - } else { - node = NULL; + nscript->mimetype = mimetype; /* keep reference to mimetype */ + + LOG(("script %i '%s'", c->scripts_count, nsurl_access(joined))); + + child.charset = c->encoding; + child.quirks = c->base.quirks; + + ns_error = hlcache_handle_retrieve(joined, + 0, + content_get_url(&c->base), + NULL, + html_convert_script_callback, + c, + &child, + CONTENT_SCRIPT, + &nscript->data.external); + + nsurl_unref(joined); + + if (ns_error != NSERROR_OK) { + goto html_process_script_no_memory; } + + c->base.active++; /* ensure base content knows the fetch is active */ + LOG(("%d fetches active", c->base.active)); + } + html_scripts_exec(c); - return true; + return DOM_HUBBUB_OK; + +html_process_script_no_memory: + msg_data.error = messages_get("NoMemory"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + return DOM_HUBBUB_NOMEM; } -static bool html_meta_refresh_process_element(html_content *c, dom_node *n) +static nserror +html_create_html_data(html_content *c, const http_parameter *params) { + lwc_string *charset; union content_msg_data msg_data; - const char *url, *end, *refresh = NULL; - char *new_url; - char quote = '\0'; - dom_string *equiv, *content; - dom_exception exc; - nsurl *nsurl; - nserror error; - - exc = dom_element_get_attribute(n, html_dom_string_http_equiv, &equiv); - if (exc != DOM_NO_ERR) - return false; + binding_error error; + nserror nerror; - if (equiv == NULL) - return true; + c->parser_binding = NULL; + c->document = NULL; + c->quirks = BINDING_QUIRKS_MODE_NONE; + c->encoding 
= NULL; + c->base_url = nsurl_ref(content_get_url(&c->base)); + c->base_target = NULL; + c->aborted = false; + c->layout = NULL; + c->background_colour = NS_TRANSPARENT; + c->stylesheet_count = 0; + c->stylesheets = NULL; + c->select_ctx = NULL; + c->universal = NULL; + c->num_objects = 0; + c->object_list = NULL; + c->forms = NULL; + c->imagemaps = NULL; + c->bw = NULL; + c->frameset = NULL; + c->iframe = NULL; + c->page = NULL; + c->box = NULL; + c->font_func = &nsfont; + c->scrollbar = NULL; + c->scripts_count = 0; + c->scripts = NULL; + c->jscontext = NULL; - if (strcasecmp(dom_string_data(equiv), "refresh") != 0) { - dom_string_unref(equiv); - return true; + if (lwc_intern_string("*", SLEN("*"), &c->universal) != lwc_error_ok) { + error = BINDING_NOMEM; + goto error; } - dom_string_unref(equiv); + selection_prepare(&c->sel, (struct content *)c, true); - exc = dom_element_get_attribute(n, html_dom_string_content, &content); - if (exc != DOM_NO_ERR) - return false; + nerror = http_parameter_list_find_item(params, html_charset, &charset); + if (nerror == NSERROR_OK) { + c->encoding = talloc_strdup(c, lwc_string_data(charset)); - if (content == NULL) - return true; + lwc_string_unref(charset); - end = dom_string_data(content) + dom_string_byte_length(content); + if (c->encoding == NULL) { + error = BINDING_NOMEM; + goto error; + } + c->encoding_source = ENCODING_SOURCE_HEADER; + } - /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS] - * intpart := 1*DIGIT - * fracpart := 1*('.' 
| DIGIT) - * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq) - * url-nq := *urlchar - * url-sq := "'" *(urlchar | '"') "'" - * url-dq := '"' *(urlchar | "'") '"' - * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii - * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] - */ + /* Create the parser binding */ + error = binding_create_tree(&c->parser_binding, + c->encoding, + nsoption_bool(enable_javascript), + html_process_script, + c); + if (error == BINDING_BADENCODING && c->encoding != NULL) { + /* Ok, we don't support the declared encoding. Bailing out + * isn't exactly user-friendly, so fall back to autodetect */ + talloc_free(c->encoding); + c->encoding = NULL; - url = dom_string_data(content); + error = binding_create_tree(&c->parser_binding, + c->encoding, + nsoption_bool(enable_javascript), + html_process_script, + c); - /* *LWS */ - while (url < end && isspace(*url)) { - url++; } - /* intpart */ - if (url == end || (*url < '0' || '9' < *url)) { - /* Empty content, or invalid timeval */ - dom_string_unref(content); - return true; - } + if (error != BINDING_OK) + goto error; - msg_data.delay = (int) strtol(url, &new_url, 10); - /* a very small delay and self-referencing URL can cause a loop - * that grinds machines to a halt. To prevent this we set a - * minimum refresh delay of 1s. */ - if (msg_data.delay < 1) - msg_data.delay = 1; + return NSERROR_OK; - url = new_url; +error: + if (error == BINDING_BADENCODING) { + LOG(("Bad encoding: %s", c->encoding ? c->encoding : "")); + msg_data.error = messages_get("ParsingFail"); + nerror = NSERROR_BAD_ENCODING; + } else { + msg_data.error = messages_get("NoMemory"); + nerror = NSERROR_NOMEM; + } - /* fracpart? 
(ignored, as delay is integer only) */ - while (url < end && (('0' <= *url && *url <= '9') || - *url == '.')) { - url++; + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + + if (c->universal != NULL) { + lwc_string_unref(c->universal); + c->universal = NULL; } - /* *LWS */ - while (url < end && isspace(*url)) { - url++; + if (c->base_url != NULL) { + nsurl_unref(c->base_url); + c->base_url = NULL; } - /* ';' */ - if (url < end && *url == ';') - url++; + return nerror; +} - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } +/** + * Create a CONTENT_HTML. + * + * The content_html_data structure is initialized and the HTML parser is + * created. + */ - if (url == end) { - /* Just delay specified, so refresh current page */ - dom_string_unref(content); +static nserror +html_create(const content_handler *handler, + lwc_string *imime_type, + const http_parameter *params, + llcache_handle *llcache, + const char *fallback_charset, + bool quirks, + struct content **c) +{ + html_content *html; + nserror error; - c->base.refresh = nsurl_ref( - content_get_url(&c->base)); + html = talloc_zero(0, html_content); + if (html == NULL) + return NSERROR_NOMEM; - content_broadcast(&c->base, CONTENT_MSG_REFRESH, - msg_data); - return true; + error = content__init(&html->base, handler, imime_type, params, + llcache, fallback_charset, quirks); + if (error != NSERROR_OK) { + talloc_free(html); + return error; } - /* "url" */ - if (url <= end - 3) { - if (strncasecmp(url, "url", 3) == 0) { - url += 3; - } else { - /* Unexpected input, ignore this header */ - dom_string_unref(content); - return true; - } - } else { - /* Insufficient input, ignore this header */ - dom_string_unref(content); - return true; + error = html_create_html_data(html, params); + if (error != NSERROR_OK) { + talloc_free(html); + return error; } - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } + *c = (struct content *) html; - /* '=' */ - if (url < end) { - if (*url == '=') { - 
url++; - } else { - /* Unexpected input, ignore this header */ - dom_string_unref(content); - return true; - } - } else { - /* Insufficient input, ignore this header */ - dom_string_unref(content); - return true; - } + return NSERROR_OK; +} - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } - /* '"' or "'" */ - if (url < end && (*url == '"' || *url == '\'')) { - quote = *url; - url++; - } - /* Start of URL */ - refresh = url; +/** + * Process data for CONTENT_HTML. + */ - if (quote != 0) { - /* url-sq | url-dq */ - while (url < end && *url != quote) - url++; - } else { - /* url-nq */ - while (url < end && !isspace(*url)) - url++; +static bool +html_process_data(struct content *c, const char *data, unsigned int size) +{ + html_content *html = (html_content *) c; + binding_error err; + const char *encoding; + + err = binding_parse_chunk(html->parser_binding, + (const uint8_t *) data, size); + if (err == BINDING_ENCODINGCHANGE) { + goto encoding_change; + } else if (err != BINDING_OK) { + union content_msg_data msg_data; + + msg_data.error = messages_get("NoMemory"); + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + + return false; } - /* '"' or "'" or *LWS (we don't care) */ - if (url > refresh) { - /* There's a URL */ - new_url = strndup(refresh, url - refresh); - if (new_url == NULL) { - dom_string_unref(content); - return false; - } + return true; - error = nsurl_join(c->base_url, new_url, &nsurl); - if (error != NSERROR_OK) { - free(new_url); +encoding_change: - dom_string_unref(content); + /* Retrieve new encoding */ + encoding = binding_get_encoding( + html->parser_binding, + &html->encoding_source); - msg_data.error = messages_get("NoMemory"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, - msg_data); + if (html->encoding != NULL) + talloc_free(html->encoding); + html->encoding = talloc_strdup(c, encoding); + if (html->encoding == NULL) { + union content_msg_data msg_data; + + msg_data.error = messages_get("NoMemory"); + 
content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + return false; + } + + /* Destroy binding */ + binding_destroy_tree(html->parser_binding); + + /* Create new binding, using the new encoding */ + err = binding_create_tree(&html->parser_binding, + html->encoding, + nsoption_bool(enable_javascript), + html_process_script, + html); + if (err == BINDING_BADENCODING) { + /* Ok, we don't support the declared encoding. Bailing out + * isn't exactly user-friendly, so fall back to Windows-1252 */ + talloc_free(html->encoding); + html->encoding = talloc_strdup(c, "Windows-1252"); + if (html->encoding == NULL) { + union content_msg_data msg_data; + + msg_data.error = messages_get("NoMemory"); + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } - free(new_url); + err = binding_create_tree(&html->parser_binding, + html->encoding, + nsoption_bool(enable_javascript), + html_process_script, + html); + } - c->base.refresh = nsurl; + if (err != BINDING_OK) { + union content_msg_data msg_data; - content_broadcast(&c->base, CONTENT_MSG_REFRESH, msg_data); + if (err == BINDING_BADENCODING) { + LOG(("Bad encoding: %s", html->encoding + ? html->encoding : "")); + msg_data.error = messages_get("ParsingFail"); + } else + msg_data.error = messages_get("NoMemory"); + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + return false; } - dom_string_unref(content); + { + const char *source_data; + unsigned long source_size; - return true; + source_data = content__get_source_data(c, &source_size); + + /* Recurse to reprocess all the data. This is safe because + * the encoding is now specified at parser start which means + * it cannot be changed again. 
*/ + return html_process_data(c, source_data, source_size); + } } -/** - * Search for meta refresh - * - * http://wp.netscape.com/assist/net_sites/pushpull.html - * - * \param c content structure - * \param head xml node of head element - * \return true on success, false otherwise (error reported) - */ -static bool html_meta_refresh(html_content *c, dom_node *head) +/** process link node */ +static bool html_process_link(html_content *c, dom_node *node) { - dom_node *n, *next; - dom_exception exc; + struct content_rfc5988_link link; /* the link added to the content */ + dom_exception exc; /* returned by libdom functions */ + dom_string *atr_string; + nserror error; - if (head == NULL) - return true; + memset(&link, 0, sizeof(struct content_rfc5988_link)); - exc = dom_node_get_first_child(head, &n); - if (exc != DOM_NO_ERR) + /* check that the relation exists - w3c spec says must be present */ + exc = dom_element_get_attribute(node, html_dom_string_rel, &atr_string); + if ((exc != DOM_NO_ERR) || (atr_string == NULL)) { return false; + } + /* get a lwc string containing the link relation */ + exc = dom_string_intern(atr_string, &link.rel); + dom_string_unref(atr_string); + if (exc != DOM_NO_ERR) { + return false; + } - while (n != NULL) { - dom_node_type type; - - exc = dom_node_get_node_type(n, &type); - if (exc != DOM_NO_ERR) { - dom_node_unref(n); - return false; - } - - if (type == DOM_ELEMENT_NODE) { - dom_string *name; + /* check that the href exists - w3c spec says must be present */ + exc = dom_element_get_attribute(node, html_dom_string_href, &atr_string); + if ((exc != DOM_NO_ERR) || (atr_string == NULL)) { + lwc_string_unref(link.rel); + return false; + } - exc = dom_node_get_node_name(n, &name); - if (exc != DOM_NO_ERR) { - dom_node_unref(n); - return false; - } - - /* Recurse into noscript elements */ - if (strcmp(dom_string_data(name), "noscript") == 0) { - if (html_meta_refresh(c, n) == false) { - /* Some error occurred */ - dom_node_unref(n); - return 
false; - } else if (c->base.refresh) { - /* Meta refresh found - stop */ - dom_node_unref(n); - return true; - } - } else if (strcmp(dom_string_data(name), "meta") == 0) { - if (html_meta_refresh_process_element(c, - n) == false) { - /* Some error occurred */ - dom_node_unref(n); - return false; - } else if (c->base.refresh != NULL) { - /* Meta refresh found - stop */ - dom_node_unref(n); - return true; - } - } - } - - exc = dom_node_get_next_sibling(n, &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(n); - return false; - } - - dom_node_unref(n); - n = next; + /* get nsurl */ + error = nsurl_join(c->base_url, dom_string_data(atr_string), &link.href); + dom_string_unref(atr_string); + if (error != NSERROR_OK) { + lwc_string_unref(link.rel); + return false; } - return true; -} + /* look for optional properties -- we don't care if internment fails */ -/** - * Update a box whose content has completed rendering. - */ + exc = dom_element_get_attribute(node, html_dom_string_hreflang, &atr_string); + if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { + /* get a lwc string containing the href lang */ + exc = dom_string_intern(atr_string, &link.hreflang); + dom_string_unref(atr_string); + } -static void -html_object_done(struct box *box, - hlcache_handle *object, - bool background) -{ - struct box *b; + exc = dom_element_get_attribute(node, html_dom_string_type, &atr_string); + if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { + /* get a lwc string containing the type */ + exc = dom_string_intern(atr_string, &link.type); + dom_string_unref(atr_string); + } - if (background) { - box->background = object; - return; + exc = dom_element_get_attribute(node, html_dom_string_media, &atr_string); + if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { + /* get a lwc string containing the media */ + exc = dom_string_intern(atr_string, &link.media); + dom_string_unref(atr_string); } - box->object = object; + exc = dom_element_get_attribute(node, html_dom_string_sizes, &atr_string); 
+ if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { + /* get a lwc string containing the sizes */ + exc = dom_string_intern(atr_string, &link.sizes); + dom_string_unref(atr_string); + } - if (!(box->flags & REPLACE_DIM)) { - /* invalidate parent min, max widths */ - for (b = box; b; b = b->parent) - b->max_width = UNKNOWN_MAX_WIDTH; + /* add to content */ + content__add_rfc5988_link(&c->base, &link); - /* delete any clones of this box */ - while (box->next && (box->next->flags & CLONE)) { - /* box_free_box(box->next); */ - box->next = box->next->next; - } - } -} + if (link.sizes != NULL) + lwc_string_unref(link.sizes); + if (link.media != NULL) + lwc_string_unref(link.media); + if (link.type != NULL) + lwc_string_unref(link.type); + if (link.hreflang != NULL) + lwc_string_unref(link.hreflang); -/** - * Handle object fetching or loading failure. - * - * \param box box containing object which failed to load - * \param content document of type CONTENT_HTML - * \param background the object was the background image for the box - */ + nsurl_unref(link.href); + lwc_string_unref(link.rel); -static void -html_object_failed(struct box *box, html_content *content, bool background) -{ - /* Nothing to do */ - return; + return true; } -/** - * Callback for hlcache_handle_retrieve() for objects. 
- */ - -static nserror -html_object_callback(hlcache_handle *object, - const hlcache_event *event, - void *pw) +/** process title node */ +static bool html_process_title(html_content *c, dom_node *node) { - struct content_html_object *o = pw; - html_content *c = (html_content *) o->parent; - int x, y; - struct box *box; - - assert(c->base.status != CONTENT_STATUS_ERROR); + dom_exception exc; /* returned by libdom functions */ + dom_string *title; + char *title_str; + bool success; - box = o->box; + if (c->base.title != NULL) + return true; - switch (event->type) { - case CONTENT_MSG_LOADING: - if (c->base.status != CONTENT_STATUS_LOADING && c->bw != NULL) - content_open(object, - c->bw, &c->base, - box, - box->object_params); - break; + exc = dom_node_get_text_content(node, &title); + if ((exc != DOM_NO_ERR) || (title == NULL)) { + return false; + } - case CONTENT_MSG_READY: - if (content_get_type(object) == CONTENT_HTML) { - html_object_done(box, object, o->background); - if (c->base.status == CONTENT_STATUS_READY || - c->base.status == CONTENT_STATUS_DONE) - content__reformat(&c->base, false, - c->base.available_width, - c->base.height); - } - break; + title_str = squash_whitespace(dom_string_data(title)); + dom_string_unref(title); - case CONTENT_MSG_DONE: - c->base.active--; - LOG(("%d fetches active", c->base.active)); + if (title_str == NULL) { + return false; + } - html_object_done(box, object, o->background); + success = content__set_title(&c->base, title_str); - if (c->base.status != CONTENT_STATUS_LOADING && - box->flags & REPLACE_DIM) { - union content_msg_data data; + free(title_str); - if (!box_visible(box)) - break; + return success; +} - box_coords(box, &x, &y); +static bool html_process_base(html_content *c, dom_node *node) +{ + dom_exception exc; /* returned by libdom functions */ + dom_string *atr_string; - data.redraw.x = x + box->padding[LEFT]; - data.redraw.y = y + box->padding[TOP]; - data.redraw.width = box->width; - data.redraw.height = 
box->height; - data.redraw.full_redraw = true; + /* get href attribute if present */ + exc = dom_element_get_attribute(node, html_dom_string_href, &atr_string); + if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { + nsurl *url; + nserror error; - content_broadcast(&c->base, CONTENT_MSG_REDRAW, data); + /* get url from string */ + error = nsurl_create(dom_string_data(atr_string), &url); + dom_string_unref(atr_string); + if (error == NSERROR_OK) { + if (c->base_url != NULL) + nsurl_unref(c->base_url); + c->base_url = url; } - break; + } - case CONTENT_MSG_ERROR: - hlcache_handle_release(object); - o->content = NULL; + /* get target attribute if present and not already set */ + if (c->base_target != NULL) { + return true; + } - c->base.active--; - LOG(("%d fetches active", c->base.active)); + exc = dom_element_get_attribute(node, html_dom_string_target, &atr_string); + if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { + /* Validation rules from the HTML5 spec for the base element: + * The target must be one of _blank, _self, _parent, or + * _top or any identifier which does not begin with an + * underscore + */ + if (*dom_string_data(atr_string) != '_' || + dom_string_caseless_isequal(atr_string, html_dom_string__blank) || + dom_string_caseless_isequal(atr_string, html_dom_string__self) || + dom_string_caseless_isequal(atr_string, html_dom_string__parent) || + dom_string_caseless_isequal(atr_string, html_dom_string__top)) { + c->base_target = strdup(dom_string_data(atr_string)); + } + dom_string_unref(atr_string); + } - content_add_error(&c->base, "?", 0); - html_set_status(c, event->data.error); - content_broadcast(&c->base, CONTENT_MSG_STATUS, event->data); - html_object_failed(box, c, o->background); - break; + return true; +} - case CONTENT_MSG_STATUS: - html_set_status(c, content_get_status_message(object)); - /* content_broadcast(&c->base, CONTENT_MSG_STATUS, 0); */ - break; +/** + * Process elements in . 
+ * + * \param c content structure + * \param head xml node of head element + * \return true on success, false on memory exhaustion + * + * The title and base href are extracted if present. + */ - case CONTENT_MSG_REFORMAT: - break; +static bool html_head(html_content *c, dom_node *head) +{ + dom_node *node; + dom_exception exc; /* returned by libdom functions */ + dom_string *node_name; + dom_node_type node_type; + dom_node *next_node; - case CONTENT_MSG_REDRAW: - if (c->base.status != CONTENT_STATUS_LOADING) { - union content_msg_data data = event->data; + exc = dom_node_get_first_child(head, &node); + if (exc != DOM_NO_ERR) { + return false; + } - if (!box_visible(box)) - break; + while (node != NULL) { + exc = dom_node_get_node_type(node, &node_type); - box_coords(box, &x, &y); + if ((exc == DOM_NO_ERR) && (node_type == DOM_ELEMENT_NODE)) { + exc = dom_node_get_node_name(node, &node_name); - if (hlcache_handle_get_content(object) == - event->data.redraw.object) { - data.redraw.x = data.redraw.x * - box->width / content_get_width(object); - data.redraw.y = data.redraw.y * - box->height / - content_get_height(object); - data.redraw.width = data.redraw.width * - box->width / content_get_width(object); - data.redraw.height = data.redraw.height * - box->height / - content_get_height(object); - data.redraw.object_width = box->width; - data.redraw.object_height = box->height; + if ((exc == DOM_NO_ERR) || (node_name != NULL)) { + if (dom_string_caseless_isequal(node_name, + html_dom_string_title)) { + html_process_title(c, node); + } else if (dom_string_caseless_isequal(node_name, + html_dom_string_base)) { + html_process_base(c, node); + } else if (dom_string_caseless_isequal(node_name, + html_dom_string_link)) { + html_process_link(c, node); + } } - - data.redraw.x += x + box->padding[LEFT]; - data.redraw.y += y + box->padding[TOP]; - data.redraw.object_x += x + box->padding[LEFT]; - data.redraw.object_y += y + box->padding[TOP]; - - content_broadcast(&c->base, 
CONTENT_MSG_REDRAW, data); } - break; - case CONTENT_MSG_REFRESH: - if (content_get_type(object) == CONTENT_HTML) { - /* only for HTML objects */ - schedule(event->data.delay * 100, - html_object_refresh, o); + /* move to next node */ + exc = dom_node_get_next_sibling(node, &next_node); + dom_node_unref(node); + if (exc == DOM_NO_ERR) { + node = next_node; + } else { + node = NULL; } + } - break; + return true; +} - case CONTENT_MSG_LINK: - /* Don't care about favicons */ - break; +static bool html_meta_refresh_process_element(html_content *c, dom_node *n) +{ + union content_msg_data msg_data; + const char *url, *end, *refresh = NULL; + char *new_url; + char quote = '\0'; + dom_string *equiv, *content; + dom_exception exc; + nsurl *nsurl; + nserror error; - default: - assert(0); - } + exc = dom_element_get_attribute(n, html_dom_string_http_equiv, &equiv); + if (exc != DOM_NO_ERR) + return false; - if (c->base.status == CONTENT_STATUS_READY && c->base.active == 0 && - (event->type == CONTENT_MSG_LOADING || - event->type == CONTENT_MSG_DONE || - event->type == CONTENT_MSG_ERROR)) { - /* all objects have arrived */ - content__reformat(&c->base, false, c->base.available_width, - c->base.height); - html_set_status(c, ""); - content_set_done(&c->base); - } + if (equiv == NULL) + return true; - /* If 1) the configuration option to reflow pages while objects are - * fetched is set - * 2) an object is newly fetched & converted, - * 3) the box's dimensions need to change due to being replaced - * 4) the object's parent HTML is ready for reformat, - * 5) the time since the previous reformat is more than the - * configured minimum time between reformats - * then reformat the page to display newly fetched objects */ - else if (nsoption_bool(incremental_reflow) && - event->type == CONTENT_MSG_DONE && - !(box->flags & REPLACE_DIM) && - (c->base.status == CONTENT_STATUS_READY || - c->base.status == CONTENT_STATUS_DONE) && - (wallclock() > c->base.reformat_time)) { - 
content__reformat(&c->base, false, c->base.available_width, - c->base.height); + if (strcasecmp(dom_string_data(equiv), "refresh") != 0) { + dom_string_unref(equiv); + return true; } - return NSERROR_OK; -} - -/** - * Start a fetch for an object required by a page, replacing an existing object. - * - * \param object Object to replace - * \param url URL of object to fetch (copied) - * \return true on success, false on memory exhaustion - */ - -static bool html_replace_object(struct content_html_object *object, nsurl *url) -{ - html_content *c; - hlcache_child_context child; - html_content *page; - nserror error; + dom_string_unref(equiv); - assert(object != NULL); + exc = dom_element_get_attribute(n, html_dom_string_content, &content); + if (exc != DOM_NO_ERR) + return false; - c = (html_content *) object->parent; + if (content == NULL) + return true; - child.charset = c->encoding; - child.quirks = c->base.quirks; + end = dom_string_data(content) + dom_string_byte_length(content); - if (object->content != NULL) { - /* remove existing object */ - if (content_get_status(object->content) != CONTENT_STATUS_DONE) { - c->base.active--; - LOG(("%d fetches active", c->base.active)); - } + /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS] + * intpart := 1*DIGIT + * fracpart := 1*('.' 
| DIGIT) + * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq) + * url-nq := *urlchar + * url-sq := "'" *(urlchar | '"') "'" + * url-dq := '"' *(urlchar | "'") '"' + * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii + * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] + */ - hlcache_handle_release(object->content); - object->content = NULL; + url = dom_string_data(content); - object->box->object = NULL; + /* *LWS */ + while (url < end && isspace(*url)) { + url++; } - /* initialise fetch */ - error = hlcache_handle_retrieve(url, HLCACHE_RETRIEVE_SNIFF_TYPE, - content_get_url(&c->base), NULL, - html_object_callback, object, &child, - object->permitted_types, - &object->content); + /* intpart */ + if (url == end || (*url < '0' || '9' < *url)) { + /* Empty content, or invalid timeval */ + dom_string_unref(content); + return true; + } - if (error != NSERROR_OK) - return false; + msg_data.delay = (int) strtol(url, &new_url, 10); + /* a very small delay and self-referencing URL can cause a loop + * that grinds machines to a halt. To prevent this we set a + * minimum refresh delay of 1s. */ + if (msg_data.delay < 1) + msg_data.delay = 1; - for (page = c; page != NULL; page = page->page) { - page->base.active++; - LOG(("%d fetches active", c->base.active)); + url = new_url; - page->base.status = CONTENT_STATUS_READY; + /* fracpart? 
(ignored, as delay is integer only) */ + while (url < end && (('0' <= *url && *url <= '9') || + *url == '.')) { + url++; } - return true; -} + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } -/** - * schedule() callback for object refresh - */ + /* ';' */ + if (url < end && *url == ';') + url++; -static void html_object_refresh(void *p) -{ - struct content_html_object *object = p; - nsurl *refresh_url; + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } - assert(content_get_type(object->content) == CONTENT_HTML); + if (url == end) { + /* Just delay specified, so refresh current page */ + dom_string_unref(content); - refresh_url = content_get_refresh_url(object->content); + c->base.refresh = nsurl_ref( + content_get_url(&c->base)); - /* Ignore if refresh URL has gone - * (may happen if fetch errored) */ - if (refresh_url == NULL) - return; + content_broadcast(&c->base, CONTENT_MSG_REFRESH, + msg_data); + return true; + } - content_invalidate_reuse_data(object->content); + /* "url" */ + if (url <= end - 3) { + if (strncasecmp(url, "url", 3) == 0) { + url += 3; + } else { + /* Unexpected input, ignore this header */ + dom_string_unref(content); + return true; + } + } else { + /* Insufficient input, ignore this header */ + dom_string_unref(content); + return true; + } - if (!html_replace_object(object, refresh_url)) { - /** \todo handle memory exhaustion */ + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + + /* '=' */ + if (url < end) { + if (*url == '=') { + url++; + } else { + /* Unexpected input, ignore this header */ + dom_string_unref(content); + return true; + } + } else { + /* Insufficient input, ignore this header */ + dom_string_unref(content); + return true; } -} + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + /* '"' or "'" */ + if (url < end && (*url == '"' || *url == '\'')) { + quote = *url; + url++; + } + /* Start of URL */ + refresh = url; + if (quote != 0) { + /* url-sq | url-dq */ + while 
(url < end && *url != quote) + url++; + } else { + /* url-nq */ + while (url < end && !isspace(*url)) + url++; + } + /* '"' or "'" or *LWS (we don't care) */ + if (url > refresh) { + /* There's a URL */ + new_url = strndup(refresh, url - refresh); + if (new_url == NULL) { + dom_string_unref(content); + return false; + } -static void html_destroy_objects(html_content *html) -{ - while (html->object_list != NULL) { - struct content_html_object *victim = html->object_list; + error = nsurl_join(c->base_url, new_url, &nsurl); + if (error != NSERROR_OK) { + free(new_url); - if (victim->content != NULL) { - LOG(("object %p", victim->content)); + dom_string_unref(content); - if (content_get_type(victim->content) == CONTENT_HTML) - schedule_remove(html_object_refresh, victim); + msg_data.error = messages_get("NoMemory"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, + msg_data); - hlcache_handle_release(victim->content); + return false; } - html->object_list = victim->next; - talloc_free(victim); + free(new_url); + + c->base.refresh = nsurl; + + content_broadcast(&c->base, CONTENT_MSG_REFRESH, msg_data); } + + dom_string_unref(content); + + return true; } /** - * Perform post-box-creation conversion of a document + * Search for meta refresh * - * \param c HTML content to complete conversion of - * \param success Whether box tree construction was successful + * http://wp.netscape.com/assist/net_sites/pushpull.html + * + * \param c content structure + * \param head xml node of head element + * \return true on success, false otherwise (error reported) */ -static void html_box_convert_done(html_content *c, bool success) + +static bool html_meta_refresh(html_content *c, dom_node *head) { - union content_msg_data msg_data; - dom_exception exc; /* returned by libdom functions */ - dom_node *html; + dom_node *n, *next; + dom_exception exc; - LOG(("Done XML to box (%p)", c)); + if (head == NULL) + return true; - /* Clean up and report error if unsuccessful or aborted */ - if 
((success == false) || c->aborted) { - html_destroy_objects(c); - if (success == false) - msg_data.error = messages_get("NoMemory"); - else - msg_data.error = messages_get("Stopped"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - content_set_error(&c->base); - return; - } + exc = dom_node_get_first_child(head, &n); + if (exc != DOM_NO_ERR) + return false; -#if ALWAYS_DUMP_BOX - box_dump(stderr, c->layout->children, 0); -#endif -#if ALWAYS_DUMP_FRAMESET - if (c->frameset) - html_dump_frameset(c->frameset, 0); -#endif + while (n != NULL) { + dom_node_type type; - exc = dom_document_get_document_element(c->document, (void *) &html); - if ((exc != DOM_NO_ERR) || (html == NULL)) { - LOG(("error retrieving html element from dom")); - msg_data.error = messages_get("ParsingFail"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - content_set_error(&c->base); - return; - } + exc = dom_node_get_node_type(n, &type); + if (exc != DOM_NO_ERR) { + dom_node_unref(n); + return false; + } - /* extract image maps - can't do this sensibly in xml_to_box */ - if (imagemap_extract(c) == false) { - LOG(("imagemap extraction failed")); - html_destroy_objects(c); - msg_data.error = messages_get("NoMemory"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - content_set_error(&c->base); - return; - } - /*imagemap_dump(c);*/ + if (type == DOM_ELEMENT_NODE) { + dom_string *name; + + exc = dom_node_get_node_name(n, &name); + if (exc != DOM_NO_ERR) { + dom_node_unref(n); + return false; + } - /* Destroy the parser binding */ - binding_destroy_tree(c->parser_binding); - c->parser_binding = NULL; + /* Recurse into noscript elements */ + if (strcmp(dom_string_data(name), "noscript") == 0) { + if (html_meta_refresh(c, n) == false) { + /* Some error occurred */ + dom_node_unref(n); + return false; + } else if (c->base.refresh) { + /* Meta refresh found - stop */ + dom_node_unref(n); + return true; + } + } else if (strcmp(dom_string_data(name), "meta") == 0) { + 
if (html_meta_refresh_process_element(c, + n) == false) { + /* Some error occurred */ + dom_node_unref(n); + return false; + } else if (c->base.refresh != NULL) { + /* Meta refresh found - stop */ + dom_node_unref(n); + return true; + } + } + } - content_set_ready(&c->base); + exc = dom_node_get_next_sibling(n, &next); + if (exc != DOM_NO_ERR) { + dom_node_unref(n); + return false; + } - if (c->base.active == 0) - content_set_done(&c->base); + dom_node_unref(n); + n = next; + } - html_set_status(c, ""); + return true; } /** - * Complete conversion of an HTML document - * - * \param c Content to convert + * Update a box whose content has completed rendering. */ -static void html_finish_conversion(html_content *c) -{ - union content_msg_data msg_data; - dom_exception exc; /* returned by libdom functions */ - dom_node *html; - uint32_t i; - css_error error; - - /* Bail out if we've been aborted */ - if (c->aborted) { - msg_data.error = messages_get("Stopped"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - content_set_error(&c->base); - return; - } - /* check that the base stylesheet loaded; layout fails without it */ - if (c->stylesheets[STYLESHEET_BASE].data.external == NULL) { - msg_data.error = "Base stylesheet failed to load"; - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - content_set_error(&c->base); - return; - } +static void +html_object_done(struct box *box, + hlcache_handle *object, + bool background) +{ + struct box *b; - /* Create selection context */ - error = css_select_ctx_create(ns_realloc, c, &c->select_ctx); - if (error != CSS_OK) { - msg_data.error = messages_get("NoMemory"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - content_set_error(&c->base); + if (background) { + box->background = object; return; } - /* Add sheets to it */ - for (i = STYLESHEET_BASE; i != c->stylesheet_count; i++) { - const struct html_stylesheet *hsheet = &c->stylesheets[i]; - css_stylesheet *sheet; - css_origin origin = 
CSS_ORIGIN_AUTHOR; - - if (i < STYLESHEET_USER) - origin = CSS_ORIGIN_UA; - else if (i < STYLESHEET_START) - origin = CSS_ORIGIN_USER; + box->object = object; - if (hsheet->type == HTML_STYLESHEET_EXTERNAL && - hsheet->data.external != NULL) { - sheet = nscss_get_stylesheet(hsheet->data.external); - } else if (hsheet->type == HTML_STYLESHEET_INTERNAL) { - sheet = hsheet->data.internal->sheet; - } else { - sheet = NULL; - } + if (!(box->flags & REPLACE_DIM)) { + /* invalidate parent min, max widths */ + for (b = box; b; b = b->parent) + b->max_width = UNKNOWN_MAX_WIDTH; - if (sheet != NULL) { - error = css_select_ctx_append_sheet( - c->select_ctx, sheet, - origin, CSS_MEDIA_SCREEN); - if (error != CSS_OK) { - msg_data.error = messages_get("NoMemory"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, - msg_data); - content_set_error(&c->base); - return; - } + /* delete any clones of this box */ + while (box->next && (box->next->flags & CLONE)) { + /* box_free_box(box->next); */ + box->next = box->next->next; } } +} - /* convert xml tree to box tree */ - LOG(("XML to box (%p)", c)); - content_set_status(&c->base, messages_get("Processing")); - content_broadcast(&c->base, CONTENT_MSG_STATUS, msg_data); - - exc = dom_document_get_document_element(c->document, (void *) &html); - if ((exc != DOM_NO_ERR) || (html == NULL)) { - LOG(("error retrieving html element from dom")); - msg_data.error = messages_get("ParsingFail"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - content_set_error(&c->base); - return; - } +/** + * Handle object fetching or loading failure. 
+ * + * \param box box containing object which failed to load + * \param content document of type CONTENT_HTML + * \param background the object was the background image for the box + */ - if (xml_to_box(html, c, html_box_convert_done) == false) { - html_destroy_objects(c); - msg_data.error = messages_get("NoMemory"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - content_set_error(&c->base); - return; - } +static void +html_object_failed(struct box *box, html_content *content, bool background) +{ + /* Nothing to do */ + return; } /** - * Callback for fetchcache() for linked stylesheets. + * Callback for hlcache_handle_retrieve() for objects. */ -static nserror -html_convert_css_callback(hlcache_handle *css, - const hlcache_event *event, - void *pw) +static nserror +html_object_callback(hlcache_handle *object, + const hlcache_event *event, + void *pw) { - html_content *parent = pw; - unsigned int i; - struct html_stylesheet *s; + struct content_html_object *o = pw; + html_content *c = (html_content *) o->parent; + int x, y; + struct box *box; - /* Find sheet */ - for (i = 0, s = parent->stylesheets; - i != parent->stylesheet_count; i++, s++) { - if (s->type == HTML_STYLESHEET_EXTERNAL && - s->data.external == css) - break; - } + assert(c->base.status != CONTENT_STATUS_ERROR); - assert(i != parent->stylesheet_count); + box = o->box; switch (event->type) { case CONTENT_MSG_LOADING: + if (c->base.status != CONTENT_STATUS_LOADING && c->bw != NULL) + content_open(object, + c->bw, &c->base, + box, + box->object_params); break; case CONTENT_MSG_READY: + if (content_get_type(object) == CONTENT_HTML) { + html_object_done(box, object, o->background); + if (c->base.status == CONTENT_STATUS_READY || + c->base.status == CONTENT_STATUS_DONE) + content__reformat(&c->base, false, + c->base.available_width, + c->base.height); + } break; case CONTENT_MSG_DONE: - LOG(("done stylesheet slot %d '%s'", i, - nsurl_access(hlcache_handle_get_url(css)))); - 
parent->base.active--; - LOG(("%d fetches active", parent->base.active)); - break; - - case CONTENT_MSG_ERROR: - LOG(("stylesheet %s failed: %s", - nsurl_access(hlcache_handle_get_url(css)), - event->data.error)); - hlcache_handle_release(css); - s->data.external = NULL; - parent->base.active--; - LOG(("%d fetches active", parent->base.active)); - content_add_error(&parent->base, "?", 0); - break; - - case CONTENT_MSG_STATUS: - html_set_status(parent, content_get_status_message(css)); - content_broadcast(&parent->base, CONTENT_MSG_STATUS, - event->data); - break; - - default: - assert(0); - } - - if (parent->base.active == 0) - html_finish_conversion(parent); - - return NSERROR_OK; -} - -/** - * Handle notification of inline style completion - * - * \param css Inline style object - * \param pw Private data - */ -static void html_inline_style_done(struct content_css_data *css, void *pw) -{ - html_content *html = pw; + c->base.active--; + LOG(("%d fetches active", c->base.active)); - if (--html->base.active == 0) - html_finish_conversion(html); -} + html_object_done(box, object, o->background); -/** - * Process an inline stylesheet in the document. 
- * - * \param c content structure - * \param index Index of stylesheet in stylesheet_content array, - * updated if successful - * \param style xml node of style element - * \return true on success, false if an error occurred - */ + if (c->base.status != CONTENT_STATUS_LOADING && + box->flags & REPLACE_DIM) { + union content_msg_data data; -static bool -html_process_style_element(html_content *c, - unsigned int *index, - dom_node *style) -{ - dom_node *child, *next; - dom_string *val; - dom_exception exc; - union content_msg_data msg_data; - struct html_stylesheet *stylesheets; - struct content_css_data *sheet; - nserror error; + if (!box_visible(box)) + break; - /* type='text/css', or not present (invalid but common) */ - exc = dom_element_get_attribute(style, html_dom_string_type, &val); - if (exc == DOM_NO_ERR && val != NULL) { - if (strcmp(dom_string_data(val), "text/css") != 0) { - dom_string_unref(val); - return true; - } - dom_string_unref(val); - } + box_coords(box, &x, &y); - /* media contains 'screen' or 'all' or not present */ - exc = dom_element_get_attribute(style, html_dom_string_media, &val); - if (exc == DOM_NO_ERR && val != NULL) { - if (strcasestr(dom_string_data(val), "screen") == NULL && - strcasestr(dom_string_data(val), - "all") == NULL) { - dom_string_unref(val); - return true; + data.redraw.x = x + box->padding[LEFT]; + data.redraw.y = y + box->padding[TOP]; + data.redraw.width = box->width; + data.redraw.height = box->height; + data.redraw.full_redraw = true; + + content_broadcast(&c->base, CONTENT_MSG_REDRAW, data); } - dom_string_unref(val); - } + break; - /* Extend array */ - stylesheets = talloc_realloc(c, c->stylesheets, - struct html_stylesheet, *index + 1); - if (stylesheets == NULL) - goto no_memory; + case CONTENT_MSG_ERROR: + hlcache_handle_release(object); - c->stylesheets = stylesheets; - c->stylesheet_count++; + o->content = NULL; - c->stylesheets[(*index)].type = HTML_STYLESHEET_INTERNAL; - 
c->stylesheets[(*index)].data.internal = NULL; + c->base.active--; + LOG(("%d fetches active", c->base.active)); - /* create stylesheet */ - sheet = talloc(c, struct content_css_data); - if (sheet == NULL) { - c->stylesheet_count--; - goto no_memory; - } + content_add_error(&c->base, "?", 0); + html_set_status(c, event->data.error); + content_broadcast(&c->base, CONTENT_MSG_STATUS, event->data); + html_object_failed(box, c, o->background); + break; - error = nscss_create_css_data(sheet, - nsurl_access(c->base_url), NULL, c->quirks, - html_inline_style_done, c); - if (error != NSERROR_OK) { - talloc_free(sheet); - c->stylesheet_count--; - goto no_memory; - } + case CONTENT_MSG_STATUS: + html_set_status(c, content_get_status_message(object)); + /* content_broadcast(&c->base, CONTENT_MSG_STATUS, 0); */ + break; - /* can't just use xmlNodeGetContent(style), because that won't - * give the content of comments which may be used to 'hide' - * the content */ - exc = dom_node_get_first_child(style, &child); - if (exc != DOM_NO_ERR) { - nscss_destroy_css_data(sheet); - talloc_free(sheet); - c->stylesheet_count--; - goto no_memory; - } + case CONTENT_MSG_REFORMAT: + break; - while (child != NULL) { - dom_string *data; + case CONTENT_MSG_REDRAW: + if (c->base.status != CONTENT_STATUS_LOADING) { + union content_msg_data data = event->data; - exc = dom_node_get_text_content(child, &data); - if (exc != DOM_NO_ERR) { - dom_node_unref(child); - nscss_destroy_css_data(sheet); - talloc_free(sheet); - c->stylesheet_count--; - goto no_memory; - } + if (!box_visible(box)) + break; - if (nscss_process_css_data(sheet, dom_string_data(data), - dom_string_byte_length(data)) == false) { - dom_string_unref(data); - dom_node_unref(child); - nscss_destroy_css_data(sheet); - talloc_free(sheet); - c->stylesheet_count--; - goto no_memory; - } + box_coords(box, &x, &y); - dom_string_unref(data); + if (hlcache_handle_get_content(object) == + event->data.redraw.object) { + data.redraw.x = 
data.redraw.x * + box->width / content_get_width(object); + data.redraw.y = data.redraw.y * + box->height / + content_get_height(object); + data.redraw.width = data.redraw.width * + box->width / content_get_width(object); + data.redraw.height = data.redraw.height * + box->height / + content_get_height(object); + data.redraw.object_width = box->width; + data.redraw.object_height = box->height; + } - exc = dom_node_get_next_sibling(child, &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(child); - nscss_destroy_css_data(sheet); - talloc_free(sheet); - c->stylesheet_count--; - goto no_memory; + data.redraw.x += x + box->padding[LEFT]; + data.redraw.y += y + box->padding[TOP]; + data.redraw.object_x += x + box->padding[LEFT]; + data.redraw.object_y += y + box->padding[TOP]; + + content_broadcast(&c->base, CONTENT_MSG_REDRAW, data); } + break; - dom_node_unref(child); - child = next; - } + case CONTENT_MSG_REFRESH: + if (content_get_type(object) == CONTENT_HTML) { + /* only for HTML objects */ + schedule(event->data.delay * 100, + html_object_refresh, o); + } - c->base.active++; - LOG(("%d fetches active", c->base.active)); + break; - /* Convert the content -- manually, as we want the result */ - if (nscss_convert_css_data(sheet) != CSS_OK) { - /* conversion failed */ - c->base.active--; - LOG(("%d fetches active", c->base.active)); - nscss_destroy_css_data(sheet); - talloc_free(sheet); - sheet = NULL; + case CONTENT_MSG_LINK: + /* Don't care about favicons */ + break; + + default: + assert(0); } - /* Update index */ - c->stylesheets[(*index)].data.internal = sheet; - (*index)++; + if (c->base.status == CONTENT_STATUS_READY && c->base.active == 0 && + (event->type == CONTENT_MSG_LOADING || + event->type == CONTENT_MSG_DONE || + event->type == CONTENT_MSG_ERROR)) { + /* all objects have arrived */ + content__reformat(&c->base, false, c->base.available_width, + c->base.height); + html_set_status(c, ""); + content_set_done(&c->base); + } - return true; + /* If 1) the 
configuration option to reflow pages while objects are + * fetched is set + * 2) an object is newly fetched & converted, + * 3) the box's dimensions need to change due to being replaced + * 4) the object's parent HTML is ready for reformat, + * 5) the time since the previous reformat is more than the + * configured minimum time between reformats + * then reformat the page to display newly fetched objects */ + else if (nsoption_bool(incremental_reflow) && + event->type == CONTENT_MSG_DONE && + !(box->flags & REPLACE_DIM) && + (c->base.status == CONTENT_STATUS_READY || + c->base.status == CONTENT_STATUS_DONE) && + (wallclock() > c->base.reformat_time)) { + content__reformat(&c->base, false, c->base.available_width, + c->base.height); + } -no_memory: - msg_data.error = messages_get("NoMemory"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - return false; + return NSERROR_OK; } - -/* depth-first walk the dom calling callback for each element +/** + * Start a fetch for an object required by a page, replacing an existing object. * - * @param root the dom node to use as the root of the tree walk - * @return true if all nodes were examined, false if the callback terminated - * the walk early. 
+ * \param object Object to replace + * \param url URL of object to fetch (copied) + * \return true on success, false on memory exhaustion */ -static bool -html_treewalk_dom(dom_node *root, - bool (*callback)(dom_node *node, dom_string *name, void *ctx), - void *ctx) + +static bool html_replace_object(struct content_html_object *object, nsurl *url) { - dom_node *node; - bool result = true;; + html_content *c; + hlcache_child_context child; + html_content *page; + nserror error; - node = dom_node_ref(root); /* tree root */ + assert(object != NULL); - while (node != NULL) { - dom_node *next = NULL; - dom_node_type type; - dom_string *name; - dom_exception exc; + c = (html_content *) object->parent; - exc = dom_node_get_first_child(node, &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(node); - break; - } + child.charset = c->encoding; + child.quirks = c->base.quirks; - if (next != NULL) { /* 1. children */ - dom_node_unref(node); - node = next; - } else { - exc = dom_node_get_next_sibling(node, &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(node); - break; - } + if (object->content != NULL) { + /* remove existing object */ + if (content_get_status(object->content) != CONTENT_STATUS_DONE) { + c->base.active--; + LOG(("%d fetches active", c->base.active)); + } - if (next != NULL) { /* 2. siblings */ - dom_node_unref(node); - node = next; - } else { /* 3. 
ancestor siblings */ - while (node != NULL) { - exc = dom_node_get_next_sibling(node, - &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(node); - node = NULL; - break; - } + hlcache_handle_release(object->content); + object->content = NULL; - if (next != NULL) { - dom_node_unref(next); - break; - } + object->box->object = NULL; + } - exc = dom_node_get_parent_node(node, - &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(node); - node = NULL; - break; - } + /* initialise fetch */ + error = hlcache_handle_retrieve(url, HLCACHE_RETRIEVE_SNIFF_TYPE, + content_get_url(&c->base), NULL, + html_object_callback, object, &child, + object->permitted_types, + &object->content); - dom_node_unref(node); - node = next; - } + if (error != NSERROR_OK) + return false; - if (node == NULL) - break; + for (page = c; page != NULL; page = page->page) { + page->base.active++; + LOG(("%d fetches active", c->base.active)); - exc = dom_node_get_next_sibling(node, &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(node); - break; - } + page->base.status = CONTENT_STATUS_READY; + } - dom_node_unref(node); - node = next; - } - } + return true; +} - assert(node != NULL); +/** + * schedule() callback for object refresh + */ - exc = dom_node_get_node_type(node, &type); - if ((exc != DOM_NO_ERR) || (type != DOM_ELEMENT_NODE)) - continue; +static void html_object_refresh(void *p) +{ + struct content_html_object *object = p; + nsurl *refresh_url; - exc = dom_node_get_node_name(node, &name); - if (exc != DOM_NO_ERR) - continue; + assert(content_get_type(object->content) == CONTENT_HTML); - result = callback(node, name, ctx); + refresh_url = content_get_refresh_url(object->content); - dom_string_unref(name); + /* Ignore if refresh URL has gone + * (may happen if fetch errored) */ + if (refresh_url == NULL) + return; - if (result == false) { - break; /* callback caused early termination */ - } - + content_invalidate_reuse_data(object->content); + + if (!html_replace_object(object, refresh_url)) { 
+ /** \todo handle memory exhaustion */ } - return result; } -typedef bool (script_handler_t)(struct jscontext *jscontext, const char *data, size_t size) ; -static script_handler_t *select_script_handler(content_type ctype) -{ - if (ctype == CONTENT_JS) { - return js_exec; - } - return NULL; -} -/* attempt to progress script execution - * - * execute scripts using algorithm found in: - * http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#the-script-element - * +/** + * Callback for fetchcache() for linked stylesheets. */ -static bool html_scripts_exec(html_content *c) + +static nserror +html_convert_css_callback(hlcache_handle *css, + const hlcache_event *event, + void *pw) { + html_content *parent = pw; unsigned int i; - struct html_script *s; - script_handler_t *script_handler; + struct html_stylesheet *s; - if (c->jscontext == NULL) - return false; + /* Find sheet */ + for (i = 0, s = parent->stylesheets; + i != parent->stylesheet_count; i++, s++) { + if (s->type == HTML_STYLESHEET_EXTERNAL && + s->data.external == css) + break; + } - for (i = 0, s = c->scripts; i != c->scripts_count; i++, s++) { - if (s->already_started) { - continue; - } + assert(i != parent->stylesheet_count); - assert((s->type == HTML_SCRIPT_EXTERNAL) || - (s->type == HTML_SCRIPT_INTERNAL)); + switch (event->type) { + case CONTENT_MSG_LOADING: + break; - if (s->type == HTML_SCRIPT_EXTERNAL) { - /* ensure script content is present */ - if (s->data.external == NULL) - continue; + case CONTENT_MSG_READY: + break; - /* ensure script content fetch status is not an error */ - if (content_get_status(s->data.external) == CONTENT_STATUS_ERROR) - continue; + case CONTENT_MSG_DONE: + LOG(("done stylesheet slot %d '%s'", i, + nsurl_access(hlcache_handle_get_url(css)))); + parent->base.active--; + LOG(("%d fetches active", parent->base.active)); + break; - /* ensure script handler for content type */ - script_handler = select_script_handler(content_get_type(s->data.external)); - 
if (script_handler == NULL) - continue; /* unsupported type */ + case CONTENT_MSG_ERROR: + LOG(("stylesheet %s failed: %s", + nsurl_access(hlcache_handle_get_url(css)), + event->data.error)); + hlcache_handle_release(css); + s->data.external = NULL; + parent->base.active--; + LOG(("%d fetches active", parent->base.active)); + content_add_error(&parent->base, "?", 0); + break; - if (content_get_status(s->data.external) == CONTENT_STATUS_DONE) { - /* external script is now available */ - const char *data; - unsigned long size; - data = content_get_source_data(s->data.external, &size ); - script_handler(c->jscontext, data, size); - - s->already_started = true; - - } else { - /* script not yet available */ + case CONTENT_MSG_STATUS: + html_set_status(parent, content_get_status_message(css)); + content_broadcast(&parent->base, CONTENT_MSG_STATUS, + event->data); + break; - /* check if deferable or asynchronous */ - if (!s->defer && !s->async) { - break; - } - } - } else { - struct lwc_string_s *lwcmimetype; - dom_string_intern(s->mimetype, &lwcmimetype); + default: + assert(0); + } - /* ensure script handler for content type */ - script_handler = select_script_handler(content_factory_type_from_mime_type(lwcmimetype)); - lwc_string_unref(lwcmimetype); + if (parent->base.active == 0) + html_finish_conversion(parent); - if (script_handler == NULL) - continue; /* unsupported type */ + return NSERROR_OK; +} - script_handler(c->jscontext, - dom_string_data(s->data.internal), - dom_string_byte_length(s->data.internal)); - s->already_started = true; - } - } +/** + * Handle notification of inline style completion + * + * \param css Inline style object + * \param pw Private data + */ +static void html_inline_style_done(struct content_css_data *css, void *pw) +{ + html_content *html = pw; - return true; + if (--html->base.active == 0) + html_finish_conversion(html); } -/* create new html script entry */ -static struct html_script * -html_process_new_script(html_content *c, enum 
html_script_type type) -{ - struct html_script *nscript; - /* add space for new script entry */ - nscript = realloc(c->scripts, - sizeof(struct html_script) * (c->scripts_count + 1)); - if (nscript == NULL) { - return NULL; - } +/** + * Process an inline stylesheet in the document. + * + * \param c content structure + * \param index Index of stylesheet in stylesheet_content array, + * updated if successful + * \param style xml node of style element + * \return true on success, false if an error occurred + */ - c->scripts = nscript; +static bool +html_process_style_element(html_content *c, + unsigned int *index, + dom_node *style) +{ + dom_node *child, *next; + dom_string *val; + dom_exception exc; + union content_msg_data msg_data; + struct html_stylesheet *stylesheets; + struct content_css_data *sheet; + nserror error; - /* increment script entry count */ - nscript = &c->scripts[c->scripts_count]; - c->scripts_count++; + /* type='text/css', or not present (invalid but common) */ + exc = dom_element_get_attribute(style, html_dom_string_type, &val); + if (exc == DOM_NO_ERR && val != NULL) { + if (strcmp(dom_string_data(val), "text/css") != 0) { + dom_string_unref(val); + return true; + } + dom_string_unref(val); + } - nscript->already_started = false; - nscript->parser_inserted = false; - nscript->force_async = true; - nscript->ready_exec = false; - nscript->async = false; - nscript->defer = false; + /* media contains 'screen' or 'all' or not present */ + exc = dom_element_get_attribute(style, html_dom_string_media, &val); + if (exc == DOM_NO_ERR && val != NULL) { + if (strcasestr(dom_string_data(val), "screen") == NULL && + strcasestr(dom_string_data(val), + "all") == NULL) { + dom_string_unref(val); + return true; + } + dom_string_unref(val); + } - nscript->type = type; + /* Extend array */ + stylesheets = talloc_realloc(c, c->stylesheets, + struct html_stylesheet, *index + 1); + if (stylesheets == NULL) + goto no_memory; - return nscript; -} + c->stylesheets = 
stylesheets; + c->stylesheet_count++; -/** - * Callback for fetchcache() for linked stylesheets. - */ + c->stylesheets[(*index)].type = HTML_STYLESHEET_INTERNAL; + c->stylesheets[(*index)].data.internal = NULL; -static nserror -html_convert_script_callback(hlcache_handle *script, - const hlcache_event *event, - void *pw) -{ - html_content *parent = pw; - unsigned int i; - struct html_script *s; + /* create stylesheet */ + sheet = talloc(c, struct content_css_data); + if (sheet == NULL) { + c->stylesheet_count--; + goto no_memory; + } - /* Find sheet */ - for (i = 0, s = parent->scripts; i != parent->scripts_count; i++, s++) { - if (s->type == HTML_SCRIPT_EXTERNAL && - s->data.external == script) - break; + error = nscss_create_css_data(sheet, + nsurl_access(c->base_url), NULL, c->quirks, + html_inline_style_done, c); + if (error != NSERROR_OK) { + talloc_free(sheet); + c->stylesheet_count--; + goto no_memory; } - assert(i != parent->scripts_count); - - switch (event->type) { - case CONTENT_MSG_LOADING: - break; + /* can't just use xmlNodeGetContent(style), because that won't + * give the content of comments which may be used to 'hide' + * the content */ + exc = dom_node_get_first_child(style, &child); + if (exc != DOM_NO_ERR) { + nscss_destroy_css_data(sheet); + talloc_free(sheet); + c->stylesheet_count--; + goto no_memory; + } - case CONTENT_MSG_READY: - break; + while (child != NULL) { + dom_string *data; - case CONTENT_MSG_DONE: - LOG(("script %d done '%s'", i, - nsurl_access(hlcache_handle_get_url(script)))); - parent->base.active--; - LOG(("%d fetches active", parent->base.active)); + exc = dom_node_get_text_content(child, &data); + if (exc != DOM_NO_ERR) { + dom_node_unref(child); + nscss_destroy_css_data(sheet); + talloc_free(sheet); + c->stylesheet_count--; + goto no_memory; + } - /* script finished loading so try and continue execution */ - html_scripts_exec(parent); - break; + if (nscss_process_css_data(sheet, dom_string_data(data), + 
dom_string_byte_length(data)) == false) { + dom_string_unref(data); + dom_node_unref(child); + nscss_destroy_css_data(sheet); + talloc_free(sheet); + c->stylesheet_count--; + goto no_memory; + } - case CONTENT_MSG_ERROR: - LOG(("script %s failed: %s", - nsurl_access(hlcache_handle_get_url(script)), - event->data.error)); - hlcache_handle_release(script); - s->data.external = NULL; - parent->base.active--; - LOG(("%d fetches active", parent->base.active)); - content_add_error(&parent->base, "?", 0); + dom_string_unref(data); - /* script failed loading so try and continue execution */ - html_scripts_exec(parent); + exc = dom_node_get_next_sibling(child, &next); + if (exc != DOM_NO_ERR) { + dom_node_unref(child); + nscss_destroy_css_data(sheet); + talloc_free(sheet); + c->stylesheet_count--; + goto no_memory; + } - break; + dom_node_unref(child); + child = next; + } - case CONTENT_MSG_STATUS: - html_set_status(parent, content_get_status_message(script)); - content_broadcast(&parent->base, CONTENT_MSG_STATUS, - event->data); - break; + c->base.active++; + LOG(("%d fetches active", c->base.active)); - default: - assert(0); + /* Convert the content -- manually, as we want the result */ + if (nscss_convert_css_data(sheet) != CSS_OK) { + /* conversion failed */ + c->base.active--; + LOG(("%d fetches active", c->base.active)); + nscss_destroy_css_data(sheet); + talloc_free(sheet); + sheet = NULL; } - if (parent->base.active == 0) - html_finish_conversion(parent); + /* Update index */ + c->stylesheets[(*index)].data.internal = sheet; + (*index)++; - return NSERROR_OK; + return true; + +no_memory: + msg_data.error = messages_get("NoMemory"); + content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); + return false; } -/** process script node - * + +/* depth-first walk the dom calling callback for each element * + * @param root the dom node to use as the root of the tree walk + * @return true if all nodes were examined, false if the callback terminated + * the walk early. 
*/ -static bool -html_process_script(dom_node *node, dom_string *name, void *ctx) +static bool +html_treewalk_dom(dom_node *root, + bool (*callback)(dom_node *node, dom_string *name, void *ctx), + void *ctx) { - html_content *c = (html_content *)ctx; - dom_exception exc; /* returned by libdom functions */ - dom_string *src, *script, *mimetype; - struct html_script *nscript; - union content_msg_data msg_data; + dom_node *node; + bool result = true;; - if (!dom_string_isequal(name, html_dom_string_script)) - return true; /* was not a script tag, carry on the walk */ + node = dom_node_ref(root); /* tree root */ - /* ensure javascript context is available */ - if (c->jscontext == NULL) { - union content_msg_data msg_data; + while (node != NULL) { + dom_node *next = NULL; + dom_node_type type; + dom_string *name; + dom_exception exc; - msg_data.jscontext = &c->jscontext; - content_broadcast(&c->base, CONTENT_MSG_GETCTX, msg_data); - LOG(("javascript context %p ", c->jscontext)); - if (c->jscontext == NULL) { - /* no context and it could not be created, abort */ - return false; + exc = dom_node_get_first_child(node, &next); + if (exc != DOM_NO_ERR) { + dom_node_unref(node); + break; } - } - - exc = dom_element_get_attribute(node, html_dom_string_type, &mimetype); - if (exc != DOM_NO_ERR || mimetype == NULL) { - mimetype = dom_string_ref(html_dom_string_text_javascript); - } - exc = dom_element_get_attribute(node, html_dom_string_src, &src); - if (exc != DOM_NO_ERR || src == NULL) { - /* does not appear to be a src so script is inline content */ - exc = dom_node_get_text_content(node, &script); - if ((exc != DOM_NO_ERR) || (script == NULL)) { - dom_string_unref(mimetype); - return true; /* no contents, skip */ - } + if (next != NULL) { /* 1. 
children */ + dom_node_unref(node); + node = next; + } else { + exc = dom_node_get_next_sibling(node, &next); + if (exc != DOM_NO_ERR) { + dom_node_unref(node); + break; + } - nscript = html_process_new_script(c, HTML_STYLESHEET_INTERNAL); - if (nscript == NULL) { - dom_string_unref(mimetype); - dom_string_unref(script); - goto html_process_script_no_memory; - } + if (next != NULL) { /* 2. siblings */ + dom_node_unref(node); + node = next; + } else { /* 3. ancestor siblings */ + while (node != NULL) { + exc = dom_node_get_next_sibling(node, + &next); + if (exc != DOM_NO_ERR) { + dom_node_unref(node); + node = NULL; + break; + } - nscript->data.internal = script; - nscript->mimetype = mimetype; + if (next != NULL) { + dom_node_unref(next); + break; + } - /* charset (encoding) */ - } else { - /* script with a src tag */ - nserror ns_error; - nsurl *joined; - hlcache_child_context child; + exc = dom_node_get_parent_node(node, + &next); + if (exc != DOM_NO_ERR) { + dom_node_unref(node); + node = NULL; + break; + } + dom_node_unref(node); + node = next; + } - nscript = html_process_new_script(c, HTML_STYLESHEET_EXTERNAL); - if (nscript == NULL) { - dom_string_unref(src); - dom_string_unref(mimetype); - goto html_process_script_no_memory; - } + if (node == NULL) + break; - /* charset (encoding) */ + exc = dom_node_get_next_sibling(node, &next); + if (exc != DOM_NO_ERR) { + dom_node_unref(node); + break; + } - ns_error = nsurl_join(c->base_url, dom_string_data(src), &joined); - dom_string_unref(src); - if (ns_error != NSERROR_OK) { - dom_string_unref(mimetype); - goto html_process_script_no_memory; + dom_node_unref(node); + node = next; + } } - nscript->mimetype = mimetype; /* keep reference to mimetype */ + assert(node != NULL); - LOG(("script %i '%s'", c->scripts_count, nsurl_access(joined))); + exc = dom_node_get_node_type(node, &type); + if ((exc != DOM_NO_ERR) || (type != DOM_ELEMENT_NODE)) + continue; - child.charset = c->encoding; - child.quirks = c->base.quirks; + 
exc = dom_node_get_node_name(node, &name); + if (exc != DOM_NO_ERR) + continue; - ns_error = hlcache_handle_retrieve(joined, - 0, - content_get_url(&c->base), - NULL, - html_convert_script_callback, - c, - &child, - CONTENT_SCRIPT, - &nscript->data.external); + result = callback(node, name, ctx); - nsurl_unref(joined); + dom_string_unref(name); - if (ns_error != NSERROR_OK) { - goto html_process_script_no_memory; + if (result == false) { + break; /* callback caused early termination */ } - c->base.active++; /* ensure base content knows the fetch is active */ - LOG(("%d fetches active", c->base.active)); - } - - return true; - -html_process_script_no_memory: - msg_data.error = messages_get("NoMemory"); - content_broadcast(&c->base, CONTENT_MSG_ERROR, msg_data); - return false; /* out of memory, abort walk */ + return result; } -/** - * Process inline script and fetch linked scripts. - * - * - * - * \param c content structure - * \param html dom node of html element - * \return true on success, false if an error occurred - */ -static bool html_find_scripts(html_content *c, dom_node *html) -{ - return html_treewalk_dom(html, html_process_script, c); -} + struct find_stylesheet_ctx { unsigned int count; html_content *c; }; -/** callback to process stylesheet elements +/** callback to process stylesheet elements */ -static bool +static bool html_process_stylesheet(dom_node *node, dom_string *name, void *vctx) { - struct find_stylesheet_ctx *ctx = (struct find_stylesheet_ctx *)vctx; + struct find_stylesheet_ctx *ctx = (struct find_stylesheet_ctx *)vctx; dom_string *rel, *type_attr, *media, *href; struct html_stylesheet *stylesheets; nsurl *joined; @@ -1991,10 +1993,10 @@ html_process_stylesheet(dom_node *node, dom_string *name, void *vctx) /* if it is not a link node skip it */ if (strcmp(dom_string_data(name), "link") != 0) { return true; - } + } /* rel= */ - exc = dom_element_get_attribute(node, + exc = dom_element_get_attribute(node, html_dom_string_rel, &rel); if 
(exc != DOM_NO_ERR || rel == NULL) return true; @@ -2009,7 +2011,7 @@ html_process_stylesheet(dom_node *node, dom_string *name, void *vctx) } dom_string_unref(rel); - /* type='text/css' or not present */ + /* type='text/css' or not present */ exc = dom_element_get_attribute(node, html_dom_string_type, &type_attr); if (exc == DOM_NO_ERR && type_attr != NULL) { if (strcmp(dom_string_data(type_attr), "text/css") != 0) { @@ -2051,7 +2053,7 @@ html_process_stylesheet(dom_node *node, dom_string *name, void *vctx) /* start fetch */ stylesheets = talloc_realloc(ctx->c, ctx->c->stylesheets, - struct html_stylesheet, + struct html_stylesheet, ctx->count + 1); if (stylesheets == NULL) { nsurl_unref(joined); @@ -2065,12 +2067,12 @@ html_process_stylesheet(dom_node *node, dom_string *name, void *vctx) child.charset = ctx->c->encoding; child.quirks = ctx->c->base.quirks; - ns_error = hlcache_handle_retrieve(joined, + ns_error = hlcache_handle_retrieve(joined, 0, - content_get_url(&ctx->c->base), + content_get_url(&ctx->c->base), NULL, - html_convert_css_callback, - ctx->c, + html_convert_css_callback, + ctx->c, &child, CONTENT_CSS, &ctx->c->stylesheets[ctx->count].data.external); @@ -2123,7 +2125,7 @@ static bool html_find_stylesheets(html_content *c, dom_node *html) * stylesheet 3 is the user stylesheet */ c->stylesheets = talloc_array(c, struct html_stylesheet, STYLESHEET_START); - if (c->stylesheets == NULL) + if (c->stylesheets == NULL) goto html_find_stylesheets_no_memory; c->stylesheets[STYLESHEET_BASE].type = HTML_STYLESHEET_EXTERNAL; @@ -2150,9 +2152,9 @@ static bool html_find_stylesheets(html_content *c, dom_node *html) LOG(("%d fetches active", c->base.active)); if (c->quirks == BINDING_QUIRKS_MODE_FULL) { - ns_error = hlcache_handle_retrieve(html_quirks_stylesheet_url, + ns_error = hlcache_handle_retrieve(html_quirks_stylesheet_url, 0, content_get_url(&c->base), NULL, - html_convert_css_callback, c, &child, + html_convert_css_callback, c, &child, CONTENT_CSS, 
&c->stylesheets[STYLESHEET_QUIRKS].data.external); if (ns_error != NSERROR_OK) @@ -2250,7 +2252,7 @@ static bool html_convert(struct content *c) if (htmlc->encoding == NULL) { const char *encoding = binding_get_encoding( - htmlc->parser_binding, + htmlc->parser_binding, &htmlc->encoding_source); htmlc->encoding = talloc_strdup(c, encoding); @@ -2270,27 +2272,27 @@ static bool html_convert(struct content *c) /* locate root element and ensure it is html */ exc = dom_document_get_document_element(htmlc->document, (void *) &html); - if ((exc != DOM_NO_ERR) || (html == NULL)) { + if ((exc != DOM_NO_ERR) || (html == NULL)) { LOG(("error retrieving html element from dom")); msg_data.error = messages_get("ParsingFail"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; - } + } exc = dom_node_get_node_name(html, &node_name); - if ((exc != DOM_NO_ERR) || - (node_name == NULL) || + if ((exc != DOM_NO_ERR) || + (node_name == NULL) || (!dom_string_caseless_isequal(node_name, html_dom_string_html))) { LOG(("root element not html")); msg_data.error = messages_get("ParsingFail"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; - } + } dom_string_unref(node_name); /* ensure the head element is found */ exc = dom_node_get_first_child(html, &head); - if ((exc != DOM_NO_ERR) || (head == NULL)) { + if ((exc != DOM_NO_ERR) || (head == NULL)) { head = NULL; LOG(("head element not found")); } else { @@ -2301,7 +2303,7 @@ static bool html_convert(struct content *c) do { exc = dom_node_get_node_type(head, &node_type); - if ((exc != DOM_NO_ERR) || + if ((exc != DOM_NO_ERR) || (node_type == DOM_ELEMENT_NODE)) break; @@ -2317,7 +2319,7 @@ static bool html_convert(struct content *c) if (head != NULL) { exc = dom_node_get_node_name(head, &node_name); if ((exc == DOM_NO_ERR) || (node_name != NULL)) { - if (!dom_string_caseless_isequal(node_name, + if (!dom_string_caseless_isequal(node_name, html_dom_string_head)) { dom_node_unref(head); LOG(("head element not 
found")); @@ -2349,7 +2351,7 @@ static bool html_convert(struct content *c) /* Make all actions absolute */ if (f->action == NULL || f->action[0] == '\0') { /* HTML5 4.10.22.3 step 11 */ - res = url_join(nsurl_access(content_get_url(c)), + res = url_join(nsurl_access(content_get_url(c)), nsurl_access(htmlc->base_url), &action); } else { res = url_join(f->action, nsurl_access(htmlc->base_url), @@ -2370,21 +2372,13 @@ static bool html_convert(struct content *c) f->document_charset = strdup(htmlc->encoding); if (f->document_charset == NULL) { msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } } } - /* find script tags */ - if (nsoption_bool(enable_javascript)) { - /* @todo this ought to be done during parse */ - html_find_scripts(htmlc, html); - /* run as far as we can */ - html_scripts_exec(htmlc); - } - /* get stylesheets */ if (html_find_stylesheets(htmlc, html) == false) return false; @@ -2435,11 +2429,11 @@ bool html_fetch_object(html_content *c, nsurl *url, struct box *box, object->box = box; object->permitted_types = permitted_types; object->background = background; - - error = hlcache_handle_retrieve(url, - HLCACHE_RETRIEVE_SNIFF_TYPE, - content_get_url(&c->base), NULL, - html_object_callback, object, &child, + + error = hlcache_handle_retrieve(url, + HLCACHE_RETRIEVE_SNIFF_TYPE, + content_get_url(&c->base), NULL, + html_object_callback, object, &child, object->permitted_types, &object->content); if (error != NSERROR_OK) { talloc_free(object); @@ -2477,18 +2471,18 @@ static void html_stop(struct content *c) htmlc->aborted = true; break; case CONTENT_STATUS_READY: - for (object = htmlc->object_list; object != NULL; + for (object = htmlc->object_list; object != NULL; object = object->next) { if (object->content == NULL) continue; - if (content_get_status(object->content) == + if (content_get_status(object->content) == CONTENT_STATUS_DONE) ; /* already loaded: 
do nothing */ - else if (content_get_status(object->content) == + else if (content_get_status(object->content) == CONTENT_STATUS_READY) hlcache_handle_abort(object->content); - /* Active count will be updated when + /* Active count will be updated when * html_object_callback receives * CONTENT_MSG_DONE from this object */ else { @@ -2710,7 +2704,7 @@ static void html_destroy(struct content *c) html->stylesheets[i].data.external != NULL) { hlcache_handle_release( html->stylesheets[i].data.external); - } else if (html->stylesheets[i].type == + } else if (html->stylesheets[i].type == HTML_STYLESHEET_INTERNAL && html->stylesheets[i].data.internal != NULL) { nscss_destroy_css_data( @@ -2727,7 +2721,7 @@ static void html_destroy(struct content *c) html->scripts[i].data.external != NULL) { hlcache_handle_release( html->scripts[i].data.external); - } else if (html->scripts[i].type == + } else if (html->scripts[i].type == HTML_SCRIPT_INTERNAL && html->scripts[i].data.internal != NULL) { dom_string_unref(html->scripts[i].data.internal); @@ -2744,8 +2738,8 @@ static nserror html_clone(const struct content *old, struct content **newc) { /** \todo Clone HTML specifics */ - /* In the meantime, we should never be called, as HTML contents - * cannot be shared and we're not intending to fix printing's + /* In the meantime, we should never be called, as HTML contents + * cannot be shared and we're not intending to fix printing's * cloning of documents. */ assert(0 && "html_clone should never be called"); @@ -2766,10 +2760,10 @@ void html_set_status(html_content *c, const char *extra) * Handle a window containing a CONTENT_HTML being opened. 
*/ -static void -html_open(struct content *c, +static void +html_open(struct content *c, struct browser_window *bw, - struct content *page, + struct content *page, struct box *box, struct object_params *params) { @@ -2792,7 +2786,7 @@ html_open(struct content *c, if (content_get_type(object->content) == CONTENT_NONE) continue; - content_open(object->content, + content_open(object->content, bw, c, object->box, object->box->object_params); @@ -2826,7 +2820,7 @@ static void html_close(struct content *c) if (content_get_type(object->content) == CONTENT_HTML) schedule_remove(html_object_refresh, object); - content_close(object->content); + content_close(object->content); } } @@ -2853,10 +2847,10 @@ static struct selection *html_get_selection(struct content *c) * \param data pointer to contextual_content struct. Its fields are updated * with pointers to any relevent content, or set to NULL if none. */ -static void +static void html_get_contextual_content(struct content *c, - int x, - int y, + int x, + int y, struct contextual_content *data) { html_content *html = (html_content *) c; @@ -2870,7 +2864,7 @@ html_get_contextual_content(struct content *c, &containing_content)) != NULL) { box = next; - if (box->style && css_computed_visibility(box->style) == + if (box->style && css_computed_visibility(box->style) == CSS_VISIBILITY_HIDDEN) continue; @@ -2904,7 +2898,7 @@ html_get_contextual_content(struct content *c, * \param scry y-coordinate of point of interest * \return true iff scroll was consumed by something in the content */ -static bool +static bool html_scroll_at_point(struct content *c, int x, int y, int scrx, int scry) { html_content *html = (html_content *) c; @@ -2921,7 +2915,7 @@ html_scroll_at_point(struct content *c, int x, int y, int scrx, int scry) &containing_content)) != NULL) { box = next; - if (box->style && css_computed_visibility(box->style) == + if (box->style && css_computed_visibility(box->style) == CSS_VISIBILITY_HIDDEN) continue; @@ -3141,7 +3135,7 
@@ struct search_context *html_get_search(struct content *c) * Print a frameset tree to stderr. */ -static void +static void html_dump_frameset(struct content_html_frames *frame, unsigned int depth) { unsigned int i; @@ -3502,7 +3496,7 @@ nserror html_init(void) goto error; } - error = nsurl_create("resource:default.css", + error = nsurl_create("resource:default.css", &html_default_stylesheet_url); if (error != NSERROR_OK) goto error; @@ -3527,7 +3521,7 @@ nserror html_init(void) sizeof(#NAME) - 1, \ &html_dom_string_##NAME ); \ if ((exc != DOM_NO_ERR) || (html_dom_string_##NAME == NULL)) \ - goto error + goto error HTML_DOM_STRING_INTERN(html); HTML_DOM_STRING_INTERN(head); diff --git a/render/libdom_binding.c b/render/libdom_binding.c index 06dbbb53e..9ae76469c 100644 --- a/render/libdom_binding.c +++ b/render/libdom_binding.c @@ -24,11 +24,11 @@ #include "utils/log.h" -binding_error binding_create_tree(void *arena, const char *charset, void **ctx) +binding_error binding_create_tree(void **ctx, const char *charset, bool enable_script, dom_script script, void *context) { dom_hubbub_parser *parser = NULL; - parser = dom_hubbub_parser_create(charset, true, NULL, NULL); + parser = dom_hubbub_parser_create(charset, true, enable_script, NULL, script, context); if (parser == NULL) { LOG(("Can't create Hubbub Parser\n")); return BINDING_NOMEM; diff --git a/render/parser_binding.h b/render/parser_binding.h index 90930c379..cf3497867 100644 --- a/render/parser_binding.h +++ b/render/parser_binding.h @@ -20,6 +20,7 @@ #define _NETSURF_RENDER_PARSER_BINDING_H_ #include +#include struct box; struct form; @@ -44,7 +45,7 @@ typedef enum binding_quirks_mode { BINDING_QUIRKS_MODE_FULL } binding_quirks_mode; -binding_error binding_create_tree(void *arena, const char *charset, void **ctx); +binding_error binding_create_tree(void **ctx, const char *charset, bool enable_script, dom_script script, void *context); binding_error binding_destroy_tree(void *ctx); binding_error 
binding_parse_chunk(void *ctx, const uint8_t *data, size_t len); -- cgit v1.2.3 From 752261c66c3a8f6d5cbfb4d1311776dd07f0e144 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Sat, 7 Jul 2012 00:16:07 +0100 Subject: Improve logging and inline script handling --- javascript/jsapi.c | 2 +- javascript/jsapi/document.c | 2 +- render/html.c | 34 +++++++++++++++++++--------------- 3 files changed, 21 insertions(+), 17 deletions(-) (limited to 'render') diff --git a/javascript/jsapi.c b/javascript/jsapi.c index 6ba9427bf..fa8726b42 100644 --- a/javascript/jsapi.c +++ b/javascript/jsapi.c @@ -133,7 +133,7 @@ bool js_exec(jscontext *ctx, const char *txt, size_t txtlen) { JSContext *cx = (JSContext *)ctx; - LOG(("%p \"%s\"",cx ,txt)); + /* LOG(("%p \"%s\"",cx ,txt)); */ if (ctx == NULL) { return false; diff --git a/javascript/jsapi/document.c b/javascript/jsapi/document.c index 29ed20c1b..fb1da7ef8 100644 --- a/javascript/jsapi/document.c +++ b/javascript/jsapi/document.c @@ -56,7 +56,7 @@ static JSBool JSAPI_NATIVE(write, JSContext *cx, uintN argc, jsval *vp) JSString_to_char(u16_txt, txt, length); - LOG(("content %p writing %s",htmlc, txt)); + LOG(("content %p parser %p writing %s",htmlc, htmlc->parser_binding, txt)); dom_hubbub_parser_insert_chunk(htmlc->parser_binding, (uint8_t *)txt, length); diff --git a/render/html.c b/render/html.c index ede8343ac..9db7ab971 100644 --- a/render/html.c +++ b/render/html.c @@ -183,21 +183,6 @@ static bool html_scripts_exec(html_content *c) break; } } - } else { - struct lwc_string_s *lwcmimetype; - dom_string_intern(s->mimetype, &lwcmimetype); - - /* ensure script handler for content type */ - script_handler = select_script_handler(content_factory_type_from_mime_type(lwcmimetype)); - lwc_string_unref(lwcmimetype); - - if (script_handler == NULL) - continue; /* unsupported type */ - - script_handler(c->jscontext, - dom_string_data(s->data.internal), - dom_string_byte_length(s->data.internal)); - s->already_started = true; } } @@ 
-511,6 +496,8 @@ html_process_script(void *ctx, dom_node *node) } } + LOG(("content %p parser %p node %p",c,c->parser_binding, node)); + exc = dom_element_get_attribute(node, html_dom_string_type, &mimetype); if (exc != DOM_NO_ERR || mimetype == NULL) { mimetype = dom_string_ref(html_dom_string_text_javascript); @@ -518,6 +505,9 @@ html_process_script(void *ctx, dom_node *node) exc = dom_element_get_attribute(node, html_dom_string_src, &src); if (exc != DOM_NO_ERR || src == NULL) { + struct lwc_string_s *lwcmimetype; + script_handler_t *script_handler; + /* does not appear to be a src so script is inline content */ exc = dom_node_get_text_content(node, &script); if ((exc != DOM_NO_ERR) || (script == NULL)) { @@ -534,8 +524,22 @@ html_process_script(void *ctx, dom_node *node) nscript->data.internal = script; nscript->mimetype = mimetype; + nscript->already_started = true; /* charset (encoding) */ + + /* ensure script handler for content type */ + dom_string_intern(mimetype, &lwcmimetype); + script_handler = select_script_handler(content_factory_type_from_mime_type(lwcmimetype)); + lwc_string_unref(lwcmimetype); + + if (script_handler != NULL) { + script_handler(c->jscontext, + dom_string_data(script), + dom_string_byte_length(script)); + } + + } else { /* script with a src tag */ nserror ns_error; -- cgit v1.2.3 From c5a87d3ebbe30d51f07f7f4d1ed97e1a56f89676 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Sat, 7 Jul 2012 00:28:45 +0100 Subject: do not try and write to document if the parser is finished --- javascript/jsapi/document.c | 6 +++--- render/html.c | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'render') diff --git a/javascript/jsapi/document.c b/javascript/jsapi/document.c index fb1da7ef8..7d4ebc543 100644 --- a/javascript/jsapi/document.c +++ b/javascript/jsapi/document.c @@ -57,9 +57,9 @@ static JSBool JSAPI_NATIVE(write, JSContext *cx, uintN argc, jsval *vp) JSString_to_char(u16_txt, txt, length); LOG(("content %p parser %p 
writing %s",htmlc, htmlc->parser_binding, txt)); - - dom_hubbub_parser_insert_chunk(htmlc->parser_binding, (uint8_t *)txt, length); - + if (htmlc->parser_binding != NULL) { + dom_hubbub_parser_insert_chunk(htmlc->parser_binding, (uint8_t *)txt, length); + } JSAPI_SET_RVAL(cx, vp, JSVAL_VOID); return JS_TRUE; diff --git a/render/html.c b/render/html.c index 9db7ab971..495cd7dab 100644 --- a/render/html.c +++ b/render/html.c @@ -796,6 +796,7 @@ encoding_change: /* Destroy binding */ binding_destroy_tree(html->parser_binding); + html->parser_binding = NULL; /* Create new binding, using the new encoding */ err = binding_create_tree(&html->parser_binding, @@ -2666,8 +2667,10 @@ static void html_destroy(struct content *c) if (html->base_url) nsurl_unref(html->base_url); - if (html->parser_binding != NULL) + if (html->parser_binding != NULL) { binding_destroy_tree(html->parser_binding); + html->parser_binding = NULL; + } if (html->document != NULL) binding_destroy_document(html->document); -- cgit v1.2.3