/* * Copyright 2007 James Bursa * Copyright 2010 Michael Drake * * This file is part of NetSurf, http://www.netsurf-browser.org/ * * NetSurf is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * NetSurf is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** * \file * Implementation of HTML content handling. */ #include #include #include #include #include #include #include "utils/utils.h" #include "utils/config.h" #include "utils/corestrings.h" #include "utils/http.h" #include "utils/libdom.h" #include "utils/log.h" #include "utils/messages.h" #include "utils/talloc.h" #include "utils/utf8.h" #include "utils/nsoption.h" #include "utils/string.h" #include "utils/ascii.h" #include "netsurf/content.h" #include "netsurf/browser_window.h" #include "netsurf/utf8.h" #include "netsurf/layout.h" #include "netsurf/misc.h" #include "content/hlcache.h" #include "desktop/selection.h" #include "desktop/scrollbar.h" #include "desktop/textarea.h" #include "netsurf/bitmap.h" #include "javascript/js.h" #include "desktop/gui_internal.h" #include "html/html.h" #include "html/html_save.h" #include "html/html_internal.h" #include "html/box.h" #include "html/form_internal.h" #include "html/imagemap.h" #include "html/layout.h" #include "html/search.h" #define CHUNK 4096 /* Change these to 1 to cause a dump to stderr of the frameset or box * when the trees have been built. */ #define ALWAYS_DUMP_FRAMESET 0 #define ALWAYS_DUMP_BOX 0 static const char *html_types[] = { "application/xhtml+xml", "text/html" }; /* Exported interface, see html_internal.h */ bool fire_dom_event(dom_string *type, dom_node *target, bool bubbles, bool cancelable) { dom_exception exc; dom_event *evt; bool result; exc = dom_event_create(&evt); if (exc != DOM_NO_ERR) return false; exc = dom_event_init(evt, type, bubbles, cancelable); if (exc != DOM_NO_ERR) { dom_event_unref(evt); return false; } NSLOG(netsurf, INFO, "Dispatching '%*s' against %p", dom_string_length(type), dom_string_data(type), target); exc = dom_event_target_dispatch_event(target, evt, &result); if (exc != DOM_NO_ERR) { result = false; } dom_event_unref(evt); return result; } /** * Perform post-box-creation conversion of a document * * \param c HTML content to complete conversion of * \param success Whether box tree construction was successful */ static void html_box_convert_done(html_content *c, bool success) { nserror err; dom_exception exc; /* returned by libdom functions */ dom_node *html; NSLOG(netsurf, INFO, "Done XML to box (%p)", c); /* Clean up and report error if unsuccessful or aborted */ if ((success == false) || (c->aborted)) { html_object_free_objects(c); if (success == false) { content_broadcast_errorcode(&c->base, NSERROR_BOX_CONVERT); } else { content_broadcast_errorcode(&c->base, NSERROR_STOPPED); } content_set_error(&c->base); return; } #if ALWAYS_DUMP_BOX box_dump(stderr, c->layout->children, 0, true); #endif #if ALWAYS_DUMP_FRAMESET if (c->frameset) html_dump_frameset(c->frameset, 0); #endif exc = dom_document_get_document_element(c->document, (void *) &html); if ((exc != DOM_NO_ERR) || (html == NULL)) { /** @todo should this call html_object_free_objects(c); * like the other error paths */ NSLOG(netsurf, INFO, "error retrieving html element from dom"); content_broadcast_errorcode(&c->base, NSERROR_DOM); content_set_error(&c->base); return; } /* extract image maps - can't do this sensibly in dom_to_box */ err = imagemap_extract(c); if (err != NSERROR_OK) { NSLOG(netsurf, INFO, "imagemap extraction failed"); html_object_free_objects(c); content_broadcast_errorcode(&c->base, err); content_set_error(&c->base); dom_node_unref(html); return; } /*imagemap_dump(c);*/ /* Destroy the parser binding */ dom_hubbub_parser_destroy(c->parser); c->parser = NULL; content_set_ready(&c->base); if (c->base.active == 0) { content_set_done(&c->base); } dom_node_unref(html); } /** process link node */ static bool html_process_link(html_content *c, dom_node *node) { struct content_rfc5988_link link; /* the link added to the content */ dom_exception exc; /* returned by libdom functions */ dom_string *atr_string; nserror error; memset(&link, 0, sizeof(struct content_rfc5988_link)); /* check that the relation exists - w3c spec says must be present */ exc = dom_element_get_attribute(node, corestring_dom_rel, &atr_string); if ((exc != DOM_NO_ERR) || (atr_string == NULL)) { return false; } /* get a lwc string containing the link relation */ exc = dom_string_intern(atr_string, &link.rel); dom_string_unref(atr_string); if (exc != DOM_NO_ERR) { return false; } /* check that the href exists - w3c spec says must be present */ exc = dom_element_get_attribute(node, corestring_dom_href, &atr_string); if ((exc != DOM_NO_ERR) || (atr_string == NULL)) { lwc_string_unref(link.rel); return false; } /* get nsurl */ error = nsurl_join(c->base_url, dom_string_data(atr_string), &link.href); dom_string_unref(atr_string); if (error != NSERROR_OK) { lwc_string_unref(link.rel); return false; } /* look for optional properties -- we don't care if internment fails */ exc = dom_element_get_attribute(node, corestring_dom_hreflang, &atr_string); if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { /* get a lwc string containing the href lang */ exc = dom_string_intern(atr_string, &link.hreflang); dom_string_unref(atr_string); } exc = dom_element_get_attribute(node, corestring_dom_type, &atr_string); if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { /* get a lwc string containing the type */ exc = dom_string_intern(atr_string, &link.type); dom_string_unref(atr_string); } exc = dom_element_get_attribute(node, corestring_dom_media, &atr_string); if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { /* get a lwc string containing the media */ exc = dom_string_intern(atr_string, &link.media); dom_string_unref(atr_string); } exc = dom_element_get_attribute(node, corestring_dom_sizes, &atr_string); if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { /* get a lwc string containing the sizes */ exc = dom_string_intern(atr_string, &link.sizes); dom_string_unref(atr_string); } /* add to content */ content__add_rfc5988_link(&c->base, &link); if (link.sizes != NULL) lwc_string_unref(link.sizes); if (link.media != NULL) lwc_string_unref(link.media); if (link.type != NULL) lwc_string_unref(link.type); if (link.hreflang != NULL) lwc_string_unref(link.hreflang); nsurl_unref(link.href); lwc_string_unref(link.rel); return true; } /** process title node */ static bool html_process_title(html_content *c, dom_node *node) { dom_exception exc; /* returned by libdom functions */ dom_string *title; char *title_str; bool success; exc = dom_node_get_text_content(node, &title); if ((exc != DOM_NO_ERR) || (title == NULL)) { return false; } title_str = squash_whitespace(dom_string_data(title)); dom_string_unref(title); if (title_str == NULL) { return false; } success = content__set_title(&c->base, title_str); free(title_str); return success; } static bool html_process_base(html_content *c, dom_node *node) { dom_exception exc; /* returned by libdom functions */ dom_string *atr_string; /* get href attribute if present */ exc = dom_element_get_attribute(node, corestring_dom_href, &atr_string); if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { nsurl *url; nserror error; /* get url from string */ error = nsurl_create(dom_string_data(atr_string), &url); dom_string_unref(atr_string); if (error == NSERROR_OK) { if (c->base_url != NULL) nsurl_unref(c->base_url); c->base_url = url; } } /* get target attribute if present and not already set */ if (c->base_target != NULL) { return true; } exc = dom_element_get_attribute(node, corestring_dom_target, &atr_string); if ((exc == DOM_NO_ERR) && (atr_string != NULL)) { /* Validation rules from the HTML5 spec for the base element: * The target must be one of _blank, _self, _parent, or * _top or any identifier which does not begin with an * underscore */ if (*dom_string_data(atr_string) != '_' || dom_string_caseless_lwc_isequal(atr_string, corestring_lwc__blank) || dom_string_caseless_lwc_isequal(atr_string, corestring_lwc__self) || dom_string_caseless_lwc_isequal(atr_string, corestring_lwc__parent) || dom_string_caseless_lwc_isequal(atr_string, corestring_lwc__top)) { c->base_target = strdup(dom_string_data(atr_string)); } dom_string_unref(atr_string); } return true; } static nserror html_meta_refresh_process_element(html_content *c, dom_node *n) { union content_msg_data msg_data; const char *url, *end, *refresh = NULL; char *new_url; char quote = '\0'; dom_string *equiv, *content; dom_exception exc; nsurl *nsurl; nserror error = NSERROR_OK; exc = dom_element_get_attribute(n, corestring_dom_http_equiv, &equiv); if (exc != DOM_NO_ERR) { return NSERROR_DOM; } if (equiv == NULL) { return NSERROR_OK; } if (!dom_string_caseless_lwc_isequal(equiv, corestring_lwc_refresh)) { dom_string_unref(equiv); return NSERROR_OK; } dom_string_unref(equiv); exc = dom_element_get_attribute(n, corestring_dom_content, &content); if (exc != DOM_NO_ERR) { return NSERROR_DOM; } if (content == NULL) { return NSERROR_OK; } end = dom_string_data(content) + dom_string_byte_length(content); /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS] * intpart := 1*DIGIT * fracpart := 1*('.' | DIGIT) * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq) * url-nq := *urlchar * url-sq := "'" *(urlchar | '"') "'" * url-dq := '"' *(urlchar | "'") '"' * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] */ url = dom_string_data(content); /* *LWS */ while (url < end && ascii_is_space(*url)) { url++; } /* intpart */ if (url == end || (*url < '0' || '9' < *url)) { /* Empty content, or invalid timeval */ dom_string_unref(content); return NSERROR_OK; } msg_data.delay = (int) strtol(url, &new_url, 10); /* a very small delay and self-referencing URL can cause a loop * that grinds machines to a halt. To prevent this we set a * minimum refresh delay of 1s. */ if (msg_data.delay < 1) { msg_data.delay = 1; } url = new_url; /* fracpart? (ignored, as delay is integer only) */ while (url < end && (('0' <= *url && *url <= '9') || *url == '.')) { url++; } /* *LWS */ while (url < end && ascii_is_space(*url)) { url++; } /* ';' */ if (url < end && *url == ';') url++; /* *LWS */ while (url < end && ascii_is_space(*url)) { url++; } if (url == end) { /* Just delay specified, so refresh current page */ dom_string_unref(content); c->base.refresh = nsurl_ref( content_get_url(&c->base)); content_broadcast(&c->base, CONTENT_MSG_REFRESH, &msg_data); return NSERROR_OK; } /* "url" */ if (url <= end - 3) { if (strncasecmp(url, "url", 3) == 0) { url += 3; } else { /* Unexpected input, ignore this header */ dom_string_unref(content); return NSERROR_OK; } } else { /* Insufficient input, ignore this header */ dom_string_unref(content); return NSERROR_OK; } /* *LWS */ while (url < end && ascii_is_space(*url)) { url++; } /* '=' */ if (url < end) { if (*url == '=') { url++; } else { /* Unexpected input, ignore this header */ dom_string_unref(content); return NSERROR_OK; } } else { /* Insufficient input, ignore this header */ dom_string_unref(content); return NSERROR_OK; } /* *LWS */ while (url < end && ascii_is_space(*url)) { url++; } /* '"' or "'" */ if (url < end && (*url == '"' || *url == '\'')) { quote = *url; url++; } /* Start of URL */ refresh = url; if (quote != 0) { /* url-sq | url-dq */ while (url < end && *url != quote) url++; } else { /* url-nq */ while (url < end && !ascii_is_space(*url)) url++; } /* '"' or "'" or *LWS (we don't care) */ if (url > refresh) { /* There's a URL */ new_url = strndup(refresh, url - refresh); if (new_url == NULL) { dom_string_unref(content); return NSERROR_NOMEM; } error = nsurl_join(c->base_url, new_url, &nsurl); if (error == NSERROR_OK) { /* broadcast valid refresh url */ c->base.refresh = nsurl; content_broadcast(&c->base, CONTENT_MSG_REFRESH, &msg_data); c->refresh = true; } free(new_url); } dom_string_unref(content); return error; } static bool html_process_img(html_content *c, dom_node *node) { dom_string *src; nsurl *url; nserror err; dom_exception exc; bool success; /* Do nothing if foreground images are disabled */ if (nsoption_bool(foreground_images) == false) { return true; } exc = dom_element_get_attribute(node, corestring_dom_src, &src); if (exc != DOM_NO_ERR || src == NULL) { return true; } err = nsurl_join(c->base_url, dom_string_data(src), &url); if (err != NSERROR_OK) { dom_string_unref(src); return false; } dom_string_unref(src); /* Speculatively fetch the image */ success = html_fetch_object(c, url, NULL, CONTENT_IMAGE, 0, 0, false); nsurl_unref(url); return success; } /* exported function documented in html/html_internal.h */ void html_finish_conversion(html_content *htmlc) { union content_msg_data msg_data; dom_exception exc; /* returned by libdom functions */ dom_node *html; nserror error; /* Bail out if we've been aborted */ if (htmlc->aborted) { content_broadcast_errorcode(&htmlc->base, NSERROR_STOPPED); content_set_error(&htmlc->base); return; } /* If we already have a selection context, then we have already * "finished" conversion. We can get here twice if e.g. some JS * adds a new stylesheet, and the stylesheet gets added after * the HTML content is initially finished. * * If we didn't do this, the HTML content would try to rebuild the * box tree for the html content when this new stylesheet is ready. * NetSurf has no concept of dynamically changing documents, so this * would break badly. */ if (htmlc->select_ctx != NULL) { NSLOG(netsurf, INFO, "Ignoring style change: NS layout is static."); return; } /* create new css selection context */ error = html_css_new_selection_context(htmlc, &htmlc->select_ctx); if (error != NSERROR_OK) { content_broadcast_errorcode(&htmlc->base, error); content_set_error(&htmlc->base); return; } /* fire a simple event named load at the Document's Window * object, but with its target set to the Document object (and * the currentTarget set to the Window object) */ if (htmlc->jscontext != NULL) { js_fire_event(htmlc->jscontext, "load", htmlc->document, NULL); } /* convert dom tree to box tree */ NSLOG(netsurf, INFO, "DOM to box (%p)", htmlc); content_set_status(&htmlc->base, messages_get("Processing")); msg_data.explicit_status_text = NULL; content_broadcast(&htmlc->base, CONTENT_MSG_STATUS, &msg_data); exc = dom_document_get_document_element(htmlc->document, (void *) &html); if ((exc != DOM_NO_ERR) || (html == NULL)) { NSLOG(netsurf, INFO, "error retrieving html element from dom"); content_broadcast_errorcode(&htmlc->base, NSERROR_DOM); content_set_error(&htmlc->base); return; } error = dom_to_box(html, htmlc, html_box_convert_done); if (error != NSERROR_OK) { NSLOG(netsurf, INFO, "box conversion failed"); dom_node_unref(html); html_object_free_objects(htmlc); content_broadcast_errorcode(&htmlc->base, error); content_set_error(&htmlc->base); return; } dom_node_unref(html); } /* handler for a SCRIPT which has been added to a tree */ static void dom_SCRIPT_showed_up(html_content *htmlc, dom_html_script_element *script) { dom_exception exc; dom_html_script_element_flags flags; dom_hubbub_error res; bool within; if (!htmlc->enable_scripting) { NSLOG(netsurf, INFO, "Encountered a script, but scripting is off, ignoring"); return; } NSLOG(netsurf, DEEPDEBUG, "Encountered a script, node %p showed up", script); exc = dom_html_script_element_get_flags(script, &flags); if (exc != DOM_NO_ERR) { NSLOG(netsurf, DEEPDEBUG, "Unable to retrieve flags, giving up"); return; } if (flags & DOM_HTML_SCRIPT_ELEMENT_FLAG_PARSER_INSERTED) { NSLOG(netsurf, DEBUG, "Script was parser inserted, skipping"); return; } exc = dom_node_contains(htmlc->document, script, &within); if (exc != DOM_NO_ERR) { NSLOG(netsurf, DEBUG, "Unable to determine if script was within document, ignoring"); return; } if (!within) { NSLOG(netsurf, DEBUG, "Script was not within the document, ignoring for now"); return; } res = html_process_script(htmlc, (dom_node *) script); if (res == DOM_HUBBUB_OK) { NSLOG(netsurf, DEEPDEBUG, "Inserted script has finished running"); } else { if (res == (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_PAUSED)) { NSLOG(netsurf, DEEPDEBUG, "Inserted script has launced asynchronously"); } else { NSLOG(netsurf, DEEPDEBUG, "Failure starting script"); } } } /* callback for DOMNodeInserted end type */ static void dom_default_action_DOMNodeInserted_cb(struct dom_event *evt, void *pw) { dom_event_target *node; dom_node_type type; dom_exception exc; html_content *htmlc = pw; exc = dom_event_get_target(evt, &node); if ((exc == DOM_NO_ERR) && (node != NULL)) { exc = dom_node_get_node_type(node, &type); if ((exc == DOM_NO_ERR) && (type == DOM_ELEMENT_NODE)) { /* an element node has been inserted */ dom_html_element_type tag_type; exc = dom_html_element_get_tag_type(node, &tag_type); if (exc != DOM_NO_ERR) { tag_type = DOM_HTML_ELEMENT_TYPE__UNKNOWN; } switch (tag_type) { case DOM_HTML_ELEMENT_TYPE_LINK: /* Handle stylesheet loading */ html_css_process_link(htmlc, (dom_node *)node); /* Generic link handling */ html_process_link(htmlc, (dom_node *)node); break; case DOM_HTML_ELEMENT_TYPE_META: if (htmlc->refresh) break; html_meta_refresh_process_element(htmlc, (dom_node *)node); break; case DOM_HTML_ELEMENT_TYPE_TITLE: if (htmlc->title != NULL) break; htmlc->title = dom_node_ref(node); break; case DOM_HTML_ELEMENT_TYPE_BASE: html_process_base(htmlc, (dom_node *)node); break; case DOM_HTML_ELEMENT_TYPE_IMG: html_process_img(htmlc, (dom_node *) node); break; case DOM_HTML_ELEMENT_TYPE_STYLE: html_css_process_style(htmlc, (dom_node *) node); break; case DOM_HTML_ELEMENT_TYPE_SCRIPT: dom_SCRIPT_showed_up(htmlc, (dom_html_script_element *) node); break; default: break; } if (htmlc->enable_scripting) { /* ensure javascript context is available */ if (htmlc->jscontext == NULL) { union content_msg_data msg_data; msg_data.jscontext = &htmlc->jscontext; content_broadcast(&htmlc->base, CONTENT_MSG_GETCTX, &msg_data); NSLOG(netsurf, INFO, "javascript context: %p (htmlc: %p)", htmlc->jscontext, htmlc); } if (htmlc->jscontext != NULL) { js_handle_new_element(htmlc->jscontext, (dom_element *) node); } } } dom_node_unref(node); } } /* callback for DOMNodeInsertedIntoDocument end type */ static void dom_default_action_DOMNodeInsertedIntoDocument_cb(struct dom_event *evt, void *pw) { html_content *htmlc = pw; dom_event_target *node; dom_node_type type; dom_exception exc; exc = dom_event_get_target(evt, &node); if ((exc == DOM_NO_ERR) && (node != NULL)) { exc = dom_node_get_node_type(node, &type); if ((exc == DOM_NO_ERR) && (type == DOM_ELEMENT_NODE)) { /* an element node has been modified */ dom_html_element_type tag_type; exc = dom_html_element_get_tag_type(node, &tag_type); if (exc != DOM_NO_ERR) { tag_type = DOM_HTML_ELEMENT_TYPE__UNKNOWN; } switch (tag_type) { case DOM_HTML_ELEMENT_TYPE_SCRIPT: dom_SCRIPT_showed_up(htmlc, (dom_html_script_element *) node); default: break; } } dom_node_unref(node); } } /* callback for DOMSubtreeModified end type */ static void dom_default_action_DOMSubtreeModified_cb(struct dom_event *evt, void *pw) { dom_event_target *node; dom_node_type type; dom_exception exc; html_content *htmlc = pw; exc = dom_event_get_target(evt, &node); if ((exc == DOM_NO_ERR) && (node != NULL)) { if (htmlc->title == (dom_node *)node) { /* Node is our title node */ html_process_title(htmlc, (dom_node *)node); dom_node_unref(node); return; } exc = dom_node_get_node_type(node, &type); if ((exc == DOM_NO_ERR) && (type == DOM_ELEMENT_NODE)) { /* an element node has been modified */ dom_html_element_type tag_type; exc = dom_html_element_get_tag_type(node, &tag_type); if (exc != DOM_NO_ERR) { tag_type = DOM_HTML_ELEMENT_TYPE__UNKNOWN; } switch (tag_type) { case DOM_HTML_ELEMENT_TYPE_STYLE: html_css_update_style(htmlc, (dom_node *)node); break; default: break; } } dom_node_unref(node); } } static void dom_default_action_finished_cb(struct dom_event *evt, void *pw) { html_content *htmlc = pw; if (htmlc->jscontext != NULL) js_event_cleanup(htmlc->jscontext, evt); } /* callback function selector * * selects a callback function for libdom to call based on the type and phase. * dom_default_action_phase from events/document_event.h * * The principle events are: * DOMSubtreeModified * DOMAttrModified * DOMNodeInserted * DOMNodeInsertedIntoDocument * * @return callback function pointer or NULL for none */ static dom_default_action_callback dom_event_fetcher(dom_string *type, dom_default_action_phase phase, void **pw) { NSLOG(netsurf, DEEPDEBUG, "phase:%d type:%s", phase, dom_string_data(type)); if (phase == DOM_DEFAULT_ACTION_END) { if (dom_string_isequal(type, corestring_dom_DOMNodeInserted)) { return dom_default_action_DOMNodeInserted_cb; } else if (dom_string_isequal(type, corestring_dom_DOMNodeInsertedIntoDocument)) { return dom_default_action_DOMNodeInsertedIntoDocument_cb; } else if (dom_string_isequal(type, corestring_dom_DOMSubtreeModified)) { return dom_default_action_DOMSubtreeModified_cb; } } else if (phase == DOM_DEFAULT_ACTION_FINISHED) { return dom_default_action_finished_cb; } return NULL; } static void html_document_user_data_handler(dom_node_operation operation, dom_string *key, void *data, struct dom_node *src, struct dom_node *dst) { if (dom_string_isequal(corestring_dom___ns_key_html_content_data, key) == false || data == NULL) { return; } switch (operation) { case DOM_NODE_CLONED: NSLOG(netsurf, INFO, "Cloned"); break; case DOM_NODE_RENAMED: NSLOG(netsurf, INFO, "Renamed"); break; case DOM_NODE_IMPORTED: NSLOG(netsurf, INFO, "imported"); break; case DOM_NODE_ADOPTED: NSLOG(netsurf, INFO, "Adopted"); break; case DOM_NODE_DELETED: /* This is the only path I expect */ break; default: NSLOG(netsurf, INFO, "User data operation not handled."); assert(0); } } static nserror html_create_html_data(html_content *c, const http_parameter *params) { lwc_string *charset; nserror nerror; dom_hubbub_parser_params parse_params; dom_hubbub_error error; dom_exception err; void *old_node_data; c->parser = NULL; c->parse_completed = false; c->conversion_begun = false; c->document = NULL; c->quirks = DOM_DOCUMENT_QUIRKS_MODE_NONE; c->encoding = NULL; c->base_url = nsurl_ref(content_get_url(&c->base)); c->base_target = NULL; c->aborted = false; c->refresh = false; c->reflowing = false; c->title = NULL; c->bctx = NULL; c->layout = NULL; c->background_colour = NS_TRANSPARENT; c->stylesheet_count = 0; c->stylesheets = NULL; c->select_ctx = NULL; c->media.type = CSS_MEDIA_SCREEN; c->universal = NULL; c->num_objects = 0; c->object_list = NULL; c->forms = NULL; c->imagemaps = NULL; c->bw = NULL; c->frameset = NULL; c->iframe = NULL; c->page = NULL; c->font_func = guit->layout; c->drag_type = HTML_DRAG_NONE; c->drag_owner.no_owner = true; c->selection_type = HTML_SELECTION_NONE; c->selection_owner.none = true; c->focus_type = HTML_FOCUS_SELF; c->focus_owner.self = true; c->search = NULL; c->search_string = NULL; c->scripts_count = 0; c->scripts = NULL; c->jscontext = NULL; c->enable_scripting = nsoption_bool(enable_javascript); c->base.active = 1; /* The html content itself is active */ if (lwc_intern_string("*", SLEN("*"), &c->universal) != lwc_error_ok) { return NSERROR_NOMEM; } selection_prepare(&c->sel, (struct content *)c, true); nerror = http_parameter_list_find_item(params, corestring_lwc_charset, &charset); if (nerror == NSERROR_OK) { c->encoding = strdup(lwc_string_data(charset)); lwc_string_unref(charset); if (c->encoding == NULL) { lwc_string_unref(c->universal); c->universal = NULL; return NSERROR_NOMEM; } c->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_HEADER; } /* Create the parser binding */ parse_params.enc = c->encoding; parse_params.fix_enc = true; parse_params.enable_script = c->enable_scripting; parse_params.msg = NULL; parse_params.script = html_process_script; parse_params.ctx = c; parse_params.daf = dom_event_fetcher; error = dom_hubbub_parser_create(&parse_params, &c->parser, &c->document); if ((error != DOM_HUBBUB_OK) && (c->encoding != NULL)) { /* Ok, we don't support the declared encoding. Bailing out * isn't exactly user-friendly, so fall back to autodetect */ free(c->encoding); c->encoding = NULL; parse_params.enc = c->encoding; error = dom_hubbub_parser_create(&parse_params, &c->parser, &c->document); } if (error != DOM_HUBBUB_OK) { nsurl_unref(c->base_url); c->base_url = NULL; lwc_string_unref(c->universal); c->universal = NULL; return libdom_hubbub_error_to_nserror(error); } err = dom_node_set_user_data(c->document, corestring_dom___ns_key_html_content_data, c, html_document_user_data_handler, (void *) &old_node_data); if (err != DOM_NO_ERR) { dom_hubbub_parser_destroy(c->parser); c->parser = NULL; nsurl_unref(c->base_url); c->base_url = NULL; lwc_string_unref(c->universal); c->universal = NULL; NSLOG(netsurf, INFO, "Unable to set user data."); return NSERROR_DOM; } assert(old_node_data == NULL); return NSERROR_OK; } /** * Create a CONTENT_HTML. * * The content_html_data structure is initialized and the HTML parser is * created. */ static nserror html_create(const content_handler *handler, lwc_string *imime_type, const http_parameter *params, llcache_handle *llcache, const char *fallback_charset, bool quirks, struct content **c) { html_content *html; nserror error; html = calloc(1, sizeof(html_content)); if (html == NULL) return NSERROR_NOMEM; error = content__init(&html->base, handler, imime_type, params, llcache, fallback_charset, quirks); if (error != NSERROR_OK) { free(html); return error; } error = html_create_html_data(html, params); if (error != NSERROR_OK) { content_broadcast_errorcode(&html->base, error); free(html); return error; } error = html_css_new_stylesheets(html); if (error != NSERROR_OK) { content_broadcast_errorcode(&html->base, error); free(html); return error; } *c = (struct content *) html; return NSERROR_OK; } static nserror html_process_encoding_change(struct content *c, const char *data, unsigned int size) { html_content *html = (html_content *) c; dom_hubbub_parser_params parse_params; dom_hubbub_error error; const char *encoding; const uint8_t *source_data; size_t source_size; /* Retrieve new encoding */ encoding = dom_hubbub_parser_get_encoding(html->parser, &html->encoding_source); if (encoding == NULL) { return NSERROR_NOMEM; } if (html->encoding != NULL) { free(html->encoding); html->encoding = NULL; } html->encoding = strdup(encoding); if (html->encoding == NULL) { return NSERROR_NOMEM; } /* Destroy binding */ dom_hubbub_parser_destroy(html->parser); html->parser = NULL; if (html->document != NULL) { dom_node_unref(html->document); } parse_params.enc = html->encoding; parse_params.fix_enc = true; parse_params.enable_script = html->enable_scripting; parse_params.msg = NULL; parse_params.script = html_process_script; parse_params.ctx = html; parse_params.daf = dom_event_fetcher; /* Create new binding, using the new encoding */ error = dom_hubbub_parser_create(&parse_params, &html->parser, &html->document); if (error != DOM_HUBBUB_OK) { /* Ok, we don't support the declared encoding. Bailing out * isn't exactly user-friendly, so fall back to Windows-1252 */ free(html->encoding); html->encoding = strdup("Windows-1252"); if (html->encoding == NULL) { return NSERROR_NOMEM; } parse_params.enc = html->encoding; error = dom_hubbub_parser_create(&parse_params, &html->parser, &html->document); if (error != DOM_HUBBUB_OK) { return libdom_hubbub_error_to_nserror(error); } } source_data = content__get_source_data(c, &source_size); /* Reprocess all the data. This is safe because * the encoding is now specified at parser start which means * it cannot be changed again. */ error = dom_hubbub_parser_parse_chunk(html->parser, source_data, source_size); return libdom_hubbub_error_to_nserror(error); } /** * Process data for CONTENT_HTML. */ static bool html_process_data(struct content *c, const char *data, unsigned int size) { html_content *html = (html_content *) c; dom_hubbub_error dom_ret; nserror err = NSERROR_OK; /* assume its all going to be ok */ dom_ret = dom_hubbub_parser_parse_chunk(html->parser, (const uint8_t *) data, size); err = libdom_hubbub_error_to_nserror(dom_ret); /* deal with encoding change */ if (err == NSERROR_ENCODING_CHANGE) { err = html_process_encoding_change(c, data, size); } /* broadcast the error if necessary */ if (err != NSERROR_OK) { content_broadcast_errorcode(c, err); return false; } return true; } /** * Convert a CONTENT_HTML for display. * * The following steps are carried out in order: * * - parsing to an XML tree is completed * - stylesheets are fetched * - the XML tree is converted to a box tree and object fetches are started * * On exit, the content status will be either CONTENT_STATUS_DONE if the * document is completely loaded or CONTENT_STATUS_READY if objects are still * being fetched. */ static bool html_convert(struct content *c) { html_content *htmlc = (html_content *) c; dom_exception exc; /* returned by libdom functions */ /* The quirk check and associated stylesheet fetch is "safe" * once the root node has been inserted into the document * which must have happened by this point in the parse. * * faliure to retrive the quirk mode or to start the * stylesheet fetch is non fatal as this "only" affects the * render and it would annoy the user to fail the entire * render for want of a quirks stylesheet. */ exc = dom_document_get_quirks_mode(htmlc->document, &htmlc->quirks); if (exc == DOM_NO_ERR) { html_css_quirks_stylesheets(htmlc); NSLOG(netsurf, INFO, "quirks set to %d", htmlc->quirks); } htmlc->base.active--; /* the html fetch is no longer active */ NSLOG(netsurf, INFO, "%d fetches active (%p)", htmlc->base.active, c); /* The parse cannot be completed here because it may be paused * untill all the resources being fetched have completed. */ /* if there are no active fetches in progress no scripts are * being fetched or they completed already. */ if (html_can_begin_conversion(htmlc)) { return html_begin_conversion(htmlc); } return true; } /* Exported interface documented in html_internal.h */ bool html_can_begin_conversion(html_content *htmlc) { unsigned int i; /* Cannot begin conversion if we're still fetching stuff */ if (htmlc->base.active != 0) return false; for (i = 0; i != htmlc->stylesheet_count; i++) { /* Cannot begin conversion if the stylesheets are modified */ if (htmlc->stylesheets[i].modified) return false; } /* All is good, begin */ return true; } bool html_begin_conversion(html_content *htmlc) { dom_node *html; nserror ns_error; struct form *f; dom_exception exc; /* returned by libdom functions */ dom_string *node_name = NULL; dom_hubbub_error error; /* The act of completing the parse can result in additional data * being flushed through the parser. This may result in new style or * script nodes, upon which the conversion depends. Thus, once we * have completed the parse, we must check again to see if we can * begin the conversion. If we can't, we must stop and wait for the * new styles/scripts to be processed. Once they have been processed, * we will be called again to begin the conversion for real. Thus, * we must also ensure that we don't attempt to complete the parse * multiple times, so store a flag to indicate that parsing is * complete to avoid repeating the completion pointlessly. */ if (htmlc->parse_completed == false) { NSLOG(netsurf, INFO, "Completing parse (%p)", htmlc); /* complete parsing */ error = dom_hubbub_parser_completed(htmlc->parser); if (error != DOM_HUBBUB_OK) { NSLOG(netsurf, INFO, "Parsing failed"); content_broadcast_errorcode(&htmlc->base, libdom_hubbub_error_to_nserror(error)); return false; } htmlc->parse_completed = true; } if (html_can_begin_conversion(htmlc) == false) { NSLOG(netsurf, INFO, "Can't begin conversion (%p)", htmlc); /* We can't proceed (see commentary above) */ return true; } /* Give up processing if we've been aborted */ if (htmlc->aborted) { NSLOG(netsurf, INFO, "Conversion aborted (%p) (active: %u)", htmlc, htmlc->base.active); content_set_error(&htmlc->base); content_broadcast_errorcode(&htmlc->base, NSERROR_STOPPED); return false; } /* Conversion begins proper at this point */ htmlc->conversion_begun = true; /* complete script execution, including deferred scripts */ html_script_exec(htmlc, true); /* fire a simple event that bubbles named DOMContentLoaded at * the Document. */ /* get encoding */ if (htmlc->encoding == NULL) { const char *encoding; encoding = dom_hubbub_parser_get_encoding(htmlc->parser, &htmlc->encoding_source); if (encoding == NULL) { content_broadcast_errorcode(&htmlc->base, NSERROR_NOMEM); return false; } htmlc->encoding = strdup(encoding); if (htmlc->encoding == NULL) { content_broadcast_errorcode(&htmlc->base, NSERROR_NOMEM); return false; } } /* locate root element and ensure it is html */ exc = dom_document_get_document_element(htmlc->document, (void *) &html); if ((exc != DOM_NO_ERR) || (html == NULL)) { NSLOG(netsurf, INFO, "error retrieving html element from dom"); content_broadcast_errorcode(&htmlc->base, NSERROR_DOM); return false; } exc = dom_node_get_node_name(html, &node_name); if ((exc != DOM_NO_ERR) || (node_name == NULL) || (!dom_string_caseless_lwc_isequal(node_name, corestring_lwc_html))) { NSLOG(netsurf, INFO, "root element not html"); content_broadcast_errorcode(&htmlc->base, NSERROR_DOM); dom_node_unref(html); return false; } dom_string_unref(node_name); /* Retrieve forms from parser */ htmlc->forms = html_forms_get_forms(htmlc->encoding, (dom_html_document *) htmlc->document); for (f = htmlc->forms; f != NULL; f = f->prev) { nsurl *action; /* Make all actions absolute */ if (f->action == NULL || f->action[0] == '\0') { /* HTML5 4.10.22.3 step 9 */ nsurl *doc_addr = content_get_url(&htmlc->base); ns_error = nsurl_join(htmlc->base_url, nsurl_access(doc_addr), &action); } else { ns_error = nsurl_join(htmlc->base_url, f->action, &action); } if (ns_error != NSERROR_OK) { content_broadcast_errorcode(&htmlc->base, ns_error); dom_node_unref(html); return false; } free(f->action); f->action = strdup(nsurl_access(action)); nsurl_unref(action); if (f->action == NULL) { content_broadcast_errorcode(&htmlc->base, NSERROR_NOMEM); dom_node_unref(html); return false; } /* Ensure each form has a document encoding */ if (f->document_charset == NULL) { f->document_charset = strdup(htmlc->encoding); if (f->document_charset == NULL) { content_broadcast_errorcode(&htmlc->base, NSERROR_NOMEM); dom_node_unref(html); return false; } } } dom_node_unref(html); if (htmlc->base.active == 0) { html_finish_conversion(htmlc); } return true; } /** * Stop loading a CONTENT_HTML. * * called when the content is aborted. This must clean up any state * created during the fetch. */ static void html_stop(struct content *c) { html_content *htmlc = (html_content *) c; /* invalidate the html content reference to the javascript context * as it is about to become invalid and must not be used any * more. */ html_script_invalidate_ctx(htmlc); switch (c->status) { case CONTENT_STATUS_LOADING: /* Still loading; simply flag that we've been aborted * html_convert/html_finish_conversion will do the rest */ htmlc->aborted = true; break; case CONTENT_STATUS_READY: html_object_abort_objects(htmlc); /* If there are no further active fetches and we're still * in the READY state, transition to the DONE state. */ if (c->status == CONTENT_STATUS_READY && c->active == 0) { content_set_done(c); } break; case CONTENT_STATUS_DONE: /* Nothing to do */ break; default: NSLOG(netsurf, INFO, "Unexpected status %d (%p)", c->status, c); assert(0); } } /** * Reformat a CONTENT_HTML to a new width. */ static void html_reformat(struct content *c, int width, int height) { html_content *htmlc = (html_content *) c; struct box *layout; uint64_t ms_before; uint64_t ms_after; uint64_t ms_interval; nsu_getmonotonic_ms(&ms_before); htmlc->reflowing = true; htmlc->len_ctx.vw = width; htmlc->len_ctx.vh = height; htmlc->len_ctx.root_style = htmlc->layout->style; layout_document(htmlc, width, height); layout = htmlc->layout; /* width and height are at least margin box of document */ c->width = layout->x + layout->padding[LEFT] + layout->width + layout->padding[RIGHT] + layout->border[RIGHT].width + layout->margin[RIGHT]; c->height = layout->y + layout->padding[TOP] + layout->height + layout->padding[BOTTOM] + layout->border[BOTTOM].width + layout->margin[BOTTOM]; /* if boxes overflow right or bottom edge, expand to contain it */ if (c->width < layout->x + layout->descendant_x1) c->width = layout->x + layout->descendant_x1; if (c->height < layout->y + layout->descendant_y1) c->height = layout->y + layout->descendant_y1; selection_reinit(&htmlc->sel, htmlc->layout); htmlc->reflowing = false; /* calculate next reflow time at three times what it took to reflow */ nsu_getmonotonic_ms(&ms_after); ms_interval = (ms_before - ms_after) * 3; if (ms_interval < (nsoption_uint(min_reflow_period) * 10)) { ms_interval = nsoption_uint(min_reflow_period) * 10; } c->reformat_time = ms_after + ms_interval; } /** * Redraw a box. * * \param h content containing the box, of type CONTENT_HTML * \param box box to redraw */ void html_redraw_a_box(hlcache_handle *h, struct box *box) { int x, y; box_coords(box, &x, &y); content_request_redraw(h, x, y, box->padding[LEFT] + box->width + box->padding[RIGHT], box->padding[TOP] + box->height + box->padding[BOTTOM]); } /** * Redraw a box. * * \param html content containing the box, of type CONTENT_HTML * \param box box to redraw. */ void html__redraw_a_box(struct html_content *html, struct box *box) { int x, y; box_coords(box, &x, &y); content__request_redraw((struct content *)html, x, y, box->padding[LEFT] + box->width + box->padding[RIGHT], box->padding[TOP] + box->height + box->padding[BOTTOM]); } static void html_destroy_frameset(struct content_html_frames *frameset) { int i; if (frameset->name) { talloc_free(frameset->name); frameset->name = NULL; } if (frameset->url) { talloc_free(frameset->url); frameset->url = NULL; } if (frameset->children) { for (i = 0; i < (frameset->rows * frameset->cols); i++) { if (frameset->children[i].name) { talloc_free(frameset->children[i].name); frameset->children[i].name = NULL; } if (frameset->children[i].url) { nsurl_unref(frameset->children[i].url); frameset->children[i].url = NULL; } if (frameset->children[i].children) html_destroy_frameset(&frameset->children[i]); } talloc_free(frameset->children); frameset->children = NULL; } } static void html_destroy_iframe(struct content_html_iframe *iframe) { struct content_html_iframe *next; next = iframe; while ((iframe = next) != NULL) { next = iframe->next; if (iframe->name) talloc_free(iframe->name); if (iframe->url) { nsurl_unref(iframe->url); iframe->url = NULL; } talloc_free(iframe); } } static void html_free_layout(html_content *htmlc) { if (htmlc->bctx != NULL) { /* freeing talloc context should let the entire box * set be destroyed */ talloc_free(htmlc->bctx); } } /** * Destroy a CONTENT_HTML and free all resources it owns. */ static void html_destroy(struct content *c) { html_content *html = (html_content *) c; struct form *f, *g; NSLOG(netsurf, INFO, "content %p", c); /* Destroy forms */ for (f = html->forms; f != NULL; f = g) { g = f->prev; form_free(f); } imagemap_destroy(html); if (c->refresh) nsurl_unref(c->refresh); if (html->base_url) nsurl_unref(html->base_url); if (html->parser != NULL) { dom_hubbub_parser_destroy(html->parser); html->parser = NULL; } if (html->document != NULL) { dom_node_unref(html->document); html->document = NULL; } if (html->title != NULL) { dom_node_unref(html->title); html->title = NULL; } /* Free encoding */ if (html->encoding != NULL) { free(html->encoding); html->encoding = NULL; } /* Free base target */ if (html->base_target != NULL) { free(html->base_target); html->base_target = NULL; } /* Free frameset */ if (html->frameset != NULL) { html_destroy_frameset(html->frameset); talloc_free(html->frameset); html->frameset = NULL; } /* Free iframes */ if (html->iframe != NULL) { html_destroy_iframe(html->iframe); html->iframe = NULL; } /* Destroy selection context */ if (html->select_ctx != NULL) { css_select_ctx_destroy(html->select_ctx); html->select_ctx = NULL; } if (html->universal != NULL) { lwc_string_unref(html->universal); html->universal = NULL; } /* Free stylesheets */ html_css_free_stylesheets(html); /* Free scripts */ html_script_free(html); /* Free objects */ html_object_free_objects(html); /* free layout */ html_free_layout(html); } static nserror html_clone(const struct content *old, struct content **newc) { /** \todo Clone HTML specifics */ /* In the meantime, we should never be called, as HTML contents * cannot be shared and we're not intending to fix printing's * cloning of documents. */ assert(0 && "html_clone should never be called"); return true; } /** * Handle a window containing a CONTENT_HTML being opened. */ static void html_open(struct content *c, struct browser_window *bw, struct content *page, struct object_params *params) { html_content *html = (html_content *) c; html->bw = bw; html->page = (html_content *) page; html->drag_type = HTML_DRAG_NONE; html->drag_owner.no_owner = true; /* text selection */ selection_init(&html->sel, html->layout, &html->len_ctx); html->selection_type = HTML_SELECTION_NONE; html->selection_owner.none = true; html_object_open_objects(html, bw); } /** * Handle a window containing a CONTENT_HTML being closed. */ static void html_close(struct content *c) { html_content *htmlc = (html_content *) c; selection_clear(&htmlc->sel, false); if (htmlc->search != NULL) { search_destroy_context(htmlc->search); } /* clear the html content reference to the browser window */ htmlc->bw = NULL; /* invalidate the html content reference to the javascript context * as it is about to become invalid and must not be used any * more. */ html_script_invalidate_ctx(htmlc); /* remove all object references from the html content */ html_object_close_objects(htmlc); } /** * Return an HTML content's selection context */ static void html_clear_selection(struct content *c) { html_content *html = (html_content *) c; switch (html->selection_type) { case HTML_SELECTION_NONE: /* Nothing to do */ assert(html->selection_owner.none == true); break; case HTML_SELECTION_TEXTAREA: textarea_clear_selection(html->selection_owner.textarea-> gadget->data.text.ta); break; case HTML_SELECTION_SELF: assert(html->selection_owner.none == false); selection_clear(&html->sel, true); break; case HTML_SELECTION_CONTENT: content_clear_selection(html->selection_owner.content->object); break; default: break; } /* There is no selection now. */ html->selection_type = HTML_SELECTION_NONE; html->selection_owner.none = true; } /** * Return an HTML content's selection context */ static char *html_get_selection(struct content *c) { html_content *html = (html_content *) c; switch (html->selection_type) { case HTML_SELECTION_TEXTAREA: return textarea_get_selection(html->selection_owner.textarea-> gadget->data.text.ta); case HTML_SELECTION_SELF: assert(html->selection_owner.none == false); return selection_get_copy(&html->sel); case HTML_SELECTION_CONTENT: return content_get_selection( html->selection_owner.content->object); case HTML_SELECTION_NONE: /* Nothing to do */ assert(html->selection_owner.none == true); break; default: break; } return NULL; } /** * Get access to any content, link URLs and objects (images) currently * at the given (x, y) coordinates. * * \param[in] c html content to look inside * \param[in] x x-coordinate of point of interest * \param[in] y y-coordinate of point of interest * \param[out] data Positional features struct to be updated with any * relevent content, or set to NULL if none. * \return NSERROR_OK on success else appropriate error code. */ static nserror html_get_contextual_content(struct content *c, int x, int y, struct browser_window_features *data) { html_content *html = (html_content *) c; struct box *box = html->layout; struct box *next; int box_x = 0, box_y = 0; while ((next = box_at_point(&html->len_ctx, box, x, y, &box_x, &box_y)) != NULL) { box = next; /* hidden boxes are ignored */ if ((box->style != NULL) && css_computed_visibility(box->style) == CSS_VISIBILITY_HIDDEN) { continue; } if (box->iframe) { browser_window_get_features(box->iframe, x - box_x, y - box_y, data); } if (box->object) content_get_contextual_content(box->object, x - box_x, y - box_y, data); if (box->object) data->object = box->object; if (box->href) data->link = box->href; if (box->usemap) { const char *target = NULL; nsurl *url = imagemap_get(html, box->usemap, box_x, box_y, x, y, &target); /* Box might have imagemap, but no actual link area * at point */ if (url != NULL) data->link = url; } if (box->gadget) { switch (box->gadget->type) { case GADGET_TEXTBOX: case GADGET_TEXTAREA: case GADGET_PASSWORD: data->form_features = CTX_FORM_TEXT; break; case GADGET_FILE: data->form_features = CTX_FORM_FILE; break; default: data->form_features = CTX_FORM_NONE; break; } } } return NSERROR_OK; } /** * Scroll deepest thing within the content which can be scrolled at given point * * \param c html content to look inside * \param x x-coordinate of point of interest * \param y y-coordinate of point of interest * \param scrx number of px try to scroll something in x direction * \param scry number of px try to scroll something in y direction * \return true iff scroll was consumed by something in the content */ static bool html_scroll_at_point(struct content *c, int x, int y, int scrx, int scry) { html_content *html = (html_content *) c; struct box *box = html->layout; struct box *next; int box_x = 0, box_y = 0; bool handled_scroll = false; /* TODO: invert order; visit deepest box first */ while ((next = box_at_point(&html->len_ctx, box, x, y, &box_x, &box_y)) != NULL) { box = next; if (box->style && css_computed_visibility(box->style) == CSS_VISIBILITY_HIDDEN) continue; /* Pass into iframe */ if (box->iframe && browser_window_scroll_at_point(box->iframe, x - box_x, y - box_y, scrx, scry) == true) return true; /* Pass into textarea widget */ if (box->gadget && (box->gadget->type == GADGET_TEXTAREA || box->gadget->type == GADGET_PASSWORD || box->gadget->type == GADGET_TEXTBOX) && textarea_scroll(box->gadget->data.text.ta, scrx, scry) == true) return true; /* Pass into object */ if (box->object != NULL && content_scroll_at_point( box->object, x - box_x, y - box_y, scrx, scry) == true) return true; /* Handle box scrollbars */ if (box->scroll_y && scrollbar_scroll(box->scroll_y, scry)) handled_scroll = true; if (box->scroll_x && scrollbar_scroll(box->scroll_x, scrx)) handled_scroll = true; if (handled_scroll == true) return true; } return false; } /** Helper for file gadgets to store their filename unencoded on the * dom node associated with the gadget. * * \todo Get rid of this crap eventually */ static void html__dom_user_data_handler(dom_node_operation operation, dom_string *key, void *_data, struct dom_node *src, struct dom_node *dst) { char *oldfile; char *data = (char *)_data; if (!dom_string_isequal(corestring_dom___ns_key_file_name_node_data, key) || data == NULL) { return; } switch (operation) { case DOM_NODE_CLONED: if (dom_node_set_user_data(dst, corestring_dom___ns_key_file_name_node_data, strdup(data), html__dom_user_data_handler, &oldfile) == DOM_NO_ERR) { if (oldfile != NULL) free(oldfile); } break; case DOM_NODE_RENAMED: case DOM_NODE_IMPORTED: case DOM_NODE_ADOPTED: break; case DOM_NODE_DELETED: free(data); break; default: NSLOG(netsurf, INFO, "User data operation not handled."); assert(0); } } static void html__set_file_gadget_filename(struct content *c, struct form_control *gadget, const char *fn) { nserror ret; char *utf8_fn, *oldfile = NULL; html_content *html = (html_content *)c; struct box *file_box = gadget->box; ret = guit->utf8->local_to_utf8(fn, 0, &utf8_fn); if (ret != NSERROR_OK) { assert(ret != NSERROR_BAD_ENCODING); NSLOG(netsurf, INFO, "utf8 to local encoding conversion failed"); /* Load was for us - just no memory */ return; } form_gadget_update_value(gadget, utf8_fn); /* corestring_dom___ns_key_file_name_node_data */ if (dom_node_set_user_data((dom_node *)file_box->gadget->node, corestring_dom___ns_key_file_name_node_data, strdup(fn), html__dom_user_data_handler, &oldfile) == DOM_NO_ERR) { if (oldfile != NULL) free(oldfile); } /* Redraw box. */ html__redraw_a_box(html, file_box); } void html_set_file_gadget_filename(struct hlcache_handle *hl, struct form_control *gadget, const char *fn) { return html__set_file_gadget_filename(hlcache_handle_get_content(hl), gadget, fn); } /** * Drop a file onto a content at a particular point, or determine if a file * may be dropped onto the content at given point. * * \param c html content to look inside * \param x x-coordinate of point of interest * \param y y-coordinate of point of interest * \param file path to file to be dropped, or NULL to know if drop allowed * \return true iff file drop has been handled, or if drop possible (NULL file) */ static bool html_drop_file_at_point(struct content *c, int x, int y, char *file) { html_content *html = (html_content *) c; struct box *box = html->layout; struct box *next; struct box *file_box = NULL; struct box *text_box = NULL; int box_x = 0, box_y = 0; /* Scan box tree for boxes that can handle drop */ while ((next = box_at_point(&html->len_ctx, box, x, y, &box_x, &box_y)) != NULL) { box = next; if (box->style && css_computed_visibility(box->style) == CSS_VISIBILITY_HIDDEN) continue; if (box->iframe) return browser_window_drop_file_at_point(box->iframe, x - box_x, y - box_y, file); if (box->object && content_drop_file_at_point(box->object, x - box_x, y - box_y, file) == true) return true; if (box->gadget) { switch (box->gadget->type) { case GADGET_FILE: file_box = box; break; case GADGET_TEXTBOX: case GADGET_TEXTAREA: case GADGET_PASSWORD: text_box = box; break; default: /* appease compiler */ break; } } } if (!file_box && !text_box) /* No box capable of handling drop */ return false; if (file == NULL) /* There is a box capable of handling drop here */ return true; /* Handle the drop */ if (file_box) { /* File dropped on file input */ html__set_file_gadget_filename(c, file_box->gadget, file); } else { /* File dropped on text input */ size_t file_len; FILE *fp = NULL; char *buffer; char *utf8_buff; nserror ret; unsigned int size; int bx, by; /* Open file */ fp = fopen(file, "rb"); if (fp == NULL) { /* Couldn't open file, but drop was for us */ return true; } /* Get filesize */ fseek(fp, 0, SEEK_END); file_len = ftell(fp); fseek(fp, 0, SEEK_SET); if ((long)file_len == -1) { /* unable to get file length, but drop was for us */ fclose(fp); return true; } /* Allocate buffer for file data */ buffer = malloc(file_len + 1); if (buffer == NULL) { /* No memory, but drop was for us */ fclose(fp); return true; } /* Stick file into buffer */ if (file_len != fread(buffer, 1, file_len, fp)) { /* Failed, but drop was for us */ free(buffer); fclose(fp); return true; } /* Done with file */ fclose(fp); /* Ensure buffer's string termination */ buffer[file_len] = '\0'; /* TODO: Sniff for text? */ /* Convert to UTF-8 */ ret = guit->utf8->local_to_utf8(buffer, file_len, &utf8_buff); if (ret != NSERROR_OK) { /* bad encoding shouldn't happen */ assert(ret != NSERROR_BAD_ENCODING); NSLOG(netsurf, INFO, "local to utf8 encoding failed"); free(buffer); guit->misc->warning("NoMemory", NULL); return true; } /* Done with buffer */ free(buffer); /* Get new length */ size = strlen(utf8_buff); /* Simulate a click over the input box, to place caret */ box_coords(text_box, &bx, &by); textarea_mouse_action(text_box->gadget->data.text.ta, BROWSER_MOUSE_PRESS_1, x - bx, y - by); /* Paste the file as text */ textarea_drop_text(text_box->gadget->data.text.ta, utf8_buff, size); free(utf8_buff); } return true; } /** * set debug status. * * \param c The content to debug * \param op The debug operation type */ static nserror html_debug(struct content *c, enum content_debug op) { html_redraw_debug = !html_redraw_debug; return NSERROR_OK; } /** * Dump debug info concerning the html_content * * \param c The content to debug * \param f The file to dump to * \param op The debug dump type */ static nserror html_debug_dump(struct content *c, FILE *f, enum content_debug op) { html_content *htmlc = (html_content *)c; dom_node *html; dom_exception exc; /* returned by libdom functions */ nserror ret; assert(htmlc != NULL); if (op == CONTENT_DEBUG_RENDER) { assert(htmlc->layout != NULL); box_dump(f, htmlc->layout, 0, true); ret = NSERROR_OK; } else { if (htmlc->document == NULL) { NSLOG(netsurf, INFO, "No document to dump"); return NSERROR_DOM; } exc = dom_document_get_document_element(htmlc->document, (void *) &html); if ((exc != DOM_NO_ERR) || (html == NULL)) { NSLOG(netsurf, INFO, "Unable to obtain root node"); return NSERROR_DOM; } ret = libdom_dump_structure(html, f, 0); NSLOG(netsurf, INFO, "DOM structure dump returning %d", ret); dom_node_unref(html); } return ret; } #if ALWAYS_DUMP_FRAMESET /** * Print a frameset tree to stderr. */ static void html_dump_frameset(struct content_html_frames *frame, unsigned int depth) { unsigned int i; int row, col, index; const char *unit[] = {"px", "%", "*"}; const char *scrolling[] = {"auto", "yes", "no"}; assert(frame); fprintf(stderr, "%p ", frame); fprintf(stderr, "(%i %i) ", frame->rows, frame->cols); fprintf(stderr, "w%g%s ", frame->width.value, unit[frame->width.unit]); fprintf(stderr, "h%g%s ", frame->height.value,unit[frame->height.unit]); fprintf(stderr, "(margin w%i h%i) ", frame->margin_width, frame->margin_height); if (frame->name) fprintf(stderr, "'%s' ", frame->name); if (frame->url) fprintf(stderr, "<%s> ", frame->url); if (frame->no_resize) fprintf(stderr, "noresize "); fprintf(stderr, "(scrolling %s) ", scrolling[frame->scrolling]); if (frame->border) fprintf(stderr, "border %x ", (unsigned int) frame->border_colour); fprintf(stderr, "\n"); if (frame->children) { for (row = 0; row != frame->rows; row++) { for (col = 0; col != frame->cols; col++) { for (i = 0; i != depth; i++) fprintf(stderr, " "); fprintf(stderr, "(%i %i): ", row, col); index = (row * frame->cols) + col; html_dump_frameset(&frame->children[index], depth + 1); } } } } #endif /** * Retrieve HTML document tree * * \param h HTML content to retrieve document tree from * \return Pointer to document tree */ dom_document *html_get_document(hlcache_handle *h) { html_content *c = (html_content *) hlcache_handle_get_content(h); assert(c != NULL); return c->document; } /** * Retrieve box tree * * \param h HTML content to retrieve tree from * \return Pointer to box tree * * \todo This API must die, as must all use of the box tree outside of * HTML content handler */ struct box *html_get_box_tree(hlcache_handle *h) { html_content *c = (html_content *) hlcache_handle_get_content(h); assert(c != NULL); return c->layout; } /** * Retrieve the charset of an HTML document * * \param c Content to retrieve charset from * \param op The content encoding operation to perform. * \return Pointer to charset, or NULL */ static const char *html_encoding(const struct content *c, enum content_encoding_type op) { html_content *html = (html_content *) c; static char enc_token[10] = "Encoding0"; assert(html != NULL); if (op == CONTENT_ENCODING_SOURCE) { enc_token[8] = '0' + html->encoding_source; return messages_get(enc_token); } return html->encoding; } /** * Retrieve framesets used in an HTML document * * \param h Content to inspect * \return Pointer to framesets, or NULL if none */ struct content_html_frames *html_get_frameset(hlcache_handle *h) { html_content *c = (html_content *) hlcache_handle_get_content(h); assert(c != NULL); return c->frameset; } /** * Retrieve iframes used in an HTML document * * \param h Content to inspect * \return Pointer to iframes, or NULL if none */ struct content_html_iframe *html_get_iframe(hlcache_handle *h) { html_content *c = (html_content *) hlcache_handle_get_content(h); assert(c != NULL); return c->iframe; } /** * Retrieve an HTML content's base URL * * \param h Content to retrieve base target from * \return Pointer to URL */ nsurl *html_get_base_url(hlcache_handle *h) { html_content *c = (html_content *) hlcache_handle_get_content(h); assert(c != NULL); return c->base_url; } /** * Retrieve an HTML content's base target * * \param h Content to retrieve base target from * \return Pointer to target, or NULL if none */ const char *html_get_base_target(hlcache_handle *h) { html_content *c = (html_content *) hlcache_handle_get_content(h); assert(c != NULL); return c->base_target; } /** * Retrieve layout coordinates of box with given id * * \param h HTML document to search * \param frag_id String containing an element id * \param x Updated to global x coord iff id found * \param y Updated to global y coord iff id found * \return true iff id found */ bool html_get_id_offset(hlcache_handle *h, lwc_string *frag_id, int *x, int *y) { struct box *pos; struct box *layout; if (content_get_type(h) != CONTENT_HTML) return false; layout = html_get_box_tree(h); if ((pos = box_find_by_id(layout, frag_id)) != 0) { box_coords(pos, x, y); return true; } return false; } static bool html_exec(struct content *c, const char *src, size_t srclen) { html_content *htmlc = (html_content *)c; bool result = false; dom_exception err; dom_html_body_element *body_node; dom_string *dom_src; dom_text *text_node; dom_node *spare_node; dom_html_script_element *script_node; if (htmlc->document == NULL) { NSLOG(netsurf, DEEPDEBUG, "Unable to exec, no document"); goto out_no_string; } err = dom_string_create((const uint8_t *)src, srclen, &dom_src); if (err != DOM_NO_ERR) { NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not create string"); goto out_no_string; } err = dom_html_document_get_body(htmlc->document, &body_node); if (err != DOM_NO_ERR) { NSLOG(netsurf, DEEPDEBUG, "Unable to retrieve body element"); goto out_no_body; } err = dom_document_create_text_node(htmlc->document, dom_src, &text_node); if (err != DOM_NO_ERR) { NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not create text node"); goto out_no_text_node; } err = dom_document_create_element(htmlc->document, corestring_dom_SCRIPT, &script_node); if (err != DOM_NO_ERR) { NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not create script node"); goto out_no_script_node; } err = dom_node_append_child(script_node, text_node, &spare_node); if (err != DOM_NO_ERR) { NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not insert code node into script node"); goto out_unparented; } dom_node_unref(spare_node); /* We do not need the spare ref at all */ err = dom_node_append_child(body_node, script_node, &spare_node); if (err != DOM_NO_ERR) { NSLOG(netsurf, DEEPDEBUG, "Unable to exec, could not insert script node into document body"); goto out_unparented; } dom_node_unref(spare_node); /* Again no need for the spare ref */ /* We successfully inserted the node into the DOM */ result = true; /* Now we unwind, starting by removing the script from wherever it * ended up parented */ err = dom_node_get_parent_node(script_node, &spare_node); if (err == DOM_NO_ERR && spare_node != NULL) { dom_node *second_spare; err = dom_node_remove_child(spare_node, script_node, &second_spare); if (err == DOM_NO_ERR) { dom_node_unref(second_spare); } dom_node_unref(spare_node); } out_unparented: dom_node_unref(script_node); out_no_script_node: dom_node_unref(text_node); out_no_text_node: dom_node_unref(body_node); out_no_body: dom_string_unref(dom_src); out_no_string: return result; } /** * Compute the type of a content * * \return CONTENT_HTML */ static content_type html_content_type(void) { return CONTENT_HTML; } static void html_fini(void) { html_css_fini(); } static const content_handler html_content_handler = { .fini = html_fini, .create = html_create, .process_data = html_process_data, .data_complete = html_convert, .reformat = html_reformat, .destroy = html_destroy, .stop = html_stop, .mouse_track = html_mouse_track, .mouse_action = html_mouse_action, .keypress = html_keypress, .redraw = html_redraw, .open = html_open, .close = html_close, .get_selection = html_get_selection, .clear_selection = html_clear_selection, .get_contextual_content = html_get_contextual_content, .scroll_at_point = html_scroll_at_point, .drop_file_at_point = html_drop_file_at_point, .search = html_search, .search_clear = html_search_clear, .debug_dump = html_debug_dump, .debug = html_debug, .clone = html_clone, .get_encoding = html_encoding, .type = html_content_type, .exec = html_exec, .no_share = true, }; nserror html_init(void) { uint32_t i; nserror error; error = html_css_init(); if (error != NSERROR_OK) goto error; for (i = 0; i < NOF_ELEMENTS(html_types); i++) { error = content_factory_register_handler(html_types[i], &html_content_handler); if (error != NSERROR_OK) goto error; } return NSERROR_OK; error: html_fini(); return error; } /** * Get the browser window containing an HTML content * * \param c HTML content * \return the browser window */ struct browser_window *html_get_browser_window(struct content *c) { html_content *html = (html_content *) c; assert(c != NULL); assert(c->handler == &html_content_handler); return html->bw; }