/* * Copyright 2012 Vincent Sanders * * This file is part of NetSurf, http://www.netsurf-browser.org/ * * NetSurf is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * NetSurf is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** \file * libdom utilities (implementation). */ #include #include #include #include "utils/config.h" #include "utils/log.h" #include "utils/libdom.h" /* exported interface documented in libdom.h */ bool libdom_treewalk(dom_node *root, bool (*callback)(dom_node *node, dom_string *name, void *ctx), void *ctx) { dom_node *node; bool result = true; node = dom_node_ref(root); /* tree root */ while (node != NULL) { dom_node *next = NULL; dom_node_type type; dom_string *name; dom_exception exc; exc = dom_node_get_first_child(node, &next); if (exc != DOM_NO_ERR) { dom_node_unref(node); break; } if (next != NULL) { /* 1. Got children */ dom_node_unref(node); node = next; } else { /* No children; siblings & ancestor's siblings */ while (node != NULL) { exc = dom_node_get_next_sibling(node, &next); if (exc != DOM_NO_ERR) { dom_node_unref(node); node = NULL; break; } if (next != NULL) { /* 2. Got sibling */ break; } exc = dom_node_get_parent_node(node, &next); if (exc != DOM_NO_ERR) { dom_node_unref(node); node = NULL; break; } /* 3. Try parent */ dom_node_unref(node); node = next; } if (node == NULL) break; dom_node_unref(node); node = next; } assert(node != NULL); exc = dom_node_get_node_type(node, &type); if ((exc != DOM_NO_ERR) || (type != DOM_ELEMENT_NODE)) continue; exc = dom_node_get_node_name(node, &name); if (exc != DOM_NO_ERR) continue; result = callback(node, name, ctx); dom_string_unref(name); if (result == false) { break; /* callback caused early termination */ } } return result; } /* libdom_treewalk context for libdom_find_element */ struct find_element_ctx { lwc_string *search; dom_node *found; }; /* libdom_treewalk callback for libdom_find_element */ static bool libdom_find_element_callback(dom_node *node, dom_string *name, void *ctx) { struct find_element_ctx *data = ctx; if (dom_string_caseless_lwc_isequal(name, data->search)) { /* Found element */ data->found = node; return false; /* Discontinue search */ } return true; /* Continue search */ } /* exported interface documented in libdom.h */ dom_node *libdom_find_element(dom_node *node, lwc_string *element_name) { struct find_element_ctx data; assert(element_name != NULL); if (node == NULL) return NULL; data.search = element_name; data.found = NULL; libdom_treewalk(node, libdom_find_element_callback, &data); return data.found; } /* exported interface documented in libdom.h */ dom_node *libdom_find_first_element(dom_node *parent, lwc_string *element_name) { dom_node *element; dom_exception exc; dom_string *node_name = NULL; dom_node_type node_type; dom_node *next_node; exc = dom_node_get_first_child(parent, &element); if ((exc != DOM_NO_ERR) || (element == NULL)) { return NULL; } /* find first node thats a element */ do { exc = dom_node_get_node_type(element, &node_type); if ((exc == DOM_NO_ERR) && (node_type == DOM_ELEMENT_NODE)) { exc = dom_node_get_node_name(element, &node_name); if ((exc == DOM_NO_ERR) && (node_name != NULL)) { if (dom_string_caseless_lwc_isequal(node_name, element_name)) { dom_string_unref(node_name); break; } dom_string_unref(node_name); } } exc = dom_node_get_next_sibling(element, &next_node); dom_node_unref(element); if (exc == DOM_NO_ERR) { element = next_node; } else { element = NULL; } } while (element != NULL); return element; } /* exported interface documented in libdom.h */ /* TODO: return appropriate errors */ nserror libdom_iterate_child_elements(dom_node *parent, libdom_iterate_cb cb, void *ctx) { dom_nodelist *children; uint32_t index, num_children; dom_exception error; error = dom_node_get_child_nodes(parent, &children); if (error != DOM_NO_ERR || children == NULL) return NSERROR_NOMEM; error = dom_nodelist_get_length(children, &num_children); if (error != DOM_NO_ERR) { dom_nodelist_unref(children); return NSERROR_NOMEM; } for (index = 0; index < num_children; index++) { dom_node *child; dom_node_type type; error = dom_nodelist_item(children, index, &child); if (error != DOM_NO_ERR) { dom_nodelist_unref(children); return NSERROR_NOMEM; } error = dom_node_get_node_type(child, &type); if (error == DOM_NO_ERR && type == DOM_ELEMENT_NODE) { nserror err = cb(child, ctx); if (err != NSERROR_OK) { dom_node_unref(child); dom_nodelist_unref(children); return err; } } dom_node_unref(child); } dom_nodelist_unref(children); return NSERROR_OK; } /* exported interface documented in libdom.h */ nserror libdom_hubbub_error_to_nserror(dom_hubbub_error error) { switch (error) { /* HUBBUB_REPROCESS is not handled here because it can * never occur outside the hubbub treebuilder */ case DOM_HUBBUB_OK: /* parsed ok */ return NSERROR_OK; case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_PAUSED): /* hubbub input paused */ return NSERROR_OK; case DOM_HUBBUB_NOMEM: /* out of memory error from DOM */ return NSERROR_NOMEM; case DOM_HUBBUB_BADPARM: /* Bad parameter passed to creation */ return NSERROR_BAD_PARAMETER; case DOM_HUBBUB_DOM: /* DOM call returned error */ return NSERROR_DOM; case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_ENCODINGCHANGE): /* encoding changed */ return NSERROR_ENCODING_CHANGE; case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NOMEM): /* out of memory error from parser */ return NSERROR_NOMEM; case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADPARM): return NSERROR_BAD_PARAMETER; case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_INVALID): return NSERROR_INVALID; case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_FILENOTFOUND): return NSERROR_NOT_FOUND; case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NEEDDATA): return NSERROR_NEED_DATA; case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADENCODING): return NSERROR_BAD_ENCODING; case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_UNKNOWN): /* currently only generated by the libdom hubbub binding */ return NSERROR_DOM; default: /* unknown error */ /** @todo better error handling and reporting */ return NSERROR_UNKNOWN; } return NSERROR_UNKNOWN; } static void ignore_dom_msg(uint32_t severity, void *ctx, const char *msg, ...) { } /** * Dump attribute/value for an element node * * \param node The element node to dump attribute details for * \param f file handle to dump to. * \param attribute The attribute to dump * \return true on success, or false on error */ static bool dump_dom_element_attribute(dom_node *node, FILE *f, const char *attribute) { dom_exception exc; dom_string *attr = NULL; dom_string *attr_value = NULL; dom_node_type type; const char *string; size_t length; /* Should only have element nodes here */ exc = dom_node_get_node_type(node, &type); if (exc != DOM_NO_ERR) { fprintf(f, " Exception raised for node_get_node_type\n"); return false; } assert(type == DOM_ELEMENT_NODE); /* Create a dom_string containing required attribute name. */ exc = dom_string_create_interned((uint8_t *)attribute, strlen(attribute), &attr); if (exc != DOM_NO_ERR) { fprintf(f, " Exception raised for dom_string_create\n"); return false; } /* Get class attribute's value */ exc = dom_element_get_attribute(node, attr, &attr_value); if (exc != DOM_NO_ERR) { fprintf(f, " Exception raised for element_get_attribute\n"); dom_string_unref(attr); return false; } else if (attr_value == NULL) { /* Element lacks required attribute */ dom_string_unref(attr); return true; } /* Finished with the attr dom_string */ dom_string_unref(attr); /* Get attribute value's string data */ string = dom_string_data(attr_value); length = dom_string_byte_length(attr_value); /* Print attribute info */ fprintf(f, " %s=\"%.*s\"", attribute, (int)length, string); /* Finished with the attr_value dom_string */ dom_string_unref(attr_value); return true; } /** * Print a line in a DOM structure dump for an element * * \param node The node to dump * \param f file handle to dump to. * \param depth The node's depth * \return true on success, or false on error */ static bool dump_dom_element(dom_node *node, FILE *f, int depth) { dom_exception exc; dom_string *node_name = NULL; dom_node_type type; int i; const char *string; size_t length; /* Only interested in element nodes */ exc = dom_node_get_node_type(node, &type); if (exc != DOM_NO_ERR) { fprintf(f, "Exception raised for node_get_node_type\n"); return false; } else if (type != DOM_ELEMENT_NODE) { /* Nothing to print */ return true; } /* Get element name */ exc = dom_node_get_node_name(node, &node_name); if (exc != DOM_NO_ERR) { fprintf(f, "Exception raised for get_node_name\n"); return false; } else if (node_name == NULL) { fprintf(f, "Broken: root_name == NULL\n"); return false; } /* Print ASCII tree structure for current node */ if (depth > 0) { for (i = 0; i < depth; i++) { fprintf(f, "| "); } fprintf(f, "+-"); } /* Get string data and print element name */ string = dom_string_data(node_name); length = dom_string_byte_length(node_name); fprintf(f, "[%.*s]", (int)length, string); if (length == 5 && strncmp(string, "title", 5) == 0) { /* Title tag, gather the title */ dom_string *str; exc = dom_node_get_text_content(node, &str); if (exc == DOM_NO_ERR && str != NULL) { fprintf(f, " $%.*s$", (int)dom_string_byte_length(str), dom_string_data(str)); dom_string_unref(str); } } /* Finished with the node_name dom_string */ dom_string_unref(node_name); /* Print the element's id & class, if it has them */ if (dump_dom_element_attribute(node, f, "id") == false || dump_dom_element_attribute(node, f, "class") == false) { /* Error occured */ fprintf(f, "\n"); return false; } fprintf(f, "\n"); return true; } /* exported interface documented in libdom.h */ nserror libdom_dump_structure(dom_node *node, FILE *f, int depth) { dom_exception exc; dom_node *child; nserror ret; dom_node *next_child; /* Print this node's entry */ if (dump_dom_element(node, f, depth) == false) { /* There was an error; return */ return NSERROR_DOM; } /* Get the node's first child */ exc = dom_node_get_first_child(node, &child); if (exc != DOM_NO_ERR) { fprintf(f, "Exception raised for node_get_first_child\n"); return NSERROR_DOM; } else if (child != NULL) { /* node has children; decend to children's depth */ depth++; /* Loop though all node's children */ do { /* Visit node's descendents */ ret = libdom_dump_structure(child, f, depth); if (ret !=NSERROR_OK) { /* There was an error; return */ dom_node_unref(child); return NSERROR_DOM; } /* Go to next sibling */ exc = dom_node_get_next_sibling(child, &next_child); if (exc != DOM_NO_ERR) { fprintf(f, "Exception raised for node_get_next_sibling\n"); dom_node_unref(child); return NSERROR_DOM; } dom_node_unref(child); child = next_child; } while (child != NULL); /* No more children */ } return NSERROR_OK; } /* exported interface documented in libdom.h */ nserror libdom_parse_file(const char *filename, const char *encoding, dom_document **doc) { dom_hubbub_parser_params parse_params; dom_hubbub_error error; dom_hubbub_parser *parser; dom_document *document; FILE *fp = NULL; #define BUF_SIZE 512 uint8_t buf[BUF_SIZE]; fp = fopen(filename, "r"); if (fp == NULL) { return NSERROR_NOT_FOUND; } parse_params.enc = encoding; parse_params.fix_enc = false; parse_params.enable_script = false; parse_params.msg = ignore_dom_msg; parse_params.script = NULL; parse_params.ctx = NULL; parse_params.daf = NULL; error = dom_hubbub_parser_create(&parse_params, &parser, &document); if (error != DOM_HUBBUB_OK) { fclose(fp); return libdom_hubbub_error_to_nserror(error); } while (feof(fp) == 0) { size_t read = fread(buf, sizeof(buf[0]), BUF_SIZE, fp); error = dom_hubbub_parser_parse_chunk(parser, buf, read); if (error != DOM_HUBBUB_OK) { dom_node_unref(document); dom_hubbub_parser_destroy(parser); fclose(fp); return NSERROR_DOM; } } error = dom_hubbub_parser_completed(parser); if (error != DOM_HUBBUB_OK) { dom_node_unref(document); dom_hubbub_parser_destroy(parser); fclose(fp); return libdom_hubbub_error_to_nserror(error); } dom_hubbub_parser_destroy(parser); fclose(fp); *doc = document; return NSERROR_OK; }