summaryrefslogtreecommitdiff
path: root/src/select/hash.c
diff options
context:
space:
mode:
authorMichael Drake <tlsa@netsurf-browser.org>2013-12-01 19:02:37 +0000
committerMichael Drake <tlsa@netsurf-browser.org>2013-12-01 19:02:37 +0000
commitcea0f412145c239a7583c10554c2d88964e94fd7 (patch)
tree7c8919bc45001c596ad6dc21a6035077fa48ad68 /src/select/hash.c
parentd2460b3a2f2690813abdf3b350be9dc5ebf2fe18 (diff)
downloadlibcss-cea0f412145c239a7583c10554c2d88964e94fd7.tar.gz
libcss-cea0f412145c239a7583c10554c2d88964e94fd7.tar.bz2
Significantly optimise CSS selection performance.
Now we pass a node bloom filter to css_get_style. That node bloom filter is filled with the node's ancestor element, class, and id names. Internally, libcss also generates a bloom filter for each selector chain. If the selector chain's bloom filter is not a subset of the node bloom filter, we know that the selector chain's rule does not apply to the node. This avoids the slow selector chain matching process. Other smaller optimisations to move the ruling out of selector chains for inapplicable media types and other reasons to before we start comparing rules from different sources to find the next rule. All this is now done in hash.c so select.c never sees the trivially ruled out rules.
Diffstat (limited to 'src/select/hash.c')
-rw-r--r--src/select/hash.c466
1 files changed, 401 insertions, 65 deletions
diff --git a/src/select/hash.c b/src/select/hash.c
index 37492c1..5420d6d 100644
--- a/src/select/hash.c
+++ b/src/select/hash.c
@@ -12,8 +12,11 @@
#include "select/hash.h"
#include "utils/utils.h"
+#undef PRINT_CHAIN_BLOOM_DETAILS
+
typedef struct hash_entry {
const css_selector *sel;
+ css_bloom sel_chain_bloom[CSS_BLOOM_SIZE];
struct hash_entry *next;
} hash_entry;
@@ -41,7 +44,7 @@ struct css_selector_hash {
static hash_entry empty_slot;
-static inline uint32_t _hash_name(lwc_string *name);
+static inline uint32_t _hash_name(const lwc_string *name);
static inline lwc_string *_class_name(const css_selector *selector);
static inline lwc_string *_id_name(const css_selector *selector);
static css_error _insert_into_chain(css_selector_hash *ctx, hash_entry *head,
@@ -49,15 +52,88 @@ static css_error _insert_into_chain(css_selector_hash *ctx, hash_entry *head,
static css_error _remove_from_chain(css_selector_hash *ctx, hash_entry *head,
const css_selector *selector);
-static css_error _iterate_elements(const css_selector **current,
+static css_error _iterate_elements(
+ const struct css_hash_selection_requirments *req,
+ const css_selector **current,
const css_selector ***next);
-static css_error _iterate_classes(const css_selector **current,
+static css_error _iterate_classes(
+ const struct css_hash_selection_requirments *req,
+ const css_selector **current,
const css_selector ***next);
-static css_error _iterate_ids(const css_selector **current,
+static css_error _iterate_ids(
+ const struct css_hash_selection_requirments *req,
+ const css_selector **current,
const css_selector ***next);
-static css_error _iterate_universal(const css_selector **current,
+static css_error _iterate_universal(
+ const struct css_hash_selection_requirments *req,
+ const css_selector **current,
const css_selector ***next);
+
+/* No bytecode if rule body is empty or wholly invalid --
+ * Only interested in rules with bytecode */
+#define RULE_HAS_BYTECODE(r) \
+ (((css_rule_selector *)(r->sel->rule))->style != NULL)
+
+
+/**
+ * Test first selector on selector chain for having matching element name.
+ *
+ * If source of rule is element or universal hash, we know the
+ * element name is a match. If it comes from the class or id hash,
+ * we have to test for a match.
+ *
+ * \param selector selector chain head to test
+ * \param qname element name to look for
+ * \return true iff chain head doesn't fail to match element name
+ */
+static inline bool _chain_good_for_element_name(const css_selector *selector,
+ const css_qname *qname)
+{
+ if (lwc_string_length(selector->data.qname.name) != 1 ||
+ lwc_string_data(selector->data.qname.name)[0] != '*') {
+ bool match;
+ if (lwc_string_caseless_isequal(
+ selector->data.qname.name, qname->name,
+ &match) == lwc_error_ok && match == false) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Test whether the rule applies for current media.
+ *
+ * \param rule Rule to test
+ * \meaid media Current media type(s)
+ * \return true iff chain's rule applies for media
+ */
+static inline bool _rule_good_for_media(const css_rule *rule, uint64_t media)
+{
+ bool applies = true;
+ const css_rule *ancestor = rule;
+
+ while (ancestor != NULL) {
+ const css_rule_media *m = (const css_rule_media *) ancestor;
+
+ if (ancestor->type == CSS_RULE_MEDIA &&
+ (m->media & media) == 0) {
+ applies = false;
+ break;
+ }
+
+ if (ancestor->ptype != CSS_RULE_PARENT_STYLESHEET)
+ ancestor = ancestor->parent;
+ else
+ ancestor = NULL;
+ }
+
+ return applies;
+}
+
+
/**
* Create a hash
*
@@ -288,19 +364,19 @@ css_error css__selector_hash_remove(css_selector_hash *hash,
* If nothing matches, CSS_OK will be returned and **matched == NULL
*/
css_error css__selector_hash_find(css_selector_hash *hash,
- css_qname *qname,
+ const struct css_hash_selection_requirments *req,
css_selector_hash_iterator *iterator,
const css_selector ***matched)
{
uint32_t index, mask;
hash_entry *head;
- if (hash == NULL || qname == NULL || iterator == NULL || matched == NULL)
+ if (hash == NULL || req == NULL || iterator == NULL || matched == NULL)
return CSS_BADPARM;
/* Find index */
mask = hash->elements.n_slots - 1;
- index = _hash_name(qname->name) & mask;
+ index = _hash_name(req->qname.name) & mask;
head = &hash->elements.slots[index];
@@ -311,13 +387,22 @@ css_error css__selector_hash_find(css_selector_hash *hash,
bool match = false;
lerror = lwc_string_caseless_isequal(
- qname->name, head->sel->data.qname.name,
+ req->qname.name,
+ head->sel->data.qname.name,
&match);
if (lerror != lwc_error_ok)
return css_error_from_lwc_error(lerror);
- if (match)
- break;
+ if (match && RULE_HAS_BYTECODE(head)) {
+ if (css_bloom_in_bloom(
+ head->sel_chain_bloom,
+ req->node_bloom) &&
+ _rule_good_for_media(head->sel->rule,
+ req->media)) {
+ /* Found a match */
+ break;
+ }
+ }
head = head->next;
}
@@ -344,19 +429,20 @@ css_error css__selector_hash_find(css_selector_hash *hash,
* If nothing matches, CSS_OK will be returned and **matched == NULL
*/
css_error css__selector_hash_find_by_class(css_selector_hash *hash,
- lwc_string *name,
+ const struct css_hash_selection_requirments *req,
css_selector_hash_iterator *iterator,
const css_selector ***matched)
{
uint32_t index, mask;
hash_entry *head;
- if (hash == NULL || name == NULL || iterator == NULL || matched == NULL)
+ if (hash == NULL || req == NULL || req->class == NULL ||
+ iterator == NULL || matched == NULL)
return CSS_BADPARM;
/* Find index */
mask = hash->classes.n_slots - 1;
- index = _hash_name(name) & mask;
+ index = _hash_name(req->class) & mask;
head = &hash->classes.slots[index];
@@ -370,12 +456,24 @@ css_error css__selector_hash_find_by_class(css_selector_hash *hash,
n = _class_name(head->sel);
if (n != NULL) {
lerror = lwc_string_caseless_isequal(
- name, n, &match);
+ req->class, n, &match);
if (lerror != lwc_error_ok)
return css_error_from_lwc_error(lerror);
- if (match)
- break;
+ if (match && RULE_HAS_BYTECODE(head)) {
+ if (css_bloom_in_bloom(
+ head->sel_chain_bloom,
+ req->node_bloom) &&
+ _chain_good_for_element_name(
+ head->sel,
+ &(req->qname)) &&
+ _rule_good_for_media(
+ head->sel->rule,
+ req->media)) {
+ /* Found a match */
+ break;
+ }
+ }
}
head = head->next;
@@ -403,19 +501,20 @@ css_error css__selector_hash_find_by_class(css_selector_hash *hash,
* If nothing matches, CSS_OK will be returned and **matched == NULL
*/
css_error css__selector_hash_find_by_id(css_selector_hash *hash,
- lwc_string *name,
+ const struct css_hash_selection_requirments *req,
css_selector_hash_iterator *iterator,
const css_selector ***matched)
{
uint32_t index, mask;
hash_entry *head;
- if (hash == NULL || name == NULL || iterator == NULL || matched == NULL)
+ if (hash == NULL || req == NULL || req->id == NULL ||
+ iterator == NULL || matched == NULL)
return CSS_BADPARM;
/* Find index */
mask = hash->ids.n_slots - 1;
- index = _hash_name(name) & mask;
+ index = _hash_name(req->id) & mask;
head = &hash->ids.slots[index];
@@ -429,12 +528,24 @@ css_error css__selector_hash_find_by_id(css_selector_hash *hash,
n = _id_name(head->sel);
if (n != NULL) {
lerror = lwc_string_caseless_isequal(
- name, n, &match);
+ req->id, n, &match);
if (lerror != lwc_error_ok)
return css_error_from_lwc_error(lerror);
- if (match)
- break;
+ if (match && RULE_HAS_BYTECODE(head)) {
+ if (css_bloom_in_bloom(
+ head->sel_chain_bloom,
+ req->node_bloom) &&
+ _chain_good_for_element_name(
+ head->sel,
+ &req->qname) &&
+ _rule_good_for_media(
+ head->sel->rule,
+ req->media)) {
+ /* Found a match */
+ break;
+ }
+ }
}
head = head->next;
@@ -461,14 +572,39 @@ css_error css__selector_hash_find_by_id(css_selector_hash *hash,
* If nothing matches, CSS_OK will be returned and **matched == NULL
*/
css_error css__selector_hash_find_universal(css_selector_hash *hash,
+ const struct css_hash_selection_requirments *req,
css_selector_hash_iterator *iterator,
const css_selector ***matched)
{
- if (hash == NULL || iterator == NULL || matched == NULL)
+ hash_entry *head;
+
+ if (hash == NULL || req == NULL || iterator == NULL || matched == NULL)
return CSS_BADPARM;
+ head = &hash->universal;
+
+ if (head->sel != NULL) {
+ /* Search through chain for first match */
+ while (head != NULL) {
+ if (RULE_HAS_BYTECODE(head) &&
+ css_bloom_in_bloom(
+ head->sel_chain_bloom,
+ req->node_bloom) &&
+ _rule_good_for_media(head->sel->rule,
+ req->media)) {
+ /* Found a match */
+ break;
+ }
+
+ head = head->next;
+ }
+
+ if (head == NULL)
+ head = &empty_slot;
+ }
+
(*iterator) = _iterate_universal;
- (*matched) = (const css_selector **) &hash->universal;
+ (*matched) = (const css_selector **) head;
return CSS_OK;
}
@@ -503,7 +639,7 @@ css_error css__selector_hash_size(css_selector_hash *hash, size_t *size)
* \param name Name to hash
* \return hash value
*/
-uint32_t _hash_name(lwc_string *name)
+uint32_t _hash_name(const lwc_string *name)
{
uint32_t z = 0x811c9dc5;
const char *data = lwc_string_data(name);
@@ -573,6 +709,119 @@ lwc_string *_id_name(const css_selector *selector)
return name;
}
+
+/**
+ * Add a selector detail to the bloom filter, if the detail is relevant.
+ *
+ * \param d Selector detail to consider and add if relevant
+ * \param bloom Bloom filter to add to.
+ */
+static inline void _chain_bloom_add_detail(
+ const css_selector_detail *d,
+ css_bloom bloom[CSS_BLOOM_SIZE])
+{
+ lwc_string *add = NULL; /* String to add to bloom */
+
+ switch (d->type) {
+ case CSS_SELECTOR_ELEMENT:
+ /* Don't add universal element selector to bloom */
+ if (lwc_string_length(d->qname.name) == 1 &&
+ lwc_string_data(d->qname.name)[0] == '*') {
+ return;
+ }
+
+ /* TODO: Remain case sensitive for XML */
+ if (d->qname.name->insensitive == NULL) {
+ lwc__intern_caseless_string(d->qname.name);
+ }
+ add = d->qname.name->insensitive;
+ break;
+
+ case CSS_SELECTOR_CLASS:
+ case CSS_SELECTOR_ID:
+ /* TODO: Remain case sensitive in standards mode */
+ if (d->qname.name->insensitive == NULL) {
+ lwc__intern_caseless_string(d->qname.name);
+ }
+ add = d->qname.name->insensitive;
+ break;
+
+ default:
+ return;
+ }
+
+ /* Don't really care if intern for caseless string failed, if we're
+ * out of memory, something else will panic. Everything still works
+ * if the string isn't added the the rule bloom. Just less optimally.
+ */
+ if (add != NULL) {
+ css_bloom_add_hash(bloom, lwc_string_hash_value(add));
+ }
+
+ return;
+}
+
+
+/**
+ * Generate a selector chain's bloom filter
+ *
+ * \param s Selector at head of selector chain
+ * \param bloom Bloom filter to generate.
+ */
+static void _chain_bloom_generate(const css_selector *s,
+ css_bloom bloom[CSS_BLOOM_SIZE])
+{
+ css_bloom_init(bloom);
+
+ /* Work back through selector chain... */
+ do {
+ /* ...looking for Ancestor/Parent combinators */
+ if (s->data.comb == CSS_COMBINATOR_ANCESTOR ||
+ s->data.comb == CSS_COMBINATOR_PARENT) {
+ const css_selector_detail *d = &s->combinator->data;
+ while (d != NULL) {
+ if (d->negate == 0) {
+ _chain_bloom_add_detail(d, bloom);
+ }
+ d = (d->next != 0) ? d + 1 : NULL;
+ }
+ }
+
+ s = s->combinator;
+ } while (s != NULL);
+}
+
+#ifdef PRINT_CHAIN_BLOOM_DETAILS
+/* Count bits set in uint32_t */
+static int bits_set(uint32_t n) {
+ n = n - ((n >> 1) & 0x55555555);
+ n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
+ n = (n + (n >> 4)) & 0x0f0f0f0f;
+ n = n + (n >> 8);
+ n = n + (n >> 16);
+ return n & 0x0000003f;
+}
+
+/* Selector chain bloom instrumentation ouput display. */
+static void print_chain_bloom_details(css_bloom bloom[CSS_BLOOM_SIZE])
+{
+ printf("Chain bloom:\t");
+ int total = 0, i;
+ int set[4];
+ for (i = 0; i < CSS_BLOOM_SIZE; i++) {
+ set[i] = bits_set(bloom[i]);
+ total += set[i];
+ }
+ printf("bits set:");
+ for (i = 0; i < CSS_BLOOM_SIZE; i++) {
+ printf(" %2i", set[i]);
+ }
+ printf(" (total:%4i of %i) saturation: %3i%%\n", total,
+ (32 * CSS_BLOOM_SIZE),
+ (100 * total) / (32 * CSS_BLOOM_SIZE));
+}
+#endif
+
/**
* Insert a selector into a hash chain
*
@@ -588,6 +837,11 @@ css_error _insert_into_chain(css_selector_hash *ctx, hash_entry *head,
if (head->sel == NULL) {
head->sel = selector;
head->next = NULL;
+ _chain_bloom_generate(selector, head->sel_chain_bloom);
+
+#ifdef PRINT_CHAIN_BLOOM_DETAILS
+ print_chain_bloom_details(head->sel_chain_bloom);
+#endif
} else {
hash_entry *search = head;
hash_entry *prev = NULL;
@@ -612,13 +866,19 @@ css_error _insert_into_chain(css_selector_hash *ctx, hash_entry *head,
search = search->next;
} while (search != NULL);
+ entry->sel = selector;
+ _chain_bloom_generate(selector, entry->sel_chain_bloom);
+
+#ifdef PRINT_CHAIN_BLOOM_DETAILS
+ print_chain_bloom_details(entry->sel_chain_bloom);
+#endif
+
if (prev == NULL) {
- entry->sel = head->sel;
- entry->next = head->next;
- head->sel = selector;
- head->next = entry;
+ hash_entry temp = *entry;
+ *entry = *head;
+ temp.next = entry;
+ *head = temp;
} else {
- entry->sel = selector;
entry->next = prev->next;
prev->next = entry;
}
@@ -685,7 +945,9 @@ css_error _remove_from_chain(css_selector_hash *ctx, hash_entry *head,
*
* If nothing further matches, CSS_OK will be returned and **next == NULL
*/
-css_error _iterate_elements(const css_selector **current,
+css_error _iterate_elements(
+ const struct css_hash_selection_requirments *req,
+ const css_selector **current,
const css_selector ***next)
{
const hash_entry *head = (const hash_entry *) current;
@@ -693,14 +955,29 @@ css_error _iterate_elements(const css_selector **current,
lwc_error lerror = lwc_error_ok;
lwc_string *name;
- name = head->sel->data.qname.name;
+ name = req->qname.name;
+ head = head->next;
+
+ if (head != NULL && head->sel != NULL) {
+ /* Search through chain for first match */
+ while (head != NULL) {
+ lerror = lwc_string_caseless_isequal(name,
+ head->sel->data.qname.name, &match);
+ if (lerror != lwc_error_ok)
+ return css_error_from_lwc_error(lerror);
- /* Look for the next selector that matches the key */
- while (match == false && (head = head->next) != NULL) {
- lerror = lwc_string_caseless_isequal(
- name, head->sel->data.qname.name, &match);
- if (lerror != lwc_error_ok)
- return css_error_from_lwc_error(lerror);
+ if (match && RULE_HAS_BYTECODE(head)) {
+ if (css_bloom_in_bloom(
+ head->sel_chain_bloom,
+ req->node_bloom) &&
+ _rule_good_for_media(head->sel->rule,
+ req->media)) {
+ /* Found a match */
+ break;
+ }
+ }
+ head = head->next;
+ }
}
if (head == NULL)
@@ -720,7 +997,9 @@ css_error _iterate_elements(const css_selector **current,
*
* If nothing further matches, CSS_OK will be returned and **next == NULL
*/
-css_error _iterate_classes(const css_selector **current,
+css_error _iterate_classes(
+ const struct css_hash_selection_requirments *req,
+ const css_selector **current,
const css_selector ***next)
{
const hash_entry *head = (const hash_entry *) current;
@@ -728,18 +1007,37 @@ css_error _iterate_classes(const css_selector **current,
lwc_error lerror = lwc_error_ok;
lwc_string *name, *ref;
- ref = _class_name(head->sel);
+ ref = req->class;
+ head = head->next;
- /* Look for the next selector that matches the key */
- while (match == false && (head = head->next) != NULL) {
- name = _class_name(head->sel);
- if (name == NULL)
- continue;
+ if (head != NULL && head->sel != NULL) {
+ /* Search through chain for first match */
+ while (head != NULL) {
+ name = _class_name(head->sel);
+
+ if (name != NULL) {
+ lerror = lwc_string_caseless_isequal(
+ ref, name, &match);
+ if (lerror != lwc_error_ok)
+ return css_error_from_lwc_error(lerror);
- lerror = lwc_string_caseless_isequal(
- ref, name, &match);
- if (lerror != lwc_error_ok)
- return css_error_from_lwc_error(lerror);
+ if (match && RULE_HAS_BYTECODE(head)) {
+ if (css_bloom_in_bloom(
+ head->sel_chain_bloom,
+ req->node_bloom) &&
+ _chain_good_for_element_name(
+ head->sel,
+ &(req->qname)) &&
+ _rule_good_for_media(
+ head->sel->rule,
+ req->media)) {
+ /* Found a match */
+ break;
+ }
+ }
+ }
+ head = head->next;
+ }
}
if (head == NULL)
@@ -759,7 +1057,9 @@ css_error _iterate_classes(const css_selector **current,
*
* If nothing further matches, CSS_OK will be returned and **next == NULL
*/
-css_error _iterate_ids(const css_selector **current,
+css_error _iterate_ids(
+ const struct css_hash_selection_requirments *req,
+ const css_selector **current,
const css_selector ***next)
{
const hash_entry *head = (const hash_entry *) current;
@@ -767,18 +1067,37 @@ css_error _iterate_ids(const css_selector **current,
lwc_error lerror = lwc_error_ok;
lwc_string *name, *ref;
- ref = _id_name(head->sel);
+ ref = req->id;
+ head = head->next;
- /* Look for the next selector that matches the key */
- while (match == false && (head = head->next) != NULL) {
- name = _id_name(head->sel);
- if (name == NULL)
- continue;
+ if (head != NULL && head->sel != NULL) {
+ /* Search through chain for first match */
+ while (head != NULL) {
+ name = _id_name(head->sel);
- lerror = lwc_string_caseless_isequal(
- ref, name, &match);
- if (lerror != lwc_error_ok)
- return css_error_from_lwc_error(lerror);
+ if (name != NULL) {
+ lerror = lwc_string_caseless_isequal(
+ ref, name, &match);
+ if (lerror != lwc_error_ok)
+ return css_error_from_lwc_error(lerror);
+
+ if (match && RULE_HAS_BYTECODE(head)) {
+ if (css_bloom_in_bloom(
+ head->sel_chain_bloom,
+ req->node_bloom) &&
+ _chain_good_for_element_name(
+ head->sel,
+ &req->qname) &&
+ _rule_good_for_media(
+ head->sel->rule,
+ req->media)) {
+ /* Found a match */
+ break;
+ }
+ }
+ }
+ head = head->next;
+ }
}
if (head == NULL)
@@ -798,15 +1117,32 @@ css_error _iterate_ids(const css_selector **current,
*
* If nothing further matches, CSS_OK will be returned and **next == NULL
*/
-css_error _iterate_universal(const css_selector **current,
+css_error _iterate_universal(
+ const struct css_hash_selection_requirments *req,
+ const css_selector **current,
const css_selector ***next)
{
const hash_entry *head = (const hash_entry *) current;
+ head = head->next;
- if (head->next == NULL)
+ if (head != NULL && head->sel != NULL) {
+ /* Search through chain for first match */
+ while (head != NULL) {
+ if (RULE_HAS_BYTECODE(head) &&
+ css_bloom_in_bloom(
+ head->sel_chain_bloom,
+ req->node_bloom) &&
+ _rule_good_for_media(head->sel->rule,
+ req->media)) {
+ /* Found a match */
+ break;
+ }
+ head = head->next;
+ }
+ }
+
+ if (head == NULL)
head = &empty_slot;
- else
- head = head->next;
(*next) = (const css_selector **) head;