diff options
Diffstat (limited to 'src/select/hash.c')
-rw-r--r-- | src/select/hash.c | 466 |
1 files changed, 401 insertions, 65 deletions
diff --git a/src/select/hash.c b/src/select/hash.c index 37492c1..5420d6d 100644 --- a/src/select/hash.c +++ b/src/select/hash.c @@ -12,8 +12,11 @@ #include "select/hash.h" #include "utils/utils.h" +#undef PRINT_CHAIN_BLOOM_DETAILS + typedef struct hash_entry { const css_selector *sel; + css_bloom sel_chain_bloom[CSS_BLOOM_SIZE]; struct hash_entry *next; } hash_entry; @@ -41,7 +44,7 @@ struct css_selector_hash { static hash_entry empty_slot; -static inline uint32_t _hash_name(lwc_string *name); +static inline uint32_t _hash_name(const lwc_string *name); static inline lwc_string *_class_name(const css_selector *selector); static inline lwc_string *_id_name(const css_selector *selector); static css_error _insert_into_chain(css_selector_hash *ctx, hash_entry *head, @@ -49,15 +52,88 @@ static css_error _insert_into_chain(css_selector_hash *ctx, hash_entry *head, static css_error _remove_from_chain(css_selector_hash *ctx, hash_entry *head, const css_selector *selector); -static css_error _iterate_elements(const css_selector **current, +static css_error _iterate_elements( + const struct css_hash_selection_requirments *req, + const css_selector **current, const css_selector ***next); -static css_error _iterate_classes(const css_selector **current, +static css_error _iterate_classes( + const struct css_hash_selection_requirments *req, + const css_selector **current, const css_selector ***next); -static css_error _iterate_ids(const css_selector **current, +static css_error _iterate_ids( + const struct css_hash_selection_requirments *req, + const css_selector **current, const css_selector ***next); -static css_error _iterate_universal(const css_selector **current, +static css_error _iterate_universal( + const struct css_hash_selection_requirments *req, + const css_selector **current, const css_selector ***next); + +/* No bytecode if rule body is empty or wholly invalid -- + * Only interested in rules with bytecode */ +#define RULE_HAS_BYTECODE(r) \ + (((css_rule_selector *)(r->sel->rule))->style != NULL) + + +/** + * Test first selector on selector chain for having matching element name. + * + * If source of rule is element or universal hash, we know the + * element name is a match. If it comes from the class or id hash, + * we have to test for a match. + * + * \param selector selector chain head to test + * \param qname element name to look for + * \return true iff chain head doesn't fail to match element name + */ +static inline bool _chain_good_for_element_name(const css_selector *selector, + const css_qname *qname) +{ + if (lwc_string_length(selector->data.qname.name) != 1 || + lwc_string_data(selector->data.qname.name)[0] != '*') { + bool match; + if (lwc_string_caseless_isequal( + selector->data.qname.name, qname->name, + &match) == lwc_error_ok && match == false) { + return false; + } + } + + return true; +} + +/** + * Test whether the rule applies for current media. + * + * \param rule Rule to test + * \meaid media Current media type(s) + * \return true iff chain's rule applies for media + */ +static inline bool _rule_good_for_media(const css_rule *rule, uint64_t media) +{ + bool applies = true; + const css_rule *ancestor = rule; + + while (ancestor != NULL) { + const css_rule_media *m = (const css_rule_media *) ancestor; + + if (ancestor->type == CSS_RULE_MEDIA && + (m->media & media) == 0) { + applies = false; + break; + } + + if (ancestor->ptype != CSS_RULE_PARENT_STYLESHEET) + ancestor = ancestor->parent; + else + ancestor = NULL; + } + + return applies; +} + + /** * Create a hash * @@ -288,19 +364,19 @@ css_error css__selector_hash_remove(css_selector_hash *hash, * If nothing matches, CSS_OK will be returned and **matched == NULL */ css_error css__selector_hash_find(css_selector_hash *hash, - css_qname *qname, + const struct css_hash_selection_requirments *req, css_selector_hash_iterator *iterator, const css_selector ***matched) { uint32_t index, mask; hash_entry *head; - if (hash == NULL || qname == NULL || iterator == NULL || matched == NULL) + if (hash == NULL || req == NULL || iterator == NULL || matched == NULL) return CSS_BADPARM; /* Find index */ mask = hash->elements.n_slots - 1; - index = _hash_name(qname->name) & mask; + index = _hash_name(req->qname.name) & mask; head = &hash->elements.slots[index]; @@ -311,13 +387,22 @@ css_error css__selector_hash_find(css_selector_hash *hash, bool match = false; lerror = lwc_string_caseless_isequal( - qname->name, head->sel->data.qname.name, + req->qname.name, + head->sel->data.qname.name, &match); if (lerror != lwc_error_ok) return css_error_from_lwc_error(lerror); - if (match) - break; + if (match && RULE_HAS_BYTECODE(head)) { + if (css_bloom_in_bloom( + head->sel_chain_bloom, + req->node_bloom) && + _rule_good_for_media(head->sel->rule, + req->media)) { + /* Found a match */ + break; + } + } head = head->next; } @@ -344,19 +429,20 @@ css_error css__selector_hash_find(css_selector_hash *hash, * If nothing matches, CSS_OK will be returned and **matched == NULL */ css_error css__selector_hash_find_by_class(css_selector_hash *hash, - lwc_string *name, + const struct css_hash_selection_requirments *req, css_selector_hash_iterator *iterator, const css_selector ***matched) { uint32_t index, mask; hash_entry *head; - if (hash == NULL || name == NULL || iterator == NULL || matched == NULL) + if (hash == NULL || req == NULL || req->class == NULL || + iterator == NULL || matched == NULL) return CSS_BADPARM; /* Find index */ mask = hash->classes.n_slots - 1; - index = _hash_name(name) & mask; + index = _hash_name(req->class) & mask; head = &hash->classes.slots[index]; @@ -370,12 +456,24 @@ css_error css__selector_hash_find_by_class(css_selector_hash *hash, n = _class_name(head->sel); if (n != NULL) { lerror = lwc_string_caseless_isequal( - name, n, &match); + req->class, n, &match); if (lerror != lwc_error_ok) return css_error_from_lwc_error(lerror); - if (match) - break; + if (match && RULE_HAS_BYTECODE(head)) { + if (css_bloom_in_bloom( + head->sel_chain_bloom, + req->node_bloom) && + _chain_good_for_element_name( + head->sel, + &(req->qname)) && + _rule_good_for_media( + head->sel->rule, + req->media)) { + /* Found a match */ + break; + } + } } head = head->next; @@ -403,19 +501,20 @@ css_error css__selector_hash_find_by_class(css_selector_hash *hash, * If nothing matches, CSS_OK will be returned and **matched == NULL */ css_error css__selector_hash_find_by_id(css_selector_hash *hash, - lwc_string *name, + const struct css_hash_selection_requirments *req, css_selector_hash_iterator *iterator, const css_selector ***matched) { uint32_t index, mask; hash_entry *head; - if (hash == NULL || name == NULL || iterator == NULL || matched == NULL) + if (hash == NULL || req == NULL || req->id == NULL || + iterator == NULL || matched == NULL) return CSS_BADPARM; /* Find index */ mask = hash->ids.n_slots - 1; - index = _hash_name(name) & mask; + index = _hash_name(req->id) & mask; head = &hash->ids.slots[index]; @@ -429,12 +528,24 @@ css_error css__selector_hash_find_by_id(css_selector_hash *hash, n = _id_name(head->sel); if (n != NULL) { lerror = lwc_string_caseless_isequal( - name, n, &match); + req->id, n, &match); if (lerror != lwc_error_ok) return css_error_from_lwc_error(lerror); - if (match) - break; + if (match && RULE_HAS_BYTECODE(head)) { + if (css_bloom_in_bloom( + head->sel_chain_bloom, + req->node_bloom) && + _chain_good_for_element_name( + head->sel, + &req->qname) && + _rule_good_for_media( + head->sel->rule, + req->media)) { + /* Found a match */ + break; + } + } } head = head->next; @@ -461,14 +572,39 @@ css_error css__selector_hash_find_by_id(css_selector_hash *hash, * If nothing matches, CSS_OK will be returned and **matched == NULL */ css_error css__selector_hash_find_universal(css_selector_hash *hash, + const struct css_hash_selection_requirments *req, css_selector_hash_iterator *iterator, const css_selector ***matched) { - if (hash == NULL || iterator == NULL || matched == NULL) + hash_entry *head; + + if (hash == NULL || req == NULL || iterator == NULL || matched == NULL) return CSS_BADPARM; + head = &hash->universal; + + if (head->sel != NULL) { + /* Search through chain for first match */ + while (head != NULL) { + if (RULE_HAS_BYTECODE(head) && + css_bloom_in_bloom( + head->sel_chain_bloom, + req->node_bloom) && + _rule_good_for_media(head->sel->rule, + req->media)) { + /* Found a match */ + break; + } + + head = head->next; + } + + if (head == NULL) + head = &empty_slot; + } + (*iterator) = _iterate_universal; - (*matched) = (const css_selector **) &hash->universal; + (*matched) = (const css_selector **) head; return CSS_OK; } @@ -503,7 +639,7 @@ css_error css__selector_hash_size(css_selector_hash *hash, size_t *size) * \param name Name to hash * \return hash value */ -uint32_t _hash_name(lwc_string *name) +uint32_t _hash_name(const lwc_string *name) { uint32_t z = 0x811c9dc5; const char *data = lwc_string_data(name); @@ -573,6 +709,119 @@ lwc_string *_id_name(const css_selector *selector) return name; } + +/** + * Add a selector detail to the bloom filter, if the detail is relevant. + * + * \param d Selector detail to consider and add if relevant + * \param bloom Bloom filter to add to. + */ +static inline void _chain_bloom_add_detail( + const css_selector_detail *d, + css_bloom bloom[CSS_BLOOM_SIZE]) +{ + lwc_string *add = NULL; /* String to add to bloom */ + + switch (d->type) { + case CSS_SELECTOR_ELEMENT: + /* Don't add universal element selector to bloom */ + if (lwc_string_length(d->qname.name) == 1 && + lwc_string_data(d->qname.name)[0] == '*') { + return; + } + + /* TODO: Remain case sensitive for XML */ + if (d->qname.name->insensitive == NULL) { + lwc__intern_caseless_string(d->qname.name); + } + add = d->qname.name->insensitive; + break; + + case CSS_SELECTOR_CLASS: + case CSS_SELECTOR_ID: + /* TODO: Remain case sensitive in standards mode */ + if (d->qname.name->insensitive == NULL) { + lwc__intern_caseless_string(d->qname.name); + } + add = d->qname.name->insensitive; + break; + + default: + return; + } + + /* Don't really care if intern for caseless string failed, if we're + * out of memory, something else will panic. Everything still works + * if the string isn't added the the rule bloom. Just less optimally. + */ + if (add != NULL) { + css_bloom_add_hash(bloom, lwc_string_hash_value(add)); + } + + return; +} + + +/** + * Generate a selector chain's bloom filter + * + * \param s Selector at head of selector chain + * \param bloom Bloom filter to generate. + */ +static void _chain_bloom_generate(const css_selector *s, + css_bloom bloom[CSS_BLOOM_SIZE]) +{ + css_bloom_init(bloom); + + /* Work back through selector chain... */ + do { + /* ...looking for Ancestor/Parent combinators */ + if (s->data.comb == CSS_COMBINATOR_ANCESTOR || + s->data.comb == CSS_COMBINATOR_PARENT) { + const css_selector_detail *d = &s->combinator->data; + while (d != NULL) { + if (d->negate == 0) { + _chain_bloom_add_detail(d, bloom); + } + d = (d->next != 0) ? d + 1 : NULL; + } + } + + s = s->combinator; + } while (s != NULL); +} + +#ifdef PRINT_CHAIN_BLOOM_DETAILS +/* Count bits set in uint32_t */ +static int bits_set(uint32_t n) { + n = n - ((n >> 1) & 0x55555555); + n = (n & 0x33333333) + ((n >> 2) & 0x33333333); + n = (n + (n >> 4)) & 0x0f0f0f0f; + n = n + (n >> 8); + n = n + (n >> 16); + return n & 0x0000003f; +} + +/* Selector chain bloom instrumentation ouput display. */ +static void print_chain_bloom_details(css_bloom bloom[CSS_BLOOM_SIZE]) +{ + printf("Chain bloom:\t"); + int total = 0, i; + int set[4]; + for (i = 0; i < CSS_BLOOM_SIZE; i++) { + set[i] = bits_set(bloom[i]); + total += set[i]; + } + printf("bits set:"); + for (i = 0; i < CSS_BLOOM_SIZE; i++) { + printf(" %2i", set[i]); + } + printf(" (total:%4i of %i) saturation: %3i%%\n", total, + (32 * CSS_BLOOM_SIZE), + (100 * total) / (32 * CSS_BLOOM_SIZE)); +} +#endif + /** * Insert a selector into a hash chain * @@ -588,6 +837,11 @@ css_error _insert_into_chain(css_selector_hash *ctx, hash_entry *head, if (head->sel == NULL) { head->sel = selector; head->next = NULL; + _chain_bloom_generate(selector, head->sel_chain_bloom); + +#ifdef PRINT_CHAIN_BLOOM_DETAILS + print_chain_bloom_details(head->sel_chain_bloom); +#endif } else { hash_entry *search = head; hash_entry *prev = NULL; @@ -612,13 +866,19 @@ css_error _insert_into_chain(css_selector_hash *ctx, hash_entry *head, search = search->next; } while (search != NULL); + entry->sel = selector; + _chain_bloom_generate(selector, entry->sel_chain_bloom); + +#ifdef PRINT_CHAIN_BLOOM_DETAILS + print_chain_bloom_details(entry->sel_chain_bloom); +#endif + if (prev == NULL) { - entry->sel = head->sel; - entry->next = head->next; - head->sel = selector; - head->next = entry; + hash_entry temp = *entry; + *entry = *head; + temp.next = entry; + *head = temp; } else { - entry->sel = selector; entry->next = prev->next; prev->next = entry; } @@ -685,7 +945,9 @@ css_error _remove_from_chain(css_selector_hash *ctx, hash_entry *head, * * If nothing further matches, CSS_OK will be returned and **next == NULL */ -css_error _iterate_elements(const css_selector **current, +css_error _iterate_elements( + const struct css_hash_selection_requirments *req, + const css_selector **current, const css_selector ***next) { const hash_entry *head = (const hash_entry *) current; @@ -693,14 +955,29 @@ css_error _iterate_elements(const css_selector **current, lwc_error lerror = lwc_error_ok; lwc_string *name; - name = head->sel->data.qname.name; + name = req->qname.name; + head = head->next; + + if (head != NULL && head->sel != NULL) { + /* Search through chain for first match */ + while (head != NULL) { + lerror = lwc_string_caseless_isequal(name, + head->sel->data.qname.name, &match); + if (lerror != lwc_error_ok) + return css_error_from_lwc_error(lerror); - /* Look for the next selector that matches the key */ - while (match == false && (head = head->next) != NULL) { - lerror = lwc_string_caseless_isequal( - name, head->sel->data.qname.name, &match); - if (lerror != lwc_error_ok) - return css_error_from_lwc_error(lerror); + if (match && RULE_HAS_BYTECODE(head)) { + if (css_bloom_in_bloom( + head->sel_chain_bloom, + req->node_bloom) && + _rule_good_for_media(head->sel->rule, + req->media)) { + /* Found a match */ + break; + } + } + head = head->next; + } } if (head == NULL) @@ -720,7 +997,9 @@ css_error _iterate_elements(const css_selector **current, * * If nothing further matches, CSS_OK will be returned and **next == NULL */ -css_error _iterate_classes(const css_selector **current, +css_error _iterate_classes( + const struct css_hash_selection_requirments *req, + const css_selector **current, const css_selector ***next) { const hash_entry *head = (const hash_entry *) current; @@ -728,18 +1007,37 @@ css_error _iterate_classes(const css_selector **current, lwc_error lerror = lwc_error_ok; lwc_string *name, *ref; - ref = _class_name(head->sel); + ref = req->class; + head = head->next; - /* Look for the next selector that matches the key */ - while (match == false && (head = head->next) != NULL) { - name = _class_name(head->sel); - if (name == NULL) - continue; + if (head != NULL && head->sel != NULL) { + /* Search through chain for first match */ + while (head != NULL) { + name = _class_name(head->sel); + + if (name != NULL) { + lerror = lwc_string_caseless_isequal( + ref, name, &match); + if (lerror != lwc_error_ok) + return css_error_from_lwc_error(lerror); - lerror = lwc_string_caseless_isequal( - ref, name, &match); - if (lerror != lwc_error_ok) - return css_error_from_lwc_error(lerror); + if (match && RULE_HAS_BYTECODE(head)) { + if (css_bloom_in_bloom( + head->sel_chain_bloom, + req->node_bloom) && + _chain_good_for_element_name( + head->sel, + &(req->qname)) && + _rule_good_for_media( + head->sel->rule, + req->media)) { + /* Found a match */ + break; + } + } + } + head = head->next; + } } if (head == NULL) @@ -759,7 +1057,9 @@ css_error _iterate_classes(const css_selector **current, * * If nothing further matches, CSS_OK will be returned and **next == NULL */ -css_error _iterate_ids(const css_selector **current, +css_error _iterate_ids( + const struct css_hash_selection_requirments *req, + const css_selector **current, const css_selector ***next) { const hash_entry *head = (const hash_entry *) current; @@ -767,18 +1067,37 @@ css_error _iterate_ids(const css_selector **current, lwc_error lerror = lwc_error_ok; lwc_string *name, *ref; - ref = _id_name(head->sel); + ref = req->id; + head = head->next; - /* Look for the next selector that matches the key */ - while (match == false && (head = head->next) != NULL) { - name = _id_name(head->sel); - if (name == NULL) - continue; + if (head != NULL && head->sel != NULL) { + /* Search through chain for first match */ + while (head != NULL) { + name = _id_name(head->sel); - lerror = lwc_string_caseless_isequal( - ref, name, &match); - if (lerror != lwc_error_ok) - return css_error_from_lwc_error(lerror); + if (name != NULL) { + lerror = lwc_string_caseless_isequal( + ref, name, &match); + if (lerror != lwc_error_ok) + return css_error_from_lwc_error(lerror); + + if (match && RULE_HAS_BYTECODE(head)) { + if (css_bloom_in_bloom( + head->sel_chain_bloom, + req->node_bloom) && + _chain_good_for_element_name( + head->sel, + &req->qname) && + _rule_good_for_media( + head->sel->rule, + req->media)) { + /* Found a match */ + break; + } + } + } + head = head->next; + } } if (head == NULL) @@ -798,15 +1117,32 @@ css_error _iterate_ids(const css_selector **current, * * If nothing further matches, CSS_OK will be returned and **next == NULL */ -css_error _iterate_universal(const css_selector **current, +css_error _iterate_universal( + const struct css_hash_selection_requirments *req, + const css_selector **current, const css_selector ***next) { const hash_entry *head = (const hash_entry *) current; + head = head->next; - if (head->next == NULL) + if (head != NULL && head->sel != NULL) { + /* Search through chain for first match */ + while (head != NULL) { + if (RULE_HAS_BYTECODE(head) && + css_bloom_in_bloom( + head->sel_chain_bloom, + req->node_bloom) && + _rule_good_for_media(head->sel->rule, + req->media)) { + /* Found a match */ + break; + } + head = head->next; + } + } + + if (head == NULL) head = &empty_slot; - else - head = head->next; (*next) = (const css_selector **) head; |