diff options
author | John-Mark Bell <jmb@netsurf-browser.org> | 2021-08-09 21:00:10 +0100 |
---|---|---|
committer | John-Mark Bell <jmb@netsurf-browser.org> | 2021-08-09 23:59:34 +0100 |
commit | 3c116337d2cad81fa27df86b1d9f995d8906fcd8 (patch) | |
tree | df0ac68e21777613274fe4b0502107ca7de67556 /src/rufl_init.c | |
parent | 5be9bd8e833c84dbd55511046657e2fcf678d03c (diff) | |
download | librufl-3c116337d2cad81fa27df86b1d9f995d8906fcd8.tar.gz librufl-3c116337d2cad81fa27df86b1d9f995d8906fcd8.tar.bz2 |
Perform font substitution for astral characters, too.
This significantly reworks the construction of the substitution
table (and hides its implementation from the rest of the library).
It is no longer practical to use a directly-indexed array so,
instead, we front it with a perfect hash function. The storage
required for the (unoptimised) hash data is currently about 6 bits
per entry. Implementing compression would reduce this to the order
of ~2 bits per entry.
As the resulting data structure is sparse, we must store the
original Unicode codepoint value along with the identity of the
font providing a suitable glyph. This has necessitated expanding
the size of substitution table entries from 16 to 64 bits (of
which 27 bits are currently unused).
With the 55757 codepoint coverage I have been testing with, this
results in an increase in the substitution table storage
requirements from the original 128kB directly-indexed array
(covering the Basic Multilingual Plane only) to a rather fatter
512kB (for the codepoint+font id array) + ~41kB of hash metadata.
This is still only ~25% of the size of a directly-indexed array of
16-bit entries covering all 17 Unicode planes (~2176kB), however, so
is not completely outrageous.
Diffstat (limited to 'src/rufl_init.c')
-rw-r--r-- | src/rufl_init.c | 66 |
1 file changed, 2 insertions, 64 deletions
diff --git a/src/rufl_init.c b/src/rufl_init.c index d12fac0..25753d6 100644 --- a/src/rufl_init.c +++ b/src/rufl_init.c @@ -31,7 +31,6 @@ unsigned int rufl_family_list_entries = 0; struct rufl_family_map_entry *rufl_family_map = 0; os_error *rufl_fm_error = 0; void *rufl_family_menu = 0; -unsigned short *rufl_substitution_table = 0; struct rufl_cache_entry rufl_cache[rufl_CACHE_SIZE]; int rufl_cache_time = 0; bool rufl_old_font_manager = false; @@ -87,7 +86,6 @@ static rufl_code rufl_init_read_encoding(font_f font, void *pw); static int rufl_glyph_map_cmp(const void *keyval, const void *datum); static int rufl_unicode_map_cmp(const void *z1, const void *z2); -static rufl_code rufl_init_substitution_table(void); static rufl_code rufl_save_cache(void); static rufl_code rufl_load_cache(void); static int rufl_font_list_cmp(const void *keyval, const void *datum); @@ -214,9 +212,9 @@ rufl_code rufl_init(void) xhourglass_leds(2, 0, 0); xhourglass_colours(0x0000ff, 0x00ffff, &old_sand, &old_glass); - code = rufl_init_substitution_table(); + code = rufl_substitution_table_init(); if (code != rufl_OK) { - LOG("rufl_init_substitution_table: 0x%x", code); + LOG("rufl_substitution_table_init: 0x%x", code); rufl_quit(); xhourglass_off(); return code; @@ -1332,66 +1330,6 @@ int rufl_unicode_map_cmp(const void *z1, const void *z2) /** - * Construct the font substitution table. 
- */ - -rufl_code rufl_init_substitution_table(void) -{ - unsigned char z; - unsigned int i; - unsigned int block, byte, bit; - unsigned int u; - unsigned int index; - const struct rufl_character_set *charset; - - rufl_substitution_table = malloc(65536 * - sizeof rufl_substitution_table[0]); - if (!rufl_substitution_table) { - LOG("malloc(%zu) failed", 65536 * - sizeof rufl_substitution_table[0]); - return rufl_OUT_OF_MEMORY; - } - - for (u = 0; u != 0x10000; u++) - rufl_substitution_table[u] = NOT_AVAILABLE; - - for (i = 0; i != rufl_font_list_entries; i++) { - charset = rufl_font_list[i].charset; - if (!charset) - continue; - for (block = 0; block != 256; block++) { - if (charset->index[block] == BLOCK_EMPTY) - continue; - if (charset->index[block] == BLOCK_FULL) { - for (u = block << 8; u != (block << 8) + 256; - u++) { - if (rufl_substitution_table[u] == - NOT_AVAILABLE) - rufl_substitution_table[u] = i; - } - continue; - } - index = charset->index[block]; - for (byte = 0; byte != 32; byte++) { - z = charset->block[index][byte]; - if (z == 0) - continue; - u = (block << 8) | (byte << 3); - for (bit = 0; bit != 8; bit++, u++) { - if (rufl_substitution_table[u] == - NOT_AVAILABLE && - z & (1 << bit)) - rufl_substitution_table[u] = i; - } - } - } - } - - return rufl_OK; -} - - -/** * Save character sets to cache. */ |