summary | refs | log | tree | commit | diff
path: root/src/rufl_init.c
diff options
context:
space:
mode:
author: John-Mark Bell <jmb@netsurf-browser.org> 2021-08-09 21:00:10 +0100
committer: John-Mark Bell <jmb@netsurf-browser.org> 2021-08-09 23:59:34 +0100
commit: 3c116337d2cad81fa27df86b1d9f995d8906fcd8 (patch)
tree: df0ac68e21777613274fe4b0502107ca7de67556 /src/rufl_init.c
parent: 5be9bd8e833c84dbd55511046657e2fcf678d03c (diff)
download: librufl-3c116337d2cad81fa27df86b1d9f995d8906fcd8.tar.gz
          librufl-3c116337d2cad81fa27df86b1d9f995d8906fcd8.tar.bz2
Perform font substitution for astral characters, too.
This significantly reworks the construction of the substitution table (and hides its implementation from the rest of the library). It is no longer practical to use a directly-indexed array so, instead, we front the table with a perfect hash function. The storage required for the (unoptimised) hash data is currently about 6 bits per entry. Implementing compression would reduce this to around 2 bits per entry. As the resulting data structure is sparse, we must store the original Unicode codepoint value along with the identity of the font providing a suitable glyph. This has necessitated expanding the size of substitution table entries from 16 to 64 bits (of which 27 bits are currently unused). With the 55757 codepoint coverage I have been testing with, this results in an increase in the substitution table storage requirements from the original 128kB directly-indexed array (covering the Basic Multilingual Plane only) to a rather fatter 512kB (for the codepoint+font id array) + ~41kB of hash metadata. This is still only ~25% of the size of a directly-indexed array covering the entire Unicode codepoint range (17 planes at 128kB each), however, so it is not completely outrageous.
Diffstat (limited to 'src/rufl_init.c')
-rw-r--r-- src/rufl_init.c 66
1 files changed, 2 insertions, 64 deletions
diff --git a/src/rufl_init.c b/src/rufl_init.c
index d12fac0..25753d6 100644
--- a/src/rufl_init.c
+++ b/src/rufl_init.c
@@ -31,7 +31,6 @@ unsigned int rufl_family_list_entries = 0;
struct rufl_family_map_entry *rufl_family_map = 0;
os_error *rufl_fm_error = 0;
void *rufl_family_menu = 0;
-unsigned short *rufl_substitution_table = 0;
struct rufl_cache_entry rufl_cache[rufl_CACHE_SIZE];
int rufl_cache_time = 0;
bool rufl_old_font_manager = false;
@@ -87,7 +86,6 @@ static rufl_code rufl_init_read_encoding(font_f font,
void *pw);
static int rufl_glyph_map_cmp(const void *keyval, const void *datum);
static int rufl_unicode_map_cmp(const void *z1, const void *z2);
-static rufl_code rufl_init_substitution_table(void);
static rufl_code rufl_save_cache(void);
static rufl_code rufl_load_cache(void);
static int rufl_font_list_cmp(const void *keyval, const void *datum);
@@ -214,9 +212,9 @@ rufl_code rufl_init(void)
xhourglass_leds(2, 0, 0);
xhourglass_colours(0x0000ff, 0x00ffff, &old_sand, &old_glass);
- code = rufl_init_substitution_table();
+ code = rufl_substitution_table_init();
if (code != rufl_OK) {
- LOG("rufl_init_substitution_table: 0x%x", code);
+ LOG("rufl_substitution_table_init: 0x%x", code);
rufl_quit();
xhourglass_off();
return code;
@@ -1332,66 +1330,6 @@ int rufl_unicode_map_cmp(const void *z1, const void *z2)
/**
- * Construct the font substitution table.
- */
-
-rufl_code rufl_init_substitution_table(void)
-{
- unsigned char z;
- unsigned int i;
- unsigned int block, byte, bit;
- unsigned int u;
- unsigned int index;
- const struct rufl_character_set *charset;
-
- rufl_substitution_table = malloc(65536 *
- sizeof rufl_substitution_table[0]);
- if (!rufl_substitution_table) {
- LOG("malloc(%zu) failed", 65536 *
- sizeof rufl_substitution_table[0]);
- return rufl_OUT_OF_MEMORY;
- }
-
- for (u = 0; u != 0x10000; u++)
- rufl_substitution_table[u] = NOT_AVAILABLE;
-
- for (i = 0; i != rufl_font_list_entries; i++) {
- charset = rufl_font_list[i].charset;
- if (!charset)
- continue;
- for (block = 0; block != 256; block++) {
- if (charset->index[block] == BLOCK_EMPTY)
- continue;
- if (charset->index[block] == BLOCK_FULL) {
- for (u = block << 8; u != (block << 8) + 256;
- u++) {
- if (rufl_substitution_table[u] ==
- NOT_AVAILABLE)
- rufl_substitution_table[u] = i;
- }
- continue;
- }
- index = charset->index[block];
- for (byte = 0; byte != 32; byte++) {
- z = charset->block[index][byte];
- if (z == 0)
- continue;
- u = (block << 8) | (byte << 3);
- for (bit = 0; bit != 8; bit++, u++) {
- if (rufl_substitution_table[u] ==
- NOT_AVAILABLE &&
- z & (1 << bit))
- rufl_substitution_table[u] = i;
- }
- }
- }
- }
-
- return rufl_OK;
-}
-
-
-/**
* Save character sets to cache.
*/