summaryrefslogtreecommitdiff
path: root/data/data_generator.rb
diff options
context:
space:
mode:
authorSteven G. Johnson <stevenj@mit.edu>2018-07-24 13:18:48 -0400
committerGitHub <noreply@github.com>2018-07-24 13:18:48 -0400
commitd4a58cfec5345bbb2bb0db1e85172a8cff278da7 (patch)
tree01a7a3f741550c3a1dbec3b49a70f5d5f2061d6f /data/data_generator.rb
parent02f4e1890cf8135b609b404c58ac7e8b27136ad6 (diff)
downloadlibutf8proc-d4a58cfec5345bbb2bb0db1e85172a8cff278da7.tar.gz
libutf8proc-d4a58cfec5345bbb2bb0db1e85172a8cff278da7.tar.bz2
update data and algorithms for Unicode 11 (#140)
Diffstat (limited to 'data/data_generator.rb')
-rw-r--r--data/data_generator.rb13
1 files changed, 13 insertions, 0 deletions
diff --git a/data/data_generator.rb b/data/data_generator.rb
index 795652c..fe549f8 100644
--- a/data/data_generator.rb
+++ b/data/data_generator.rb
@@ -85,6 +85,19 @@ $grapheme_boundclass_list.each_line do |entry|
end
end
+$emoji_data_list = File.read("emoji-data.txt")
+$emoji_data_list.each_line do |entry|
+ if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
+ $1.hex.upto($2.hex) { |e2| $grapheme_boundclass[e2] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC" }
+ elsif entry =~ /^([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
+ $grapheme_boundclass[$1.hex] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC"
+ elsif entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Emoji_Modifier\W/
+ $1.hex.upto($2.hex) { |e2| $grapheme_boundclass[e2] = "UTF8PROC_BOUNDCLASS_EXTEND" }
+ elsif entry =~ /^([0-9A-F]+)\s*;\s*Emoji_Modifier\W/
+ $grapheme_boundclass[$1.hex] = "UTF8PROC_BOUNDCLASS_EXTEND"
+ end
+end
+
$charwidth_list = File.read("CharWidths.txt")
$charwidth = Hash.new(0)
$charwidth_list.each_line do |entry|