uppercase mapping ß (U+00df) to ẞ (U+1E9E) (#134)

* uppercase(0x00df) = 0x1e9e
* tests for titlecase and u+00df uppercase
* NEWS, another test
author: Steven G. Johnson <stevenj@mit.edu> 2018-05-02 14:18:26 -0400
committer: GitHub <noreply@github.com> 2018-05-02 14:18:26 -0400
commit: d81308faba0cfb3fccf8c3b12446863c7b76ae32 (patch)
tree: a7c9e0da3b030c9f1182633fb37f4facafd0b63d /data
parent: 86394501342fc7174a069a2d52d53f31b7ee62da (diff)
download: libutf8proc-d81308faba0cfb3fccf8c3b12446863c7b76ae32.tar.gz
libutf8proc-d81308faba0cfb3fccf8c3b12446863c7b76ae32.tar.bz2
1 files changed, 13 insertions, 13 deletions
diff --git a/data/data_generator.rb b/data/data_generator.rb
index fa09617..972f542 100644
--- a/data/data_generator.rb
+++ b/data/data_generator.rb
@@ -137,13 +137,13 @@ def cpary2utf16encoded(array)
 end
 def cpary2c(array)
   return "UINT16_MAX" if array.nil? || array.length == 0
-  lencode = array.length - 1 #no sequence has len 0, so we encode len 1 as 0, len 2 as 1, ... 
+  lencode = array.length - 1 #no sequence has len 0, so we encode len 1 as 0, len 2 as 1, ...
   array = cpary2utf16encoded(array)
   if lencode >= 7 #we have only 3 bits for the length (which is already cutting it close. might need to change it to 2 bits in future Unicode versions)
-    array = [lencode] + array 
+    array = [lencode] + array
     lencode = 7
-  end  
-  idx = pushary(array) 
+  end
+  idx = pushary(array)
   raise "Array index out of bound" if idx > 0x1FFF
   return "#{idx | (lencode << 13)}"
 end
@@ -188,9 +188,10 @@ class UnicodeChar
     @decomp_mapping    = ($8=='') ? nil :
                          $8.split.collect { |element| element.hex }
     @bidi_mirrored     = ($13=='Y') ? true : false
-    @uppercase_mapping = ($16=='') ? nil : $16.hex
+    # issue #130: use nonstandard uppercase ß -> ẞ
+    @uppercase_mapping = ($16=='') ? (code==0x00df ? 0x1e9e : nil) : $16.hex
     @lowercase_mapping = ($17=='') ? nil : $17.hex
-    @titlecase_mapping = ($18=='') ? nil : $18.hex
+    @titlecase_mapping = ($18=='') ? (code==0x00df ? 0x1e9e : nil) : $18.hex
   end
   def case_folding
     $case_folding[code]
@@ -260,17 +261,17 @@ chars.each do |char|
     end
     unless comb2nd_indicies[dm1]
       comb2nd_indicies_sorted_keys << dm1
-      comb2nd_indicies[dm1] = comb2nd_indicies.keys.length 
+      comb2nd_indicies[dm1] = comb2nd_indicies.keys.length
     end
     comb_array[comb1st_indicies[dm0]] ||= []
     raise "Duplicate canonical mapping: #{char.code} #{dm0} #{dm1}" if comb_array[comb1st_indicies[dm0]][comb2nd_indicies[dm1]]
     comb_array[comb1st_indicies[dm0]][comb2nd_indicies[dm1]] = char.code
-    
+
     comb2nd_indicies_nonbasic[dm1] = true if char.code > 0xFFFF
   end
   char.c_decomp_mapping = cpary2c(char.decomp_mapping)
   char.c_case_folding = cpary2c(char.case_folding)
-end 
+end
 
 comb_indicies = {}
 cumoffset = 0
@@ -281,7 +282,7 @@ comb1st_indicies.each do |dm0, index|
   last = nil
   offset = 0
   comb2nd_indicies_sorted_keys.each_with_index do |dm1, b|
-    if comb_array[index][b] 
+    if comb_array[index][b]
       first = offset unless first
       last = offset
       last += 1 if comb2nd_indicies_nonbasic[dm1]
@@ -391,7 +392,7 @@ comb1st_indicies.keys.each_index do |a|
   offset = 0
   $stdout << comb1st_indicies_firstoffsets[a] << ", " << comb1st_indicies_lastoffsets[a] << ", "
   comb2nd_indicies_sorted_keys.each_with_index do |dm1, b|
-    break if offset > comb1st_indicies_lastoffsets[a] 
+    break if offset > comb1st_indicies_lastoffsets[a]
     if offset >= comb1st_indicies_firstoffsets[a]
       i += 1
       if i == 8
@@ -403,9 +404,8 @@ comb1st_indicies.keys.each_index do |a|
       $stdout << (v & 0xFFFF) << ", "
     end
     offset += 1
-    offset += 1 if comb2nd_indicies_nonbasic[dm1]    
+    offset += 1 if comb2nd_indicies_nonbasic[dm1]
   end
   $stdout  << "\n"
 end
 $stdout << "};\n\n"
-
author	Steven G. Johnson <stevenj@mit.edu>	2018-05-02 14:18:26 -0400
committer	GitHub <noreply@github.com>	2018-05-02 14:18:26 -0400
commit	d81308faba0cfb3fccf8c3b12446863c7b76ae32 (patch)
tree	a7c9e0da3b030c9f1182633fb37f4facafd0b63d /data
parent	86394501342fc7174a069a2d52d53f31b7ee62da (diff)
download	libutf8proc-d81308faba0cfb3fccf8c3b12446863c7b76ae32.tar.gz libutf8proc-d81308faba0cfb3fccf8c3b12446863c7b76ae32.tar.bz2