summaryrefslogtreecommitdiff
path: root/data_generator.rb
blob: 0db0331c8a9fc3832889b4f870a2cfef500b584c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
#!/usr/pkg/bin/ruby

#  This file was used to generate the 'unicode_data.c' file by parsing the
#  Unicode data file 'UnicodeData.txt' of the Unicode Character Database.
#  It is included for informational purposes only and not intended for
#  production use.


#  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
#
#  Permission is hereby granted, free of charge, to any person obtaining a
#  copy of this software and associated documentation files (the "Software"),
#  to deal in the Software without restriction, including without limitation
#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
#  and/or sell copies of the Software, and to permit persons to whom the
#  Software is furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in
#  all copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
#  DEALINGS IN THE SOFTWARE.


#  This file contains derived data from a modified version of the
#  Unicode data files. The following license applies to that data:
#
#  COPYRIGHT AND PERMISSION NOTICE
#
#  Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
#  under the Terms of Use in http://www.unicode.org/copyright.html.
#
#  Permission is hereby granted, free of charge, to any person obtaining a
#  copy of the Unicode data files and any associated documentation (the "Data
#  Files") or Unicode software and any associated documentation (the
#  "Software") to deal in the Data Files or Software without restriction,
#  including without limitation the rights to use, copy, modify, merge,
#  publish, distribute, and/or sell copies of the Data Files or Software, and
#  to permit persons to whom the Data Files or Software are furnished to do
#  so, provided that (a) the above copyright notice(s) and this permission
#  notice appear with all copies of the Data Files or Software, (b) both the
#  above copyright notice(s) and this permission notice appear in associated
#  documentation, and (c) there is clear notice in each modified Data File or
#  in the Software as well as in the documentation associated with the Data
#  File(s) or Software that the data or software has been modified.
#
#  THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
#  KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
#  THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
#  INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
#  CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
#  USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
#  TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
#  PERFORMANCE OF THE DATA FILES OR SOFTWARE.
#
#  Except as contained in this notice, the name of a copyright holder shall
#  not be used in advertising or otherwise to promote the sale, use or other
#  dealings in these Data Files or Software without prior written
#  authorization of the copyright holder.



$ignorable_list = <<END_OF_LIST
0000..0008    ; Default_Ignorable_Code_Point # Cc   [9] <control-0000>..<control-0008>
000E..001F    ; Default_Ignorable_Code_Point # Cc  [18] <control-000E>..<control-001F>
007F..0084    ; Default_Ignorable_Code_Point # Cc   [6] <control-007F>..<control-0084>
0086..009F    ; Default_Ignorable_Code_Point # Cc  [26] <control-0086>..<control-009F>
00AD          ; Default_Ignorable_Code_Point # Cf       SOFT HYPHEN
034F          ; Default_Ignorable_Code_Point # Mn       COMBINING GRAPHEME JOINER
0600..0603    ; Default_Ignorable_Code_Point # Cf   [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
06DD          ; Default_Ignorable_Code_Point # Cf       ARABIC END OF AYAH
070F          ; Default_Ignorable_Code_Point # Cf       SYRIAC ABBREVIATION MARK
115F..1160    ; Default_Ignorable_Code_Point # Lo   [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
17B4..17B5    ; Default_Ignorable_Code_Point # Cf   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
180B..180D    ; Default_Ignorable_Code_Point # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
200B..200F    ; Default_Ignorable_Code_Point # Cf   [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
202A..202E    ; Default_Ignorable_Code_Point # Cf   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2063    ; Default_Ignorable_Code_Point # Cf   [4] WORD JOINER..INVISIBLE SEPARATOR
2064..2069    ; Default_Ignorable_Code_Point # Cn   [6] <reserved-2064>..<reserved-2069>
206A..206F    ; Default_Ignorable_Code_Point # Cf   [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
3164          ; Default_Ignorable_Code_Point # Lo       HANGUL FILLER
D800..DFFF    ; Default_Ignorable_Code_Point # Cs [2048] <surrogate-D800>..<surrogate-DFFF>
FE00..FE0F    ; Default_Ignorable_Code_Point # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FEFF          ; Default_Ignorable_Code_Point # Cf       ZERO WIDTH NO-BREAK SPACE
FFA0          ; Default_Ignorable_Code_Point # Lo       HALFWIDTH HANGUL FILLER
FFF0..FFF8    ; Default_Ignorable_Code_Point # Cn   [9] <reserved-FFF0>..<reserved-FFF8>
1D173..1D17A  ; Default_Ignorable_Code_Point # Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0001         ; Default_Ignorable_Code_Point # Cf       LANGUAGE TAG
E0002..E001F  ; Default_Ignorable_Code_Point # Cn  [30] <reserved-E0002>..<reserved-E001F>
E0020..E007F  ; Default_Ignorable_Code_Point # Cf  [96] TAG SPACE..CANCEL TAG
E0080..E00FF  ; Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF>
E0100..E01EF  ; Default_Ignorable_Code_Point # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
E01F0..E0FFF  ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
END_OF_LIST

$ignorable = []
$ignorable_list.each do |entry|
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
    $1.hex.upto($2.hex) { |e2| $ignorable << e2 }
  elsif entry =~ /^[0-9A-F]+/
    $ignorable << $&.hex
  end
end

$grapheme_extend_list = <<END_OF_LIST
0300..036F    ; Grapheme_Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0483..0486    ; Grapheme_Extend # Mn   [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA
0488..0489    ; Grapheme_Extend # Me   [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
0591..05BD    ; Grapheme_Extend # Mn  [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BF          ; Grapheme_Extend # Mn       HEBREW POINT RAFE
05C1..05C2    ; Grapheme_Extend # Mn   [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5    ; Grapheme_Extend # Mn   [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7          ; Grapheme_Extend # Mn       HEBREW POINT QAMATS QATAN
0610..0615    ; Grapheme_Extend # Mn   [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH
064B..065E    ; Grapheme_Extend # Mn  [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
0670          ; Grapheme_Extend # Mn       ARABIC LETTER SUPERSCRIPT ALEF
06D6..06DC    ; Grapheme_Extend # Mn   [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE          ; Grapheme_Extend # Me       ARABIC START OF RUB EL HIZB
06DF..06E4    ; Grapheme_Extend # Mn   [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E7..06E8    ; Grapheme_Extend # Mn   [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06EA..06ED    ; Grapheme_Extend # Mn   [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
0711          ; Grapheme_Extend # Mn       SYRIAC LETTER SUPERSCRIPT ALAPH
0730..074A    ; Grapheme_Extend # Mn  [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0    ; Grapheme_Extend # Mn  [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3    ; Grapheme_Extend # Mn   [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0901..0902    ; Grapheme_Extend # Mn   [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
093C          ; Grapheme_Extend # Mn       DEVANAGARI SIGN NUKTA
0941..0948    ; Grapheme_Extend # Mn   [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
094D          ; Grapheme_Extend # Mn       DEVANAGARI SIGN VIRAMA
0951..0954    ; Grapheme_Extend # Mn   [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
0962..0963    ; Grapheme_Extend # Mn   [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981          ; Grapheme_Extend # Mn       BENGALI SIGN CANDRABINDU
09BC          ; Grapheme_Extend # Mn       BENGALI SIGN NUKTA
09BE          ; Grapheme_Extend # Mc       BENGALI VOWEL SIGN AA
09C1..09C4    ; Grapheme_Extend # Mn   [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
09CD          ; Grapheme_Extend # Mn       BENGALI SIGN VIRAMA
09D7          ; Grapheme_Extend # Mc       BENGALI AU LENGTH MARK
09E2..09E3    ; Grapheme_Extend # Mn   [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
0A01..0A02    ; Grapheme_Extend # Mn   [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
0A3C          ; Grapheme_Extend # Mn       GURMUKHI SIGN NUKTA
0A41..0A42    ; Grapheme_Extend # Mn   [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
0A47..0A48    ; Grapheme_Extend # Mn   [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
0A4B..0A4D    ; Grapheme_Extend # Mn   [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
0A70..0A71    ; Grapheme_Extend # Mn   [2] GURMUKHI TIPPI..GURMUKHI ADDAK
0A81..0A82    ; Grapheme_Extend # Mn   [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
0ABC          ; Grapheme_Extend # Mn       GUJARATI SIGN NUKTA
0AC1..0AC5    ; Grapheme_Extend # Mn   [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
0AC7..0AC8    ; Grapheme_Extend # Mn   [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
0ACD          ; Grapheme_Extend # Mn       GUJARATI SIGN VIRAMA
0AE2..0AE3    ; Grapheme_Extend # Mn   [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
0B01          ; Grapheme_Extend # Mn       ORIYA SIGN CANDRABINDU
0B3C          ; Grapheme_Extend # Mn       ORIYA SIGN NUKTA
0B3E          ; Grapheme_Extend # Mc       ORIYA VOWEL SIGN AA
0B3F          ; Grapheme_Extend # Mn       ORIYA VOWEL SIGN I
0B41..0B43    ; Grapheme_Extend # Mn   [3] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC R
0B4D          ; Grapheme_Extend # Mn       ORIYA SIGN VIRAMA
0B56          ; Grapheme_Extend # Mn       ORIYA AI LENGTH MARK
0B57          ; Grapheme_Extend # Mc       ORIYA AU LENGTH MARK
0B82          ; Grapheme_Extend # Mn       TAMIL SIGN ANUSVARA
0BBE          ; Grapheme_Extend # Mc       TAMIL VOWEL SIGN AA
0BC0          ; Grapheme_Extend # Mn       TAMIL VOWEL SIGN II
0BCD          ; Grapheme_Extend # Mn       TAMIL SIGN VIRAMA
0BD7          ; Grapheme_Extend # Mc       TAMIL AU LENGTH MARK
0C3E..0C40    ; Grapheme_Extend # Mn   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C46..0C48    ; Grapheme_Extend # Mn   [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4D    ; Grapheme_Extend # Mn   [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56    ; Grapheme_Extend # Mn   [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0CBC          ; Grapheme_Extend # Mn       KANNADA SIGN NUKTA
0CBF          ; Grapheme_Extend # Mn       KANNADA VOWEL SIGN I
0CC2          ; Grapheme_Extend # Mc       KANNADA VOWEL SIGN UU
0CC6          ; Grapheme_Extend # Mn       KANNADA VOWEL SIGN E
0CCC..0CCD    ; Grapheme_Extend # Mn   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6    ; Grapheme_Extend # Mc   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3    ; Grapheme_Extend # Mn   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0D3E          ; Grapheme_Extend # Mc       MALAYALAM VOWEL SIGN AA
0D41..0D43    ; Grapheme_Extend # Mn   [3] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC R
0D4D          ; Grapheme_Extend # Mn       MALAYALAM SIGN VIRAMA
0D57          ; Grapheme_Extend # Mc       MALAYALAM AU LENGTH MARK
0DCA          ; Grapheme_Extend # Mn       SINHALA SIGN AL-LAKUNA
0DCF          ; Grapheme_Extend # Mc       SINHALA VOWEL SIGN AELA-PILLA
0DD2..0DD4    ; Grapheme_Extend # Mn   [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
0DD6          ; Grapheme_Extend # Mn       SINHALA VOWEL SIGN DIGA PAA-PILLA
0DDF          ; Grapheme_Extend # Mc       SINHALA VOWEL SIGN GAYANUKITTA
0E31          ; Grapheme_Extend # Mn       THAI CHARACTER MAI HAN-AKAT
0E34..0E3A    ; Grapheme_Extend # Mn   [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
0E47..0E4E    ; Grapheme_Extend # Mn   [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
0EB1          ; Grapheme_Extend # Mn       LAO VOWEL SIGN MAI KAN
0EB4..0EB9    ; Grapheme_Extend # Mn   [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
0EBB..0EBC    ; Grapheme_Extend # Mn   [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
0EC8..0ECD    ; Grapheme_Extend # Mn   [6] LAO TONE MAI EK..LAO NIGGAHITA
0F18..0F19    ; Grapheme_Extend # Mn   [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
0F35          ; Grapheme_Extend # Mn       TIBETAN MARK NGAS BZUNG NYI ZLA
0F37          ; Grapheme_Extend # Mn       TIBETAN MARK NGAS BZUNG SGOR RTAGS
0F39          ; Grapheme_Extend # Mn       TIBETAN MARK TSA -PHRU
0F71..0F7E    ; Grapheme_Extend # Mn  [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
0F80..0F84    ; Grapheme_Extend # Mn   [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
0F86..0F87    ; Grapheme_Extend # Mn   [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
0F90..0F97    ; Grapheme_Extend # Mn   [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
0F99..0FBC    ; Grapheme_Extend # Mn  [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
0FC6          ; Grapheme_Extend # Mn       TIBETAN SYMBOL PADMA GDAN
102D..1030    ; Grapheme_Extend # Mn   [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
1032          ; Grapheme_Extend # Mn       MYANMAR VOWEL SIGN AI
1036..1037    ; Grapheme_Extend # Mn   [2] MYANMAR SIGN ANUSVARA..MYANMAR SIGN DOT BELOW
1039          ; Grapheme_Extend # Mn       MYANMAR SIGN VIRAMA
1058..1059    ; Grapheme_Extend # Mn   [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
135F          ; Grapheme_Extend # Mn       ETHIOPIC COMBINING GEMINATION MARK
1712..1714    ; Grapheme_Extend # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1732..1734    ; Grapheme_Extend # Mn   [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
1752..1753    ; Grapheme_Extend # Mn   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
1772..1773    ; Grapheme_Extend # Mn   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
17B7..17BD    ; Grapheme_Extend # Mn   [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
17C6          ; Grapheme_Extend # Mn       KHMER SIGN NIKAHIT
17C9..17D3    ; Grapheme_Extend # Mn  [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD          ; Grapheme_Extend # Mn       KHMER SIGN ATTHACAN
180B..180D    ; Grapheme_Extend # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
18A9          ; Grapheme_Extend # Mn       MONGOLIAN LETTER ALI GALI DAGALGA
1920..1922    ; Grapheme_Extend # Mn   [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1927..1928    ; Grapheme_Extend # Mn   [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
1932          ; Grapheme_Extend # Mn       LIMBU SMALL LETTER ANUSVARA
1939..193B    ; Grapheme_Extend # Mn   [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A17..1A18    ; Grapheme_Extend # Mn   [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1B00..1B03    ; Grapheme_Extend # Mn   [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34          ; Grapheme_Extend # Mn       BALINESE SIGN REREKAN
1B36..1B3A    ; Grapheme_Extend # Mn   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
1B3C          ; Grapheme_Extend # Mn       BALINESE VOWEL SIGN LA LENGA
1B42          ; Grapheme_Extend # Mn       BALINESE VOWEL SIGN PEPET
1B6B..1B73    ; Grapheme_Extend # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1DC0..1DCA    ; Grapheme_Extend # Mn  [11] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER R BELOW
1DFE..1DFF    ; Grapheme_Extend # Mn   [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D    ; Grapheme_Extend # Cf   [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC    ; Grapheme_Extend # Mn  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0    ; Grapheme_Extend # Me   [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1          ; Grapheme_Extend # Mn       COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4    ; Grapheme_Extend # Me   [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20EF    ; Grapheme_Extend # Mn  [11] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW
302A..302F    ; Grapheme_Extend # Mn   [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A    ; Grapheme_Extend # Mn   [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
A806          ; Grapheme_Extend # Mn       SYLOTI NAGRI SIGN HASANTA
A80B          ; Grapheme_Extend # Mn       SYLOTI NAGRI SIGN ANUSVARA
A825..A826    ; Grapheme_Extend # Mn   [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
FB1E          ; Grapheme_Extend # Mn       HEBREW POINT JUDEO-SPANISH VARIKA
FE00..FE0F    ; Grapheme_Extend # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE23    ; Grapheme_Extend # Mn   [4] COMBINING LIGATURE LEFT HALF..COMBINING DOUBLE TILDE RIGHT HALF
10A01..10A03  ; Grapheme_Extend # Mn   [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A05..10A06  ; Grapheme_Extend # Mn   [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A0C..10A0F  ; Grapheme_Extend # Mn   [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A38..10A3A  ; Grapheme_Extend # Mn   [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F         ; Grapheme_Extend # Mn       KHAROSHTHI VIRAMA
1D165         ; Grapheme_Extend # Mc       MUSICAL SYMBOL COMBINING STEM
1D167..1D169  ; Grapheme_Extend # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16E..1D172  ; Grapheme_Extend # Mc   [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
1D17B..1D182  ; Grapheme_Extend # Mn   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B  ; Grapheme_Extend # Mn   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD  ; Grapheme_Extend # Mn   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1D242..1D244  ; Grapheme_Extend # Mn   [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
END_OF_LIST

$grapheme_extend = []
$grapheme_extend_list.each do |entry|
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
    $1.hex.upto($2.hex) { |e2| $grapheme_extend << e2 }
  elsif entry =~ /^[0-9A-F]+/
    $grapheme_extend << $&.hex
  end
end

$exclusions = <<END_OF_LIST
0958    #  DEVANAGARI LETTER QA
0959    #  DEVANAGARI LETTER KHHA
095A    #  DEVANAGARI LETTER GHHA
095B    #  DEVANAGARI LETTER ZA
095C    #  DEVANAGARI LETTER DDDHA
095D    #  DEVANAGARI LETTER RHA
095E    #  DEVANAGARI LETTER FA
095F    #  DEVANAGARI LETTER YYA
09DC    #  BENGALI LETTER RRA
09DD    #  BENGALI LETTER RHA
09DF    #  BENGALI LETTER YYA
0A33    #  GURMUKHI LETTER LLA
0A36    #  GURMUKHI LETTER SHA
0A59    #  GURMUKHI LETTER KHHA
0A5A    #  GURMUKHI LETTER GHHA
0A5B    #  GURMUKHI LETTER ZA
0A5E    #  GURMUKHI LETTER FA
0B5C    #  ORIYA LETTER RRA
0B5D    #  ORIYA LETTER RHA
0F43    #  TIBETAN LETTER GHA
0F4D    #  TIBETAN LETTER DDHA
0F52    #  TIBETAN LETTER DHA
0F57    #  TIBETAN LETTER BHA
0F5C    #  TIBETAN LETTER DZHA
0F69    #  TIBETAN LETTER KSSA
0F76    #  TIBETAN VOWEL SIGN VOCALIC R
0F78    #  TIBETAN VOWEL SIGN VOCALIC L
0F93    #  TIBETAN SUBJOINED LETTER GHA
0F9D    #  TIBETAN SUBJOINED LETTER DDHA
0FA2    #  TIBETAN SUBJOINED LETTER DHA
0FA7    #  TIBETAN SUBJOINED LETTER BHA
0FAC    #  TIBETAN SUBJOINED LETTER DZHA
0FB9    #  TIBETAN SUBJOINED LETTER KSSA
FB1D    #  HEBREW LETTER YOD WITH HIRIQ
FB1F    #  HEBREW LIGATURE YIDDISH YOD YOD PATAH
FB2A    #  HEBREW LETTER SHIN WITH SHIN DOT
FB2B    #  HEBREW LETTER SHIN WITH SIN DOT
FB2C    #  HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
FB2D    #  HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
FB2E    #  HEBREW LETTER ALEF WITH PATAH
FB2F    #  HEBREW LETTER ALEF WITH QAMATS
FB30    #  HEBREW LETTER ALEF WITH MAPIQ
FB31    #  HEBREW LETTER BET WITH DAGESH
FB32    #  HEBREW LETTER GIMEL WITH DAGESH
FB33    #  HEBREW LETTER DALET WITH DAGESH
FB34    #  HEBREW LETTER HE WITH MAPIQ
FB35    #  HEBREW LETTER VAV WITH DAGESH
FB36    #  HEBREW LETTER ZAYIN WITH DAGESH
FB38    #  HEBREW LETTER TET WITH DAGESH
FB39    #  HEBREW LETTER YOD WITH DAGESH
FB3A    #  HEBREW LETTER FINAL KAF WITH DAGESH
FB3B    #  HEBREW LETTER KAF WITH DAGESH
FB3C    #  HEBREW LETTER LAMED WITH DAGESH
FB3E    #  HEBREW LETTER MEM WITH DAGESH
FB40    #  HEBREW LETTER NUN WITH DAGESH
FB41    #  HEBREW LETTER SAMEKH WITH DAGESH
FB43    #  HEBREW LETTER FINAL PE WITH DAGESH
FB44    #  HEBREW LETTER PE WITH DAGESH
FB46    #  HEBREW LETTER TSADI WITH DAGESH
FB47    #  HEBREW LETTER QOF WITH DAGESH
FB48    #  HEBREW LETTER RESH WITH DAGESH
FB49    #  HEBREW LETTER SHIN WITH DAGESH
FB4A    #  HEBREW LETTER TAV WITH DAGESH
FB4B    #  HEBREW LETTER VAV WITH HOLAM
FB4C    #  HEBREW LETTER BET WITH RAFE
FB4D    #  HEBREW LETTER KAF WITH RAFE
FB4E    #  HEBREW LETTER PE WITH RAFE
END_OF_LIST
$exclusions = $exclusions.chomp.split("\n").collect { |e| e.hex }

$excl_version = <<END_OF_LIST
2ADC    #  FORKING
1D15E   #  MUSICAL SYMBOL HALF NOTE
1D15F   #  MUSICAL SYMBOL QUARTER NOTE
1D160   #  MUSICAL SYMBOL EIGHTH NOTE
1D161   #  MUSICAL SYMBOL SIXTEENTH NOTE
1D162   #  MUSICAL SYMBOL THIRTY-SECOND NOTE
1D163   #  MUSICAL SYMBOL SIXTY-FOURTH NOTE
1D164   #  MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D1BB   #  MUSICAL SYMBOL MINIMA
1D1BC   #  MUSICAL SYMBOL MINIMA BLACK
1D1BD   #  MUSICAL SYMBOL SEMIMINIMA WHITE
1D1BE   #  MUSICAL SYMBOL SEMIMINIMA BLACK
1D1BF   #  MUSICAL SYMBOL FUSA WHITE
1D1C0   #  MUSICAL SYMBOL FUSA BLACK
END_OF_LIST
$excl_version = $excl_version.chomp.split("\n").collect { |e| e.hex }

$case_folding_string = <<END_OF_LIST
0041; C; 0061; # LATIN CAPITAL LETTER A
0042; C; 0062; # LATIN CAPITAL LETTER B
0043; C; 0063; # LATIN CAPITAL LETTER C
0044; C; 0064; # LATIN CAPITAL LETTER D
0045; C; 0065; # LATIN CAPITAL LETTER E
0046; C; 0066; # LATIN CAPITAL LETTER F
0047; C; 0067; # LATIN CAPITAL LETTER G
0048; C; 0068; # LATIN CAPITAL LETTER H
0049; C; 0069; # LATIN CAPITAL LETTER I
004A; C; 006A; # LATIN CAPITAL LETTER J
004B; C; 006B; # LATIN CAPITAL LETTER K
004C; C; 006C; # LATIN CAPITAL LETTER L
004D; C; 006D; # LATIN CAPITAL LETTER M
004E; C; 006E; # LATIN CAPITAL LETTER N
004F; C; 006F; # LATIN CAPITAL LETTER O
0050; C; 0070; # LATIN CAPITAL LETTER P
0051; C; 0071; # LATIN CAPITAL LETTER Q
0052; C; 0072; # LATIN CAPITAL LETTER R
0053; C; 0073; # LATIN CAPITAL LETTER S
0054; C; 0074; # LATIN CAPITAL LETTER T
0055; C; 0075; # LATIN CAPITAL LETTER U
0056; C; 0076; # LATIN CAPITAL LETTER V
0057; C; 0077; # LATIN CAPITAL LETTER W
0058; C; 0078; # LATIN CAPITAL LETTER X
0059; C; 0079; # LATIN CAPITAL LETTER Y
005A; C; 007A; # LATIN CAPITAL LETTER Z
00B5; C; 03BC; # MICRO SIGN
00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
00C6; C; 00E6; # LATIN CAPITAL LETTER AE
00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
014A; C; 014B; # LATIN CAPITAL LETTER ENG
014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0152; C; 0153; # LATIN CAPITAL LIGATURE OE
0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
017F; C; 0073; # LATIN SMALL LETTER LONG S
0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
0196; C; 0269; # LATIN CAPITAL LETTER IOTA
0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
01A2; C; 01A3; # LATIN CAPITAL LETTER OI
01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
01A6; C; 0280; # LATIN LETTER YR
01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
01A9; C; 0283; # LATIN CAPITAL LETTER ESH
01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
01B7; C; 0292; # LATIN CAPITAL LETTER EZH
01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
021C; C; 021D; # LATIN CAPITAL LETTER YOGH
021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
0222; C; 0223; # LATIN CAPITAL LETTER OU
0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
0244; C; 0289; # LATIN CAPITAL LETTER U BAR
0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
0392; C; 03B2; # GREEK CAPITAL LETTER BETA
0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
0397; C; 03B7; # GREEK CAPITAL LETTER ETA
0398; C; 03B8; # GREEK CAPITAL LETTER THETA
0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
039C; C; 03BC; # GREEK CAPITAL LETTER MU
039D; C; 03BD; # GREEK CAPITAL LETTER NU
039E; C; 03BE; # GREEK CAPITAL LETTER XI
039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
03A0; C; 03C0; # GREEK CAPITAL LETTER PI
03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
03D0; C; 03B2; # GREEK BETA SYMBOL
03D1; C; 03B8; # GREEK THETA SYMBOL
03D5; C; 03C6; # GREEK PHI SYMBOL
03D6; C; 03C0; # GREEK PI SYMBOL
03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
03DA; C; 03DB; # GREEK LETTER STIGMA
03DC; C; 03DD; # GREEK LETTER DIGAMMA
03DE; C; 03DF; # GREEK LETTER KOPPA
03E0; C; 03E1; # GREEK LETTER SAMPI
03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
03F0; C; 03BA; # GREEK KAPPA SYMBOL
03F1; C; 03C1; # GREEK RHO SYMBOL
03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
0410; C; 0430; # CYRILLIC CAPITAL LETTER A
0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
0418; C; 0438; # CYRILLIC CAPITAL LETTER I
0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
041E; C; 043E; # CYRILLIC CAPITAL LETTER O
041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
0423; C; 0443; # CYRILLIC CAPITAL LETTER U
0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
042D; C; 044D; # CYRILLIC CAPITAL LETTER E
042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
2126; C; 03C9; # OHM SIGN
212A; C; 006B; # KELVIN SIGN
212B; C; 00E5; # ANGSTROM SIGN
2132; C; 214E; # TURNED CAPITAL F
2160; C; 2170; # ROMAN NUMERAL ONE
2161; C; 2171; # ROMAN NUMERAL TWO
2162; C; 2172; # ROMAN NUMERAL THREE
2163; C; 2173; # ROMAN NUMERAL FOUR
2164; C; 2174; # ROMAN NUMERAL FIVE
2165; C; 2175; # ROMAN NUMERAL SIX
2166; C; 2176; # ROMAN NUMERAL SEVEN
2167; C; 2177; # ROMAN NUMERAL EIGHT
2168; C; 2178; # ROMAN NUMERAL NINE
2169; C; 2179; # ROMAN NUMERAL TEN
216A; C; 217A; # ROMAN NUMERAL ELEVEN
216B; C; 217B; # ROMAN NUMERAL TWELVE
216C; C; 217C; # ROMAN NUMERAL FIFTY
216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
10400; C; 10428; # DESERET CAPITAL LETTER LONG I
10401; C; 10429; # DESERET CAPITAL LETTER LONG E
10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
1040C; C; 10434; # DESERET CAPITAL LETTER AY
1040D; C; 10435; # DESERET CAPITAL LETTER OW
1040E; C; 10436; # DESERET CAPITAL LETTER WU
1040F; C; 10437; # DESERET CAPITAL LETTER YEE
10410; C; 10438; # DESERET CAPITAL LETTER H
10411; C; 10439; # DESERET CAPITAL LETTER PEE
10412; C; 1043A; # DESERET CAPITAL LETTER BEE
10413; C; 1043B; # DESERET CAPITAL LETTER TEE
10414; C; 1043C; # DESERET CAPITAL LETTER DEE
10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
10416; C; 1043E; # DESERET CAPITAL LETTER JEE
10417; C; 1043F; # DESERET CAPITAL LETTER KAY
10418; C; 10440; # DESERET CAPITAL LETTER GAY
10419; C; 10441; # DESERET CAPITAL LETTER EF
1041A; C; 10442; # DESERET CAPITAL LETTER VEE
1041B; C; 10443; # DESERET CAPITAL LETTER ETH
1041C; C; 10444; # DESERET CAPITAL LETTER THEE
1041D; C; 10445; # DESERET CAPITAL LETTER ES
1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
1041F; C; 10447; # DESERET CAPITAL LETTER ESH
10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
10421; C; 10449; # DESERET CAPITAL LETTER ER
10422; C; 1044A; # DESERET CAPITAL LETTER EL
10423; C; 1044B; # DESERET CAPITAL LETTER EM
10424; C; 1044C; # DESERET CAPITAL LETTER EN
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
10426; C; 1044E; # DESERET CAPITAL LETTER OI
10427; C; 1044F; # DESERET CAPITAL LETTER EW
END_OF_LIST

$case_folding = {}
$case_folding_string.chomp.split("\n").each do |line|
  next unless line =~ /([0-9A-F]+); [CFS]; ([0-9A-F ]+);/i
  $case_folding[$1.hex] = $2.split(" ").collect { |e| e.hex }
end

$int_array = []
$int_array_indicies = {}

def str2c(string, prefix)
  return "0" if string.nil?
  return "UTF8PROC_#{prefix}_#{string.upcase}"
end
def ary2c(array)
  return "NULL" if array.nil?
  unless $int_array_indicies[array]
    $int_array_indicies[array] = $int_array.length
    array.each { |entry| $int_array << entry }
    $int_array << -1
  end
  return "utf8proc_sequences + #{$int_array_indicies[array]}"
end

class UnicodeChar
  attr_accessor :code, :name, :category, :combining_class, :bidi_class,
                :decomp_type, :decomp_mapping,
                :bidi_mirrored,
                :uppercase_mapping, :lowercase_mapping, :titlecase_mapping
  def initialize(line)
    raise "Could not parse input." unless line =~ /^
      ([0-9A-F]+);        # code
      ([^;]+);            # name
      ([A-Z]+);           # general category
      ([0-9]+);           # canonical combining class
      ([A-Z]+);           # bidi class
      (<([A-Z]*)>)?       # decomposition type
      ((\ ?[0-9A-F]+)*);  # decompomposition mapping
      ([0-9]*);           # decimal digit
      ([0-9]*);           # digit
      ([^;]*);            # numeric
      ([YN]*);            # bidi mirrored
      ([^;]*);            # unicode 1.0 name
      ([^;]*);            # iso comment
      ([0-9A-F]*);        # simple uppercase mapping
      ([0-9A-F]*);        # simple lowercase mapping
      ([0-9A-F]*)$/ix     # simple titlecase mapping
    @code              = $1.hex
    @name              = $2
    @category          = $3
    @combining_class   = Integer($4)
    @bidi_class        = $5
    @decomp_type       = $7
    @decomp_mapping    = ($8=='') ? nil :
                         $8.split.collect { |element| element.hex }
    @bidi_mirrored     = ($13=='Y') ? true : false
    @uppercase_mapping = ($16=='') ? nil : $16.hex
    @lowercase_mapping = ($17=='') ? nil : $17.hex
    @titlecase_mapping = ($18=='') ? nil : $18.hex
  end
  def case_folding
    $case_folding[code]
  end
  def c_entry(comb1_indicies, comb2_indicies)
    "  " <<
    "{#{str2c category, 'CATEGORY'}, #{combining_class}, " <<
    "#{str2c bidi_class, 'BIDI_CLASS'}, " <<
    "#{str2c decomp_type, 'DECOMP_TYPE'}, " <<
    "#{ary2c decomp_mapping}, " <<
    "#{bidi_mirrored}, " <<
    "#{uppercase_mapping or -1}, " <<
    "#{lowercase_mapping or -1}, " <<
    "#{titlecase_mapping or -1}, " <<
    "#{comb1_indicies[code] ?
       (comb1_indicies[code]*comb2_indicies.keys.length) : -1
      }, #{comb2_indicies[code] or -1}, " <<
    "#{$exclusions.include?(code) or $excl_version.include?(code)}, " <<
    "#{$ignorable.include?(code)}, " <<
    "#{%W[Zl Zp Cc Cf].include?(category) and not [0x200C, 0x200D].include?(category)}, " <<
    "#{$grapheme_extend.include?(code)}, " <<
    "#{ary2c case_folding}},\n"
  end
end

chars = []
char_hash = {}

while gets
  if $_ =~ /^([0-9A-F]+);<[^;>,]+, First>;/i
    first = $1.hex
    gets
    char = UnicodeChar.new($_)
    raise "No last character of sequence found." unless
      $_ =~ /^([0-9A-F]+);<([^;>,]+), Last>;/i
    last = $1.hex
    name = "<#{$2}>"
    for i in first..last
      char_clone = char.clone
      char_clone.code = i
      char_clone.name = name
      char_hash[char_clone.code] = char_clone
      chars << char_clone
    end
  else
    char = UnicodeChar.new($_)
    char_hash[char.code] = char
    chars << char
  end
end

comb1st_indicies = {}
comb2nd_indicies = {}
comb_array = []

chars.each do |char|
  if char.decomp_type.nil? and char.decomp_mapping and
      char.decomp_mapping.length == 2 and
      char_hash[char.decomp_mapping[0]].combining_class == 0 and
      not $exclusions.include?(char.code)
    unless comb1st_indicies[char.decomp_mapping[0]]
      comb1st_indicies[char.decomp_mapping[0]] = comb1st_indicies.keys.length
    end
    unless comb2nd_indicies[char.decomp_mapping[1]]
      comb2nd_indicies[char.decomp_mapping[1]] = comb2nd_indicies.keys.length
    end
    comb_array[comb1st_indicies[char.decomp_mapping[0]]] ||= []
    raise "Duplicate canonical mapping" if
      comb_array[comb1st_indicies[char.decomp_mapping[0]]][
      comb2nd_indicies[char.decomp_mapping[1]]]
    comb_array[comb1st_indicies[char.decomp_mapping[0]]][
      comb2nd_indicies[char.decomp_mapping[1]]] = char.code
  end
end

properties_indicies = {}
properties = []
chars.each do |char|
  c_entry = char.c_entry(comb1st_indicies, comb2nd_indicies)
  unless properties_indicies[c_entry]
    properties_indicies[c_entry] = properties.length
    properties << c_entry
  end
end

stage1 = []
stage2 = []
for code in 0...0x110000
  next unless code % 0x100 == 0
  stage2_entry = []
  for code2 in code...(code+0x100)
    if char_hash[code2]
      stage2_entry << (properties_indicies[char_hash[code2].c_entry(
        comb1st_indicies, comb2nd_indicies)] + 1)
    else
      stage2_entry << 0
    end
  end
  old_index = stage2.index(stage2_entry)
  if old_index
    stage1 << (old_index * 0x100)
  else
    stage1 << (stage2.length * 0x100)
    stage2 << stage2_entry
  end
end

$stdout << "const int32_t utf8proc_sequences[] = {\n  "
i = 0
$int_array.each do |entry|
  i += 1
  if i == 8
    i = 0
    $stdout << "\n  "
  end
  $stdout << entry << ", "
end
$stdout << "};\n\n"

$stdout << "const uint16_t utf8proc_stage1table[] = {\n  "
i = 0
stage1.each do |entry|
  i += 1
  if i == 8
    i = 0
    $stdout << "\n  "
  end
  $stdout << entry << ", "
end
$stdout << "};\n\n"

$stdout << "const uint16_t utf8proc_stage2table[] = {\n  "
i = 0
stage2.flatten.each do |entry|
  i += 1
  if i == 8
    i = 0
    $stdout << "\n  "
  end
  $stdout << entry << ", "
end
$stdout << "};\n\n"

$stdout << "const utf8proc_property_t utf8proc_properties[] = {\n"
$stdout << "  {0, 0, 0, 0, NULL, false, -1, -1, -1, -1, -1, false},\n"
properties.each { |line|
  $stdout << line
}
$stdout << "};\n\n"

$stdout << "const int32_t utf8proc_combinations[] = {\n  "
i = 0
comb1st_indicies.keys.each_index do |a|
  comb2nd_indicies.keys.each_index do |b|
    i += 1
    if i == 8
      i = 0
      $stdout << "\n  "
    end
    $stdout << ( comb_array[a][b] or -1 ) << ", "
  end
end
$stdout << "};\n\n"