diff options
author | Vincent Sanders <vince@kyllikki.org> | 2014-11-14 10:56:33 +0000 |
---|---|---|
committer | Vincent Sanders <vince@kyllikki.org> | 2014-11-14 10:56:33 +0000 |
commit | c203e4dcb680ec3bdccf5fdf7a496549442c56de (patch) | |
tree | cf188d41632efcb2745593f3eca25dfd32863d20 | |
parent | c7b6a59b19555b35b6014bf2922c74911caa6f22 (diff) | |
download | libutf8proc-c203e4dcb680ec3bdccf5fdf7a496549442c56de.tar.gz libutf8proc-c203e4dcb680ec3bdccf5fdf7a496549442c56de.tar.bz2 |
Build with netsurf core buildsystem
-rw-r--r-- | .gitattributes | 2 | ||||
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Makefile | 116 | ||||
-rw-r--r-- | README | 123 | ||||
-rw-r--r-- | data_generator.rb | 1624 | ||||
-rw-r--r-- | include/libutf8proc/utf8proc.h (renamed from utf8proc.h) | 0 | ||||
-rw-r--r-- | libutf8proc.pc.in | 10 | ||||
-rw-r--r-- | lump.txt | 26 | ||||
-rw-r--r-- | pgsql/Makefile | 10 | ||||
-rw-r--r-- | pgsql/utf8proc.sql | 6 | ||||
-rw-r--r-- | pgsql/utf8proc_pgsql.c | 139 | ||||
-rw-r--r-- | ruby/extconf.rb | 2 | ||||
-rw-r--r-- | ruby/gem/LICENSE | 64 | ||||
-rw-r--r-- | ruby/gem/utf8proc.gemspec | 12 | ||||
-rw-r--r-- | ruby/utf8proc.rb | 98 | ||||
-rw-r--r-- | ruby/utf8proc_native.c | 160 | ||||
-rw-r--r-- | src/Makefile | 3 | ||||
-rw-r--r-- | src/utf8proc.c (renamed from utf8proc.c) | 0 | ||||
-rw-r--r-- | src/utf8proc_data.c (renamed from utf8proc_data.c) | 0 |
19 files changed, 76 insertions, 2322 deletions
diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..de2f316 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +.gitignore export-ignore +.gitattributes export-ignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d5c7a48 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +build-* +Makefile.config.override + @@ -1,68 +1,48 @@ -# libutf8proc Makefile - - -# settings - -cflags = -O2 -std=c99 -pedantic -Wall -fpic $(CFLAGS) -cc = $(CC) $(cflags) - - -# meta targets - -c-library: libutf8proc.a libutf8proc.so - -ruby-library: ruby/utf8proc_native.so - -pgsql-library: pgsql/utf8proc_pgsql.so - -all: c-library ruby-library ruby-gem pgsql-library - -clean:: - rm -f utf8proc.o libutf8proc.a libutf8proc.so - cd ruby/ && test -e Makefile && (make clean && rm -f Makefile) || true - rm -Rf ruby/gem/lib ruby/gem/ext - rm -f ruby/gem/utf8proc-*.gem - cd pgsql/ && make clean - -# real targets - -utf8proc.o: utf8proc.h utf8proc.c utf8proc_data.c - $(cc) -c -o utf8proc.o utf8proc.c - -libutf8proc.a: utf8proc.o - rm -f libutf8proc.a - ar rs libutf8proc.a utf8proc.o - -libutf8proc.so: utf8proc.o - $(cc) -shared -o libutf8proc.so utf8proc.o - chmod a-x libutf8proc.so - -libutf8proc.dylib: utf8proc.o - $(cc) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ - -ruby/Makefile: ruby/extconf.rb - cd ruby && ruby extconf.rb - -ruby/utf8proc_native.so: utf8proc.h utf8proc.c utf8proc_data.c \ - ruby/utf8proc_native.c ruby/Makefile - cd ruby && make - -ruby/gem/lib/utf8proc.rb: ruby/utf8proc.rb - test -e ruby/gem/lib || mkdir ruby/gem/lib - cp ruby/utf8proc.rb ruby/gem/lib/ - -ruby/gem/ext/extconf.rb: ruby/extconf.rb - test -e ruby/gem/ext || mkdir ruby/gem/ext - cp ruby/extconf.rb ruby/gem/ext/ - -ruby/gem/ext/utf8proc_native.c: utf8proc.h utf8proc_data.c utf8proc.c ruby/utf8proc_native.c - test -e ruby/gem/ext || mkdir ruby/gem/ext - cat utf8proc.h utf8proc_data.c utf8proc.c ruby/utf8proc_native.c | grep -v '#include "utf8proc.h"' | grep -v '#include "utf8proc_data.c"' | grep -v '#include "../utf8proc.c"' > ruby/gem/ext/utf8proc_native.c - -ruby-gem:: ruby/gem/lib/utf8proc.rb ruby/gem/ext/extconf.rb ruby/gem/ext/utf8proc_native.c - cd ruby/gem && gem build utf8proc.gemspec - -pgsql/utf8proc_pgsql.so: utf8proc.h utf8proc.c utf8proc_data.c \ - pgsql/utf8proc_pgsql.c - cd pgsql && make - +# Component settings +COMPONENT := utf8proc +COMPONENT_VERSION := 1.1.6 +# Default to a static library +COMPONENT_TYPE ?= lib-static + +# Setup the tooling +PREFIX ?= /opt/netsurf +NSSHARED ?= $(PREFIX)/share/netsurf-buildsystem +include $(NSSHARED)/makefiles/Makefile.tools + +# Reevaluate when used, as BUILDDIR won't be defined yet +TESTRUNNER = $(BUILDDIR)/test_testrunner$(EXEEXT) + +# Toolchain flags +WARNFLAGS := -Wall -W -Wundef -Wpointer-arith -Wcast-align \ + -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes \ + -Wmissing-declarations -Wnested-externs +# Debug builds compile with error warnings +# BeOS/Haiku standard library headers issue warnings +#ifneq ($(TARGET),beos) +# WARNFLAGS := $(WARNFLAGS) -Werror +#endif +CFLAGS := -I$(CURDIR)/include/libutf8proc/ \ + -I$(CURDIR)/src $(WARNFLAGS) $(CFLAGS) +ifneq ($(GCCVER),2) + CFLAGS := $(CFLAGS) -std=c99 +else + # __inline__ is a GCCism + CFLAGS := $(CFLAGS) -Dinline="__inline__" +endif + +include $(NSBUILD)/Makefile.top + +ifeq ($(WANT_TEST),yes) + ifneq ($(PKGCONFIG),) + TESTCFLAGS := $(TESTCFLAGS) $(shell $(PKGCONFIG) --cflags check) + TESTLDFLAGS := $(TESTLDFLAGS) $(shell $(PKGCONFIG) --libs check) + else + TESTLDFLAGS := $(TESTLDFLAGS) -lcheck + endif +endif + +# Extra installation rules +I := /include/libutf8proc +INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/libutf8proc/utf8proc.h +INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR)/pkgconfig:lib$(COMPONENT).pc.in +INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR):$(OUTPUT) @@ -1,116 +1,13 @@ +libutf8proc +=========== -Please read the LICENSE file, which is shipping with this software. - - -*** QUICK START *** - -For compilation of the C library call "make c-library", for compilation of -the ruby library call "make ruby-library" and for compilation of the -PostgreSQL extension call "make pgsql-library". - -For ruby you can also create a gem-file by calling "make ruby-gem". - -"make all" can be used to build everything, but both ruby and PostgreSQL -installations are required in this case. - - -*** GENERAL INFORMATION *** - -The C library is found in this directory after successful compilation and -is named "libutf8proc.a" and "libutf8proc.so". The ruby library consists of -the files "utf8proc.rb" and "utf8proc_native.so", which are found in the -subdirectory "ruby/". If you chose to create a gem-file it is placed in the -"ruby/gem" directory. The PostgreSQL extension is named "utf8proc_pgsql.so" -and resides in the "pgsql/" directory. - -Both the ruby library and the PostgreSQL extension are built as stand-alone -libraries and are therefore not dependent the dynamic version of the -C library files, but this behaviour might change in future releases. - -The Unicode version being supported is 5.0.0. -Note: Version 4.1.0 of Unicode Standard Annex #29 was used, as - version 5.0.0 had not been available at the time of implementation. - -For Unicode normalizations, the following options have to be used: -Normalization Form C: STABLE, COMPOSE -Normalization Form D: STABLE, DECOMPOSE -Normalization Form KC: STABLE, COMPOSE, COMPAT -Normalization Form KD: STABLE, DECOMPOSE, COMPAT - - -*** C LIBRARY *** - -The documentation for the C library is found in the utf8proc.h header file. -"utf8proc_map" is most likely function you will be using for mapping UTF-8 -strings, unless you want to allocate memory yourself. - - -*** RUBY API *** - -The ruby library adds the methods "utf8map" and "utf8map!" to the String -class, and the method "utf8" to the Integer class. - -The String#utf8map method does the same as the "utf8proc_map" C function. -Options for the mapping procedure are passed as symbols, i.e: -"Hello".utf8map(:casefold) => "hello" - -The descriptions of all options are found in the C header file -"utf8proc.h". Please notice that the according symbols in ruby are all -lowercase. - -String#utf8map! is the destructive function in the meaning that the string -is replaced by the result. - -There are shortcuts for the 4 normalization forms specified by Unicode: -String#utf8nfd, String#utf8nfd!, -String#utf8nfc, String#utf8nfc!, -String#utf8nfkd, String#utf8nfkd!, -String#utf8nfkc, String#utf8nfkc! - -The method Integer#utf8 returns a UTF-8 string, which is containing the -unicode char given by the code point. -0x000A.utf8 => "\n" -0x2028.utf8 => "\342\200\250" - - -*** POSTGRESQL API *** - -For PostgreSQL there are two SQL functions supplied named "unifold" and -"unistrip". These functions function can be used to prepare index fields in -order to be folded in a way where string-comparisons make more sense, e.g. -where "bathtub" == "bath<soft hyphen>tub" -or "Hello World" == "hello world". - -CREATE TABLE people ( - id serial8 primary key, - name text, - CHECK (unifold(name) NOTNULL) -); -CREATE INDEX name_idx ON people (unifold(name)); -SELECT * FROM people WHERE unifold(name) = unifold('John Doe'); - -The function "unistrip" removes character marks like accents or diaeresis, -while "unifold" keeps then. - -NOTICE: The outputs of the function can change between releases, as - utf8proc does not follow a versioning stability policy. You have to - rebuild your database indicies, if you upgrade to a newer version - of utf8proc. - - -*** TODO *** - -- detect stable code points and process segments independently in order to - save memory -- do a quick check before normalizing strings to optimize speed -- support stream processing - - -*** CONTACT *** - -If you find any bugs or experience difficulties in compiling this software, -please contact us: - -Project page: http://www.public-software-group.org/utf8proc +This is the Public software group utf8proc library [1] repackaged as a +conveniance library for NetSurf. Previously this library was simply +copied into the NetSurf sources. +This takes the unicode 5 capable version 1.1.6 of the library and +converts it to the NetSurf build system. No C source code has been +changed from upstream and all the Makefiles are licenced as per the +utf8proc source. +[1] http://www.public-software-group.org/utf8proc
\ No newline at end of file diff --git a/data_generator.rb b/data_generator.rb deleted file mode 100644 index 0db0331..0000000 --- a/data_generator.rb +++ /dev/null @@ -1,1624 +0,0 @@ -#!/usr/pkg/bin/ruby - -# This file was used to generate the 'unicode_data.c' file by parsing the -# Unicode data file 'UnicodeData.txt' of the Unicode Character Database. -# It is included for informational purposes only and not intended for -# production use. - - -# Copyright (c) 2009 Public Software Group e. V., Berlin, Germany -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - - -# This file contains derived data from a modified version of the -# Unicode data files. The following license applies to that data: -# -# COPYRIGHT AND PERMISSION NOTICE -# -# Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed -# under the Terms of Use in http://www.unicode.org/copyright.html. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of the Unicode data files and any associated documentation (the "Data -# Files") or Unicode software and any associated documentation (the -# "Software") to deal in the Data Files or Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, and/or sell copies of the Data Files or Software, and -# to permit persons to whom the Data Files or Software are furnished to do -# so, provided that (a) the above copyright notice(s) and this permission -# notice appear with all copies of the Data Files or Software, (b) both the -# above copyright notice(s) and this permission notice appear in associated -# documentation, and (c) there is clear notice in each modified Data File or -# in the Software as well as in the documentation associated with the Data -# File(s) or Software that the data or software has been modified. -# -# THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY -# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF -# THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS -# INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR -# CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF -# USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -# PERFORMANCE OF THE DATA FILES OR SOFTWARE. -# -# Except as contained in this notice, the name of a copyright holder shall -# not be used in advertising or otherwise to promote the sale, use or other -# dealings in these Data Files or Software without prior written -# authorization of the copyright holder. - - - -$ignorable_list = <<END_OF_LIST -0000..0008 ; Default_Ignorable_Code_Point # Cc [9] <control-0000>..<control-0008> -000E..001F ; Default_Ignorable_Code_Point # Cc [18] <control-000E>..<control-001F> -007F..0084 ; Default_Ignorable_Code_Point # Cc [6] <control-007F>..<control-0084> -0086..009F ; Default_Ignorable_Code_Point # Cc [26] <control-0086>..<control-009F> -00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN -034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER -0600..0603 ; Default_Ignorable_Code_Point # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA -06DD ; Default_Ignorable_Code_Point # Cf ARABIC END OF AYAH -070F ; Default_Ignorable_Code_Point # Cf SYRIAC ABBREVIATION MARK -115F..1160 ; Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER -17B4..17B5 ; Default_Ignorable_Code_Point # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA -180B..180D ; Default_Ignorable_Code_Point # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE -200B..200F ; Default_Ignorable_Code_Point # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK -202A..202E ; Default_Ignorable_Code_Point # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE -2060..2063 ; Default_Ignorable_Code_Point # Cf [4] WORD JOINER..INVISIBLE SEPARATOR -2064..2069 ; Default_Ignorable_Code_Point # Cn [6] <reserved-2064>..<reserved-2069> -206A..206F ; Default_Ignorable_Code_Point # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES -3164 ; Default_Ignorable_Code_Point # Lo HANGUL FILLER -D800..DFFF ; Default_Ignorable_Code_Point # Cs [2048] <surrogate-D800>..<surrogate-DFFF> -FE00..FE0F ; Default_Ignorable_Code_Point # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 -FEFF ; Default_Ignorable_Code_Point # Cf ZERO WIDTH NO-BREAK SPACE -FFA0 ; Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER -FFF0..FFF8 ; Default_Ignorable_Code_Point # Cn [9] <reserved-FFF0>..<reserved-FFF8> -1D173..1D17A ; Default_Ignorable_Code_Point # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE -E0001 ; Default_Ignorable_Code_Point # Cf LANGUAGE TAG -E0002..E001F ; Default_Ignorable_Code_Point # Cn [30] <reserved-E0002>..<reserved-E001F> -E0020..E007F ; Default_Ignorable_Code_Point # Cf [96] TAG SPACE..CANCEL TAG -E0080..E00FF ; Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF> -E0100..E01EF ; Default_Ignorable_Code_Point # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -END_OF_LIST - -$ignorable = [] -$ignorable_list.each do |entry| - if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/ - $1.hex.upto($2.hex) { |e2| $ignorable << e2 } - elsif entry =~ /^[0-9A-F]+/ - $ignorable << $&.hex - end -end - -$grapheme_extend_list = <<END_OF_LIST -0300..036F ; Grapheme_Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X -0483..0486 ; Grapheme_Extend # Mn [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA -0488..0489 ; Grapheme_Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN -0591..05BD ; Grapheme_Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG -05BF ; Grapheme_Extend # Mn HEBREW POINT RAFE -05C1..05C2 ; Grapheme_Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT -05C4..05C5 ; Grapheme_Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; Grapheme_Extend # Mn HEBREW POINT QAMATS QATAN -0610..0615 ; Grapheme_Extend # Mn [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH -064B..065E ; Grapheme_Extend # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS -0670 ; Grapheme_Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF -06D6..06DC ; Grapheme_Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN -06DE ; Grapheme_Extend # Me ARABIC START OF RUB EL HIZB -06DF..06E4 ; Grapheme_Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA -06E7..06E8 ; Grapheme_Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON -06EA..06ED ; Grapheme_Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM -0711 ; Grapheme_Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH -0730..074A ; Grapheme_Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH -07A6..07B0 ; Grapheme_Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN -07EB..07F3 ; Grapheme_Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -0901..0902 ; Grapheme_Extend # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA -093C ; Grapheme_Extend # Mn DEVANAGARI SIGN NUKTA -0941..0948 ; Grapheme_Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI -094D ; Grapheme_Extend # Mn DEVANAGARI SIGN VIRAMA -0951..0954 ; Grapheme_Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT -0962..0963 ; Grapheme_Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL -0981 ; Grapheme_Extend # Mn BENGALI SIGN CANDRABINDU -09BC ; Grapheme_Extend # Mn BENGALI SIGN NUKTA -09BE ; Grapheme_Extend # Mc BENGALI VOWEL SIGN AA -09C1..09C4 ; Grapheme_Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR -09CD ; Grapheme_Extend # Mn BENGALI SIGN VIRAMA -09D7 ; Grapheme_Extend # Mc BENGALI AU LENGTH MARK -09E2..09E3 ; Grapheme_Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL -0A01..0A02 ; Grapheme_Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI -0A3C ; Grapheme_Extend # Mn GURMUKHI SIGN NUKTA -0A41..0A42 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU -0A47..0A48 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI -0A4B..0A4D ; Grapheme_Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA -0A70..0A71 ; Grapheme_Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK -0A81..0A82 ; Grapheme_Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA -0ABC ; Grapheme_Extend # Mn GUJARATI SIGN NUKTA -0AC1..0AC5 ; Grapheme_Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E -0AC7..0AC8 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI -0ACD ; Grapheme_Extend # Mn GUJARATI SIGN VIRAMA -0AE2..0AE3 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL -0B01 ; Grapheme_Extend # Mn ORIYA SIGN CANDRABINDU -0B3C ; Grapheme_Extend # Mn ORIYA SIGN NUKTA -0B3E ; Grapheme_Extend # Mc ORIYA VOWEL SIGN AA -0B3F ; Grapheme_Extend # Mn ORIYA VOWEL SIGN I -0B41..0B43 ; Grapheme_Extend # Mn [3] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC R -0B4D ; Grapheme_Extend # Mn ORIYA SIGN VIRAMA -0B56 ; Grapheme_Extend # Mn ORIYA AI LENGTH MARK -0B57 ; Grapheme_Extend # Mc ORIYA AU LENGTH MARK -0B82 ; Grapheme_Extend # Mn TAMIL SIGN ANUSVARA -0BBE ; Grapheme_Extend # Mc TAMIL VOWEL SIGN AA -0BC0 ; Grapheme_Extend # Mn TAMIL VOWEL SIGN II -0BCD ; Grapheme_Extend # Mn TAMIL SIGN VIRAMA -0BD7 ; Grapheme_Extend # Mc TAMIL AU LENGTH MARK -0C3E..0C40 ; Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II -0C46..0C48 ; Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI -0C4A..0C4D ; Grapheme_Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA -0C55..0C56 ; Grapheme_Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK -0CBC ; Grapheme_Extend # Mn KANNADA SIGN NUKTA -0CBF ; Grapheme_Extend # Mn KANNADA VOWEL SIGN I -0CC2 ; Grapheme_Extend # Mc KANNADA VOWEL SIGN UU -0CC6 ; Grapheme_Extend # Mn KANNADA VOWEL SIGN E -0CCC..0CCD ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA -0CD5..0CD6 ; Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CE2..0CE3 ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D3E ; Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA -0D41..0D43 ; Grapheme_Extend # Mn [3] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC R -0D4D ; Grapheme_Extend # Mn MALAYALAM SIGN VIRAMA -0D57 ; Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK -0DCA ; Grapheme_Extend # Mn SINHALA SIGN AL-LAKUNA -0DCF ; Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA -0DD2..0DD4 ; Grapheme_Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA -0DD6 ; Grapheme_Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA -0DDF ; Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA -0E31 ; Grapheme_Extend # Mn THAI CHARACTER MAI HAN-AKAT -0E34..0E3A ; Grapheme_Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU -0E47..0E4E ; Grapheme_Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN -0EB1 ; Grapheme_Extend # Mn LAO VOWEL SIGN MAI KAN -0EB4..0EB9 ; Grapheme_Extend # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU -0EBB..0EBC ; Grapheme_Extend # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO -0EC8..0ECD ; Grapheme_Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA -0F18..0F19 ; Grapheme_Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS -0F35 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA -0F37 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS -0F39 ; Grapheme_Extend # Mn TIBETAN MARK TSA -PHRU -0F71..0F7E ; Grapheme_Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO -0F80..0F84 ; Grapheme_Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA -0F86..0F87 ; Grapheme_Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS -0F90..0F97 ; Grapheme_Extend # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA -0F99..0FBC ; Grapheme_Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA -0FC6 ; Grapheme_Extend # Mn TIBETAN SYMBOL PADMA GDAN -102D..1030 ; Grapheme_Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU -1032 ; Grapheme_Extend # Mn MYANMAR VOWEL SIGN AI -1036..1037 ; Grapheme_Extend # Mn [2] MYANMAR SIGN ANUSVARA..MYANMAR SIGN DOT BELOW -1039 ; Grapheme_Extend # Mn MYANMAR SIGN VIRAMA -1058..1059 ; Grapheme_Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL -135F ; Grapheme_Extend # Mn ETHIOPIC COMBINING GEMINATION MARK -1712..1714 ; Grapheme_Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA -1732..1734 ; Grapheme_Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD -1752..1753 ; Grapheme_Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U -1772..1773 ; Grapheme_Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -17B7..17BD ; Grapheme_Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA -17C6 ; Grapheme_Extend # Mn KHMER SIGN NIKAHIT -17C9..17D3 ; Grapheme_Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT -17DD ; Grapheme_Extend # Mn KHMER SIGN ATTHACAN -180B..180D ; Grapheme_Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE -18A9 ; Grapheme_Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA -1920..1922 ; Grapheme_Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U -1927..1928 ; Grapheme_Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O -1932 ; Grapheme_Extend # Mn LIMBU SMALL LETTER ANUSVARA -1939..193B ; Grapheme_Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I -1A17..1A18 ; Grapheme_Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U -1B00..1B03 ; Grapheme_Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG -1B34 ; Grapheme_Extend # Mn BALINESE SIGN REREKAN -1B36..1B3A ; Grapheme_Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA -1B3C ; Grapheme_Extend # Mn BALINESE VOWEL SIGN LA LENGA -1B42 ; Grapheme_Extend # Mn BALINESE VOWEL SIGN PEPET -1B6B..1B73 ; Grapheme_Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG -1DC0..1DCA ; Grapheme_Extend # Mn [11] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER R BELOW -1DFE..1DFF ; Grapheme_Extend # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW -200C..200D ; Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER -20D0..20DC ; Grapheme_Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE -20DD..20E0 ; Grapheme_Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH -20E1 ; Grapheme_Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE -20E2..20E4 ; Grapheme_Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE -20E5..20EF ; Grapheme_Extend # Mn [11] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW -302A..302F ; Grapheme_Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK -3099..309A ; Grapheme_Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK -A806 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN HASANTA -A80B ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN ANUSVARA -A825..A826 ; Grapheme_Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E -FB1E ; Grapheme_Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA -FE00..FE0F ; Grapheme_Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 -FE20..FE23 ; Grapheme_Extend # Mn [4] COMBINING LIGATURE LEFT HALF..COMBINING DOUBLE TILDE RIGHT HALF -10A01..10A03 ; Grapheme_Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R -10A05..10A06 ; Grapheme_Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O -10A0C..10A0F ; Grapheme_Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA -10A38..10A3A ; Grapheme_Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW -10A3F ; Grapheme_Extend # Mn KHAROSHTHI VIRAMA -1D165 ; Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM -1D167..1D169 ; Grapheme_Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 -1D16E..1D172 ; Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 -1D17B..1D182 ; Grapheme_Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE -1D185..1D18B ; Grapheme_Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE -1D1AA..1D1AD ; Grapheme_Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO -1D242..1D244 ; Grapheme_Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME -E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -END_OF_LIST - -$grapheme_extend = [] -$grapheme_extend_list.each do |entry| - if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/ - $1.hex.upto($2.hex) { |e2| $grapheme_extend << e2 } - elsif entry =~ /^[0-9A-F]+/ - $grapheme_extend << $&.hex - end -end - -$exclusions = <<END_OF_LIST -0958 # DEVANAGARI LETTER QA -0959 # DEVANAGARI LETTER KHHA -095A # DEVANAGARI LETTER GHHA -095B # DEVANAGARI LETTER ZA -095C # DEVANAGARI LETTER DDDHA -095D # DEVANAGARI LETTER RHA -095E # DEVANAGARI LETTER FA -095F # DEVANAGARI LETTER YYA -09DC # BENGALI LETTER RRA -09DD # BENGALI LETTER RHA -09DF # BENGALI LETTER YYA -0A33 # GURMUKHI LETTER LLA -0A36 # GURMUKHI LETTER SHA -0A59 # GURMUKHI LETTER KHHA -0A5A # GURMUKHI LETTER GHHA -0A5B # GURMUKHI LETTER ZA -0A5E # GURMUKHI LETTER FA -0B5C # ORIYA LETTER RRA -0B5D # ORIYA LETTER RHA -0F43 # TIBETAN LETTER GHA -0F4D # TIBETAN LETTER DDHA -0F52 # TIBETAN LETTER DHA -0F57 # TIBETAN LETTER BHA -0F5C # TIBETAN LETTER DZHA -0F69 # TIBETAN LETTER KSSA -0F76 # TIBETAN VOWEL SIGN VOCALIC R -0F78 # TIBETAN VOWEL SIGN VOCALIC L -0F93 # TIBETAN SUBJOINED LETTER GHA -0F9D # TIBETAN SUBJOINED LETTER DDHA -0FA2 # TIBETAN SUBJOINED LETTER DHA -0FA7 # TIBETAN SUBJOINED LETTER BHA -0FAC # TIBETAN SUBJOINED LETTER DZHA -0FB9 # TIBETAN SUBJOINED LETTER KSSA -FB1D # HEBREW LETTER YOD WITH HIRIQ -FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH -FB2A # HEBREW LETTER SHIN WITH SHIN DOT -FB2B # HEBREW LETTER SHIN WITH SIN DOT -FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT -FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT -FB2E # HEBREW LETTER ALEF WITH PATAH -FB2F # HEBREW LETTER ALEF WITH QAMATS -FB30 # HEBREW LETTER ALEF WITH MAPIQ -FB31 # HEBREW LETTER BET WITH DAGESH -FB32 # HEBREW LETTER GIMEL WITH DAGESH -FB33 # HEBREW LETTER DALET WITH DAGESH -FB34 # HEBREW LETTER HE WITH MAPIQ -FB35 # HEBREW LETTER VAV WITH DAGESH -FB36 # HEBREW LETTER ZAYIN WITH DAGESH -FB38 # HEBREW LETTER TET WITH DAGESH -FB39 # HEBREW LETTER YOD WITH DAGESH -FB3A # HEBREW LETTER FINAL KAF WITH DAGESH -FB3B # HEBREW LETTER KAF WITH DAGESH -FB3C # HEBREW LETTER LAMED WITH DAGESH -FB3E # HEBREW LETTER MEM WITH DAGESH -FB40 # HEBREW LETTER NUN WITH DAGESH -FB41 # HEBREW LETTER SAMEKH WITH DAGESH -FB43 # HEBREW LETTER FINAL PE WITH DAGESH -FB44 # HEBREW LETTER PE WITH DAGESH -FB46 # HEBREW LETTER TSADI WITH DAGESH -FB47 # HEBREW LETTER QOF WITH DAGESH -FB48 # HEBREW LETTER RESH WITH DAGESH -FB49 # HEBREW LETTER SHIN WITH DAGESH -FB4A # HEBREW LETTER TAV WITH DAGESH -FB4B # HEBREW LETTER VAV WITH HOLAM -FB4C # HEBREW LETTER BET WITH RAFE -FB4D # HEBREW LETTER KAF WITH RAFE -FB4E # HEBREW LETTER PE WITH RAFE -END_OF_LIST -$exclusions = $exclusions.chomp.split("\n").collect { |e| e.hex } - -$excl_version = <<END_OF_LIST -2ADC # FORKING -1D15E # MUSICAL SYMBOL HALF NOTE -1D15F # MUSICAL SYMBOL QUARTER NOTE -1D160 # MUSICAL SYMBOL EIGHTH NOTE -1D161 # MUSICAL SYMBOL SIXTEENTH NOTE -1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE -1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE -1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE -1D1BB # MUSICAL SYMBOL MINIMA -1D1BC # MUSICAL SYMBOL MINIMA BLACK -1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE -1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK -1D1BF # MUSICAL SYMBOL FUSA WHITE -1D1C0 # MUSICAL SYMBOL FUSA BLACK -END_OF_LIST -$excl_version = $excl_version.chomp.split("\n").collect { |e| e.hex } - -$case_folding_string = <<END_OF_LIST -0041; C; 0061; # LATIN CAPITAL LETTER A -0042; C; 0062; # LATIN CAPITAL LETTER B -0043; C; 0063; # LATIN CAPITAL LETTER C -0044; C; 0064; # LATIN CAPITAL LETTER D -0045; C; 0065; # LATIN CAPITAL LETTER E -0046; C; 0066; # LATIN CAPITAL LETTER F -0047; C; 0067; # LATIN CAPITAL LETTER G -0048; C; 0068; # LATIN CAPITAL LETTER H -0049; C; 0069; # LATIN CAPITAL LETTER I -004A; C; 006A; # LATIN CAPITAL LETTER J -004B; C; 006B; # LATIN CAPITAL LETTER K -004C; C; 006C; # LATIN CAPITAL LETTER L -004D; C; 006D; # LATIN CAPITAL LETTER M -004E; C; 006E; # LATIN CAPITAL LETTER N -004F; C; 006F; # LATIN CAPITAL LETTER O -0050; C; 0070; # LATIN CAPITAL LETTER P -0051; C; 0071; # LATIN CAPITAL LETTER Q -0052; C; 0072; # LATIN CAPITAL LETTER R -0053; C; 0073; # LATIN CAPITAL LETTER S -0054; C; 0074; # LATIN CAPITAL LETTER T -0055; C; 0075; # LATIN CAPITAL LETTER U -0056; C; 0076; # LATIN CAPITAL LETTER V -0057; C; 0077; # LATIN CAPITAL LETTER W -0058; C; 0078; # LATIN CAPITAL LETTER X -0059; C; 0079; # LATIN CAPITAL LETTER Y -005A; C; 007A; # LATIN CAPITAL LETTER Z -00B5; C; 03BC; # MICRO SIGN -00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE -00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE -00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX -00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE -00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS -00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE -00C6; C; 00E6; # LATIN CAPITAL LETTER AE -00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA -00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE -00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE -00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX -00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS -00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE -00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE -00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX -00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS -00D0; C; 00F0; # LATIN CAPITAL LETTER ETH -00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE -00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE -00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE -00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX -00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE -00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS -00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE -00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE -00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE -00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX -00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS -00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE -00DE; C; 00FE; # LATIN CAPITAL LETTER THORN -00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S -0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON -0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE -0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK -0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE -0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX -010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE -010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON -010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON -0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE -0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON -0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE -0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE -0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK -011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON -011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX -011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE -0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE -0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA -0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX -0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE -0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE -012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON -012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE -012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK -0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE -0132; C; 0133; # LATIN CAPITAL LIGATURE IJ -0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX -0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA -0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE -013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA -013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON -013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT -0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE -0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE -0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA -0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON -0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE -014A; C; 014B; # LATIN CAPITAL LETTER ENG -014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON -014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE -0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -0152; C; 0153; # LATIN CAPITAL LIGATURE OE -0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE -0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA -0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON -015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE -015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX -015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA -0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON -0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA -0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON -0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE -0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE -016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON -016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE -016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE -0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE -0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK -0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX -0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX -0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS -0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE -017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE -017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON -017F; C; 0073; # LATIN SMALL LETTER LONG S -0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK -0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR -0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX -0186; C; 0254; # LATIN CAPITAL LETTER OPEN O -0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK -0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D -018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK -018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR -018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E -018F; C; 0259; # LATIN CAPITAL LETTER SCHWA -0190; C; 025B; # LATIN CAPITAL LETTER OPEN E -0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK -0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK -0194; C; 0263; # LATIN CAPITAL LETTER GAMMA -0196; C; 0269; # LATIN CAPITAL LETTER IOTA -0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE -0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK -019C; C; 026F; # LATIN CAPITAL LETTER TURNED M -019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK -019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE -01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN -01A2; C; 01A3; # LATIN CAPITAL LETTER OI -01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK -01A6; C; 0280; # LATIN LETTER YR -01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO -01A9; C; 0283; # LATIN CAPITAL LETTER ESH -01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK -01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK -01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN -01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON -01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK -01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK -01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE -01B7; C; 0292; # LATIN CAPITAL LETTER EZH -01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED -01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE -01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON -01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON -01C7; C; 01C9; # LATIN CAPITAL LETTER LJ -01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J -01CA; C; 01CC; # LATIN CAPITAL LETTER NJ -01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J -01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON -01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON -01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON -01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON -01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON -01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE -01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON -01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE -01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON -01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON -01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON -01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE -01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON -01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON -01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK -01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON -01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON -01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON -01F1; C; 01F3; # LATIN CAPITAL LETTER DZ -01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z -01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE -01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR -01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN -01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE -01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE -01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE -01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE -0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE -0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE -0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE -0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE -0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE -020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE -020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE -020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE -0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE -0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE -0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE -0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE -0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW -021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW -021C; C; 021D; # LATIN CAPITAL LETTER YOGH -021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON -0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG -0222; C; 0223; # LATIN CAPITAL LETTER OU -0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK -0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE -0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA -022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON -022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON -022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE -0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON -0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON -023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE -023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE -023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR -023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE -0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP -0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE -0244; C; 0289; # LATIN CAPITAL LETTER U BAR -0245; C; 028C; # LATIN CAPITAL LETTER TURNED V -0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE -0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE -024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL -024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE -024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE -0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI -0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS -0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS -0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS -038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS -038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS -038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS -038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS -0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS -0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA -0392; C; 03B2; # GREEK CAPITAL LETTER BETA -0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA -0394; C; 03B4; # GREEK CAPITAL LETTER DELTA -0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON -0396; C; 03B6; # GREEK CAPITAL LETTER ZETA -0397; C; 03B7; # GREEK CAPITAL LETTER ETA -0398; C; 03B8; # GREEK CAPITAL LETTER THETA -0399; C; 03B9; # GREEK CAPITAL LETTER IOTA -039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA -039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA -039C; C; 03BC; # GREEK CAPITAL LETTER MU -039D; C; 03BD; # GREEK CAPITAL LETTER NU -039E; C; 03BE; # GREEK CAPITAL LETTER XI -039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON -03A0; C; 03C0; # GREEK CAPITAL LETTER PI -03A1; C; 03C1; # GREEK CAPITAL LETTER RHO -03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA -03A4; C; 03C4; # GREEK CAPITAL LETTER TAU -03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON -03A6; C; 03C6; # GREEK CAPITAL LETTER PHI -03A7; C; 03C7; # GREEK CAPITAL LETTER CHI -03A8; C; 03C8; # GREEK CAPITAL LETTER PSI -03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA -03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA -03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA -03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS -03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA -03D0; C; 03B2; # GREEK BETA SYMBOL -03D1; C; 03B8; # GREEK THETA SYMBOL -03D5; C; 03C6; # GREEK PHI SYMBOL -03D6; C; 03C0; # GREEK PI SYMBOL -03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA -03DA; C; 03DB; # GREEK LETTER STIGMA -03DC; C; 03DD; # GREEK LETTER DIGAMMA -03DE; C; 03DF; # GREEK LETTER KOPPA -03E0; C; 03E1; # GREEK LETTER SAMPI -03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI -03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI -03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI -03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI -03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA -03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA -03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI -03F0; C; 03BA; # GREEK KAPPA SYMBOL -03F1; C; 03C1; # GREEK RHO SYMBOL -03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL -03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL -03F7; C; 03F8; # GREEK CAPITAL LETTER SHO -03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL -03FA; C; 03FB; # GREEK CAPITAL LETTER SAN -03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL -03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL -03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL -0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE -0401; C; 0451; # CYRILLIC CAPITAL LETTER IO -0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE -0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE -0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE -0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE -0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I -0407; C; 0457; # CYRILLIC CAPITAL LETTER YI -0408; C; 0458; # CYRILLIC CAPITAL LETTER JE -0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE -040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE -040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE -040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE -040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE -040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U -040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE -0410; C; 0430; # CYRILLIC CAPITAL LETTER A -0411; C; 0431; # CYRILLIC CAPITAL LETTER BE -0412; C; 0432; # CYRILLIC CAPITAL LETTER VE -0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE -0414; C; 0434; # CYRILLIC CAPITAL LETTER DE -0415; C; 0435; # CYRILLIC CAPITAL LETTER IE -0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE -0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE -0418; C; 0438; # CYRILLIC CAPITAL LETTER I -0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I -041A; C; 043A; # CYRILLIC CAPITAL LETTER KA -041B; C; 043B; # CYRILLIC CAPITAL LETTER EL -041C; C; 043C; # CYRILLIC CAPITAL LETTER EM -041D; C; 043D; # CYRILLIC CAPITAL LETTER EN -041E; C; 043E; # CYRILLIC CAPITAL LETTER O -041F; C; 043F; # CYRILLIC CAPITAL LETTER PE -0420; C; 0440; # CYRILLIC CAPITAL LETTER ER -0421; C; 0441; # CYRILLIC CAPITAL LETTER ES -0422; C; 0442; # CYRILLIC CAPITAL LETTER TE -0423; C; 0443; # CYRILLIC CAPITAL LETTER U -0424; C; 0444; # CYRILLIC CAPITAL LETTER EF -0425; C; 0445; # CYRILLIC CAPITAL LETTER HA -0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE -0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE -0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA -0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA -042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN -042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU -042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN -042D; C; 044D; # CYRILLIC CAPITAL LETTER E -042E; C; 044E; # CYRILLIC CAPITAL LETTER YU -042F; C; 044F; # CYRILLIC CAPITAL LETTER YA -0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA -0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT -0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E -0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS -0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS -046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS -046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS -046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI -0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI -0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA -0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA -0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT -0478; C; 0479; # CYRILLIC CAPITAL LETTER UK -047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA -047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO -047E; C; 047F; # CYRILLIC CAPITAL LETTER OT -0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA -048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL -048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN -048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK -0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN -0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE -0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK -0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER -0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER -049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER -049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE -049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE -04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA -04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER -04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE -04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK -04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA -04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER -04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER -04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U -04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE -04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER -04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE -04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER -04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE -04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA -04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE -04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER -04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA -04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE -04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK -04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL -04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK -04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL -04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE -04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL -04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE -04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS -04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE -04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE -04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA -04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS -04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS -04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS -04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE -04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON -04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS -04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS -04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O -04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS -04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS -04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON -04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS -04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE -04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS -04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER -04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS -04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK -04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK -04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE -0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE -0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE -0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE -0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE -0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE -050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE -050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE -050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE -0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE -0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK -0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB -0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN -0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM -0534; C; 0564; # ARMENIAN CAPITAL LETTER DA -0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH -0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA -0537; C; 0567; # ARMENIAN CAPITAL LETTER EH -0538; C; 0568; # ARMENIAN CAPITAL LETTER ET -0539; C; 0569; # ARMENIAN CAPITAL LETTER TO -053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE -053B; C; 056B; # ARMENIAN CAPITAL LETTER INI -053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN -053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH -053E; C; 056E; # ARMENIAN CAPITAL LETTER CA -053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN -0540; C; 0570; # ARMENIAN CAPITAL LETTER HO -0541; C; 0571; # ARMENIAN CAPITAL LETTER JA -0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD -0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH -0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN -0545; C; 0575; # ARMENIAN CAPITAL LETTER YI -0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW -0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA -0548; C; 0578; # ARMENIAN CAPITAL LETTER VO -0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA -054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH -054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH -054C; C; 057C; # ARMENIAN CAPITAL LETTER RA -054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH -054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW -054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN -0550; C; 0580; # ARMENIAN CAPITAL LETTER REH -0551; C; 0581; # ARMENIAN CAPITAL LETTER CO -0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN -0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR -0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH -0555; C; 0585; # ARMENIAN CAPITAL LETTER OH -0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH -0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN -10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN -10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN -10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN -10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON -10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN -10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN -10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN -10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN -10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN -10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN -10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS -10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN -10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR -10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON -10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR -10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR -10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE -10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN -10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR -10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN -10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR -10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR -10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN -10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR -10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN -10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN -10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN -10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL -10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL -10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR -10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN -10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN -10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE -10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE -10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE -10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE -10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR -10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE -1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW -1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE -1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW -1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW -1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE -1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE -1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW -1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW -1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA -1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW -1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE -1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE -1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW -1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW -1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE -1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE -1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON -1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE -1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW -1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS -1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA -1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW -1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW -1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE -1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE -1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW -1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW -1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW -1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON -1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW -1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW -1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE -1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE -1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW -1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE -1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW -1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW -1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW -1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE -1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS -1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE -1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE -1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE -1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE -1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE -1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW -1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON -1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW -1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE -1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW -1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE -1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE -1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE -1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE -1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW -1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW -1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW -1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW -1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW -1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW -1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE -1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS -1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE -1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW -1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE -1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE -1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS -1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE -1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW -1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE -1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS -1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE -1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX -1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW -1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW -1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW -1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS -1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE -1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE -1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING -1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE -1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW -1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE -1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE -1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE -1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE -1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE -1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW -1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE -1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE -1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE -1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE -1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW -1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW -1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE -1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE -1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE -1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE -1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE -1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE -1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW -1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE -1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW -1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW -1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE -1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE -1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE -1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE -1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE -1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW -1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE -1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE -1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE -1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE -1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW -1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW -1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE -1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE -1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE -1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE -1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE -1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW -1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE -1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW -1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE -1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE -1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI -1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA -1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA -1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA -1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA -1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA -1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI -1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI -1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI -1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA -1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA -1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA -1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA -1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA -1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI -1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA -1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA -1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA -1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA -1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA -1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI -1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI -1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI -1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA -1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA -1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA -1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA -1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA -1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI -1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI -1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI -1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA -1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA -1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA -1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA -1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA -1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI -1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA -1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA -1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI -1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA -1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA -1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA -1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI -1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI -1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA -1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA -1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA -1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA -1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA -1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI -1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI -1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI -1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI -1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI -1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI -1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI -1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI -1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI -1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI -1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI -1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI -1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI -1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI -1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI -1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI -1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI -1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI -1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI -1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI -1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI -1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI -1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI -1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI -1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI -1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI -1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI -1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI -1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI -1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI -1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI -1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI -1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI -1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI -1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI -1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI -1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI -1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI -1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI -1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI -1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI -1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI -1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI -1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI -1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI -1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI -1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI -1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI -1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI -1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI -1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI -1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI -1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI -1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI -1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI -1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI -1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI -1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI -1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI -1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI -1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI -1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI -1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI -1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI -1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI -1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI -1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI -1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI -1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI -1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI -1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI -1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI -1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI -1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY -1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON -1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA -1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA -1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI -1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI -1FBE; C; 03B9; # GREEK PROSGEGRAMMENI -1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI -1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI -1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI -1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI -1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI -1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA -1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA -1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA -1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA -1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI -1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI -1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA -1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA -1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI -1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI -1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY -1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON -1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA -1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA -1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA -1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA -1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI -1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI -1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI -1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY -1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON -1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA -1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA -1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA -1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI -1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI -1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI -1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI -1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI -1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA -1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA -1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA -1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA -1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI -1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI -2126; C; 03C9; # OHM SIGN -212A; C; 006B; # KELVIN SIGN -212B; C; 00E5; # ANGSTROM SIGN -2132; C; 214E; # TURNED CAPITAL F -2160; C; 2170; # ROMAN NUMERAL ONE -2161; C; 2171; # ROMAN NUMERAL TWO -2162; C; 2172; # ROMAN NUMERAL THREE -2163; C; 2173; # ROMAN NUMERAL FOUR -2164; C; 2174; # ROMAN NUMERAL FIVE -2165; C; 2175; # ROMAN NUMERAL SIX -2166; C; 2176; # ROMAN NUMERAL SEVEN -2167; C; 2177; # ROMAN NUMERAL EIGHT -2168; C; 2178; # ROMAN NUMERAL NINE -2169; C; 2179; # ROMAN NUMERAL TEN -216A; C; 217A; # ROMAN NUMERAL ELEVEN -216B; C; 217B; # ROMAN NUMERAL TWELVE -216C; C; 217C; # ROMAN NUMERAL FIFTY -216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED -216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED -216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND -2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED -24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A -24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B -24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C -24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D -24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E -24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F -24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G -24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H -24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I -24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J -24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K -24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L -24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M -24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N -24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O -24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P -24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q -24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R -24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S -24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T -24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U -24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V -24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W -24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X -24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y -24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z -2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU -2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY -2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE -2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI -2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO -2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU -2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE -2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO -2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA -2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE -2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE -2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I -2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI -2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO -2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE -2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE -2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI -2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU -2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI -2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI -2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO -2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO -2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU -2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU -2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU -2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU -2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE -2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA -2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI -2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI -2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA -2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU -2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI -2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI -2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA -2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU -2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS -2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL -2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO -2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS -2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS -2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS -2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA -2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA -2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC -2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A -2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE -2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR -2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE -2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE -2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL -2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER -2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER -2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER -2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H -2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA -2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA -2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA -2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA -2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE -2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU -2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA -2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE -2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE -2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA -2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA -2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA -2C98; C; 2C99; # COPTIC CAPITAL LETTER MI -2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI -2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI -2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O -2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI -2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO -2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA -2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU -2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA -2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI -2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI -2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI -2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU -2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF -2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN -2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE -2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA -2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI -2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI -2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU -2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI -2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI -2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI -2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH -2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI -2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI -2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI -2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA -2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA -2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI -2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT -2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA -2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA -2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA -2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA -2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI -2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI -2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU -FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF -FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI -FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL -FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI -FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL -FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T -FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST -FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW -FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH -FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI -FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW -FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH -FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A -FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B -FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C -FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D -FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E -FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F -FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G -FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H -FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I -FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J -FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K -FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L -FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M -FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N -FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O -FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P -FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q -FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R -FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S -FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T -FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U -FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V -FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W -FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X -FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y -FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z -10400; C; 10428; # DESERET CAPITAL LETTER LONG I -10401; C; 10429; # DESERET CAPITAL LETTER LONG E -10402; C; 1042A; # DESERET CAPITAL LETTER LONG A -10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH -10404; C; 1042C; # DESERET CAPITAL LETTER LONG O -10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO -10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I -10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E -10408; C; 10430; # DESERET CAPITAL LETTER SHORT A -10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH -1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O -1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO -1040C; C; 10434; # DESERET CAPITAL LETTER AY -1040D; C; 10435; # DESERET CAPITAL LETTER OW -1040E; C; 10436; # DESERET CAPITAL LETTER WU -1040F; C; 10437; # DESERET CAPITAL LETTER YEE -10410; C; 10438; # DESERET CAPITAL LETTER H -10411; C; 10439; # DESERET CAPITAL LETTER PEE -10412; C; 1043A; # DESERET CAPITAL LETTER BEE -10413; C; 1043B; # DESERET CAPITAL LETTER TEE -10414; C; 1043C; # DESERET CAPITAL LETTER DEE -10415; C; 1043D; # DESERET CAPITAL LETTER CHEE -10416; C; 1043E; # DESERET CAPITAL LETTER JEE -10417; C; 1043F; # DESERET CAPITAL LETTER KAY -10418; C; 10440; # DESERET CAPITAL LETTER GAY -10419; C; 10441; # DESERET CAPITAL LETTER EF -1041A; C; 10442; # DESERET CAPITAL LETTER VEE -1041B; C; 10443; # DESERET CAPITAL LETTER ETH -1041C; C; 10444; # DESERET CAPITAL LETTER THEE -1041D; C; 10445; # DESERET CAPITAL LETTER ES -1041E; C; 10446; # DESERET CAPITAL LETTER ZEE -1041F; C; 10447; # DESERET CAPITAL LETTER ESH -10420; C; 10448; # DESERET CAPITAL LETTER ZHEE -10421; C; 10449; # DESERET CAPITAL LETTER ER -10422; C; 1044A; # DESERET CAPITAL LETTER EL -10423; C; 1044B; # DESERET CAPITAL LETTER EM -10424; C; 1044C; # DESERET CAPITAL LETTER EN -10425; C; 1044D; # DESERET CAPITAL LETTER ENG -10426; C; 1044E; # DESERET CAPITAL LETTER OI -10427; C; 1044F; # DESERET CAPITAL LETTER EW -END_OF_LIST - -$case_folding = {} -$case_folding_string.chomp.split("\n").each do |line| - next unless line =~ /([0-9A-F]+); [CFS]; ([0-9A-F ]+);/i - $case_folding[$1.hex] = $2.split(" ").collect { |e| e.hex } -end - -$int_array = [] -$int_array_indicies = {} - -def str2c(string, prefix) - return "0" if string.nil? - return "UTF8PROC_#{prefix}_#{string.upcase}" -end -def ary2c(array) - return "NULL" if array.nil? - unless $int_array_indicies[array] - $int_array_indicies[array] = $int_array.length - array.each { |entry| $int_array << entry } - $int_array << -1 - end - return "utf8proc_sequences + #{$int_array_indicies[array]}" -end - -class UnicodeChar - attr_accessor :code, :name, :category, :combining_class, :bidi_class, - :decomp_type, :decomp_mapping, - :bidi_mirrored, - :uppercase_mapping, :lowercase_mapping, :titlecase_mapping - def initialize(line) - raise "Could not parse input." unless line =~ /^ - ([0-9A-F]+); # code - ([^;]+); # name - ([A-Z]+); # general category - ([0-9]+); # canonical combining class - ([A-Z]+); # bidi class - (<([A-Z]*)>)? # decomposition type - ((\ ?[0-9A-F]+)*); # decompomposition mapping - ([0-9]*); # decimal digit - ([0-9]*); # digit - ([^;]*); # numeric - ([YN]*); # bidi mirrored - ([^;]*); # unicode 1.0 name - ([^;]*); # iso comment - ([0-9A-F]*); # simple uppercase mapping - ([0-9A-F]*); # simple lowercase mapping - ([0-9A-F]*)$/ix # simple titlecase mapping - @code = $1.hex - @name = $2 - @category = $3 - @combining_class = Integer($4) - @bidi_class = $5 - @decomp_type = $7 - @decomp_mapping = ($8=='') ? nil : - $8.split.collect { |element| element.hex } - @bidi_mirrored = ($13=='Y') ? true : false - @uppercase_mapping = ($16=='') ? nil : $16.hex - @lowercase_mapping = ($17=='') ? nil : $17.hex - @titlecase_mapping = ($18=='') ? nil : $18.hex - end - def case_folding - $case_folding[code] - end - def c_entry(comb1_indicies, comb2_indicies) - " " << - "{#{str2c category, 'CATEGORY'}, #{combining_class}, " << - "#{str2c bidi_class, 'BIDI_CLASS'}, " << - "#{str2c decomp_type, 'DECOMP_TYPE'}, " << - "#{ary2c decomp_mapping}, " << - "#{bidi_mirrored}, " << - "#{uppercase_mapping or -1}, " << - "#{lowercase_mapping or -1}, " << - "#{titlecase_mapping or -1}, " << - "#{comb1_indicies[code] ? - (comb1_indicies[code]*comb2_indicies.keys.length) : -1 - }, #{comb2_indicies[code] or -1}, " << - "#{$exclusions.include?(code) or $excl_version.include?(code)}, " << - "#{$ignorable.include?(code)}, " << - "#{%W[Zl Zp Cc Cf].include?(category) and not [0x200C, 0x200D].include?(category)}, " << - "#{$grapheme_extend.include?(code)}, " << - "#{ary2c case_folding}},\n" - end -end - -chars = [] -char_hash = {} - -while gets - if $_ =~ /^([0-9A-F]+);<[^;>,]+, First>;/i - first = $1.hex - gets - char = UnicodeChar.new($_) - raise "No last character of sequence found." unless - $_ =~ /^([0-9A-F]+);<([^;>,]+), Last>;/i - last = $1.hex - name = "<#{$2}>" - for i in first..last - char_clone = char.clone - char_clone.code = i - char_clone.name = name - char_hash[char_clone.code] = char_clone - chars << char_clone - end - else - char = UnicodeChar.new($_) - char_hash[char.code] = char - chars << char - end -end - -comb1st_indicies = {} -comb2nd_indicies = {} -comb_array = [] - -chars.each do |char| - if char.decomp_type.nil? and char.decomp_mapping and - char.decomp_mapping.length == 2 and - char_hash[char.decomp_mapping[0]].combining_class == 0 and - not $exclusions.include?(char.code) - unless comb1st_indicies[char.decomp_mapping[0]] - comb1st_indicies[char.decomp_mapping[0]] = comb1st_indicies.keys.length - end - unless comb2nd_indicies[char.decomp_mapping[1]] - comb2nd_indicies[char.decomp_mapping[1]] = comb2nd_indicies.keys.length - end - comb_array[comb1st_indicies[char.decomp_mapping[0]]] ||= [] - raise "Duplicate canonical mapping" if - comb_array[comb1st_indicies[char.decomp_mapping[0]]][ - comb2nd_indicies[char.decomp_mapping[1]]] - comb_array[comb1st_indicies[char.decomp_mapping[0]]][ - comb2nd_indicies[char.decomp_mapping[1]]] = char.code - end -end - -properties_indicies = {} -properties = [] -chars.each do |char| - c_entry = char.c_entry(comb1st_indicies, comb2nd_indicies) - unless properties_indicies[c_entry] - properties_indicies[c_entry] = properties.length - properties << c_entry - end -end - -stage1 = [] -stage2 = [] -for code in 0...0x110000 - next unless code % 0x100 == 0 - stage2_entry = [] - for code2 in code...(code+0x100) - if char_hash[code2] - stage2_entry << (properties_indicies[char_hash[code2].c_entry( - comb1st_indicies, comb2nd_indicies)] + 1) - else - stage2_entry << 0 - end - end - old_index = stage2.index(stage2_entry) - if old_index - stage1 << (old_index * 0x100) - else - stage1 << (stage2.length * 0x100) - stage2 << stage2_entry - end -end - -$stdout << "const int32_t utf8proc_sequences[] = {\n " -i = 0 -$int_array.each do |entry| - i += 1 - if i == 8 - i = 0 - $stdout << "\n " - end - $stdout << entry << ", " -end -$stdout << "};\n\n" - -$stdout << "const uint16_t utf8proc_stage1table[] = {\n " -i = 0 -stage1.each do |entry| - i += 1 - if i == 8 - i = 0 - $stdout << "\n " - end - $stdout << entry << ", " -end -$stdout << "};\n\n" - -$stdout << "const uint16_t utf8proc_stage2table[] = {\n " -i = 0 -stage2.flatten.each do |entry| - i += 1 - if i == 8 - i = 0 - $stdout << "\n " - end - $stdout << entry << ", " -end -$stdout << "};\n\n" - -$stdout << "const utf8proc_property_t utf8proc_properties[] = {\n" -$stdout << " {0, 0, 0, 0, NULL, false, -1, -1, -1, -1, -1, false},\n" -properties.each { |line| - $stdout << line -} -$stdout << "};\n\n" - -$stdout << "const int32_t utf8proc_combinations[] = {\n " -i = 0 -comb1st_indicies.keys.each_index do |a| - comb2nd_indicies.keys.each_index do |b| - i += 1 - if i == 8 - i = 0 - $stdout << "\n " - end - $stdout << ( comb_array[a][b] or -1 ) << ", " - end -end -$stdout << "};\n\n" - diff --git a/utf8proc.h b/include/libutf8proc/utf8proc.h index 24a891b..24a891b 100644 --- a/utf8proc.h +++ b/include/libutf8proc/utf8proc.h diff --git a/libutf8proc.pc.in b/libutf8proc.pc.in new file mode 100644 index 0000000..2155841 --- /dev/null +++ b/libutf8proc.pc.in @@ -0,0 +1,10 @@ +prefix=PREFIX +exec_prefix=${prefix} +libdir=${exec_prefix}/LIBDIR +includedir=${prefix}/include + +Name: libutf8proc +Description: UTF8 processing +Version: VERSION +Libs: -L${libdir} -lutf8proc +Cflags: -I${includedir} diff --git a/lump.txt b/lump.txt deleted file mode 100644 index 442cafb..0000000 --- a/lump.txt +++ /dev/null @@ -1,26 +0,0 @@ -U+0020 <-- all space characters (general category Zs) -U+0027 ' <-- left/right single quotation mark U+2018..2019, - modifier letter apostrophe U+02BC, - modifier letter vertical line U+02C8 -U+002D - <-- all dash characters (general category Pd), - minus U+2212 -U+002F / <-- fraction slash U+2044, - division slash U+2215 -U+003A : <-- ratio U+2236 -U+003C < <-- single left-pointing angle quotation mark U+2039, - left-pointing angle bracket U+2329, - left angle bracket U+3008 -U+003E > <-- single right-pointing angle quotation mark U+203A, - right-pointing angle bracket U+232A, - right angle bracket U+3009 -U+005C \ <-- set minus U+2216 -U+005E ^ <-- modifier letter up arrowhead U+02C4, - modifier letter circumflex accent U+02C6, - caret U+2038, - up arrowhead U+2303 -U+005F _ <-- all connector characters (general category Pc), - modifier letter low macron U+02CD -U+0060 ` <-- modifier letter grave accent U+02CB -U+007C | <-- divides U+2223 -U+007E ~ <-- tilde operator U+223C - diff --git a/pgsql/Makefile b/pgsql/Makefile deleted file mode 100644 index f2888f0..0000000 --- a/pgsql/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -utf8proc_pgsql.so: utf8proc_pgsql.o - ld -shared -o utf8proc_pgsql.so utf8proc_pgsql.o - -utf8proc_pgsql.o: utf8proc_pgsql.c - gcc -Wall -fpic -c -I`pg_config --includedir-server` \ - -o utf8proc_pgsql.o utf8proc_pgsql.c - -clean: - rm -f *.o *.so - diff --git a/pgsql/utf8proc.sql b/pgsql/utf8proc.sql deleted file mode 100644 index 605c646..0000000 --- a/pgsql/utf8proc.sql +++ /dev/null @@ -1,6 +0,0 @@ -CREATE OR REPLACE FUNCTION unifold (text) RETURNS text - LANGUAGE 'c' IMMUTABLE STRICT AS '$libdir/utf8proc_pgsql.so', - 'utf8proc_pgsql_unifold'; -CREATE OR REPLACE FUNCTION unistrip (text) RETURNS text - LANGUAGE 'c' IMMUTABLE STRICT AS '$libdir/utf8proc_pgsql.so', - 'utf8proc_pgsql_unistrip'; diff --git a/pgsql/utf8proc_pgsql.c b/pgsql/utf8proc_pgsql.c deleted file mode 100644 index b33795a..0000000 --- a/pgsql/utf8proc_pgsql.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) Public Software Group e. V., Berlin, Germany - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - -/* - * File name: pgsql/utf8proc_pgsql.c - * - * Description: - * PostgreSQL extension to provide two functions 'unifold' and 'unistrip', - * which can be used to case-fold and normalize index fields and - * optionally strip marks (e.g. accents) from strings. - */ - - -#include "../utf8proc.c" - -#include <postgres.h> -#include <utils/elog.h> -#include <fmgr.h> -#include <string.h> -#include <unistd.h> -#include <utils/builtins.h> - -#ifdef PG_MODULE_MAGIC -PG_MODULE_MAGIC; -#endif - -#define UTF8PROC_PGSQL_FOLD_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \ - UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \ - UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP ) -#define UTF8PROC_PGSQL_STRIP_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \ - UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \ - UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP | UTF8PROC_STRIPMARK ) - -ssize_t utf8proc_pgsql_utf8map( - text *input_string, text **output_string_ptr, int options -) { - ssize_t result; - text *output_string; - result = utf8proc_decompose( - VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, - NULL, 0, options - ); - if (result < 0) return result; - if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t)) - return UTF8PROC_ERROR_OVERFLOW; - /* reserve one extra byte for termination */ - *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ); - output_string = *output_string_ptr; - if (!output_string) return UTF8PROC_ERROR_NOMEM; - result = utf8proc_decompose( - VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ, - (int32_t *)VARDATA(output_string), result, options - ); - if (result < 0) return result; - result = utf8proc_reencode( - (int32_t *)VARDATA(output_string), result, options - ); - if (result >= 0) SET_VARSIZE(output_string, result + VARHDRSZ); - return result; -} - -void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) { - if (result < 0) { - int sqlerrcode; - if (output_string) pfree(output_string); - switch(result) { - case UTF8PROC_ERROR_NOMEM: - sqlerrcode = ERRCODE_OUT_OF_MEMORY; break; - case UTF8PROC_ERROR_OVERFLOW: - sqlerrcode = ERRCODE_PROGRAM_LIMIT_EXCEEDED; break; - case UTF8PROC_ERROR_INVALIDUTF8: - case UTF8PROC_ERROR_NOTASSIGNED: - return; - default: - sqlerrcode = ERRCODE_INTERNAL_ERROR; - } - ereport(ERROR, ( - errcode(sqlerrcode), - errmsg("%s", utf8proc_errmsg(result)) - )); - } -} - -PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold); -Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) { - text *input_string; - text *output_string = NULL; - ssize_t result; - input_string = PG_GETARG_TEXT_P(0); - result = utf8proc_pgsql_utf8map( - input_string, &output_string, UTF8PROC_PGSQL_FOLD_OPTS - ); - PG_FREE_IF_COPY(input_string, 0); - utf8proc_pgsql_utf8map_errchk(result, output_string); - if (result >= 0) { - PG_RETURN_TEXT_P(output_string); - } else { - PG_RETURN_NULL(); - } -} - -PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip); -Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) { - text *input_string; - text *output_string = NULL; - ssize_t result; - input_string = PG_GETARG_TEXT_P(0); - result = utf8proc_pgsql_utf8map( - input_string, &output_string, UTF8PROC_PGSQL_STRIP_OPTS - ); - PG_FREE_IF_COPY(input_string, 0); - utf8proc_pgsql_utf8map_errchk(result, output_string); - if (result >= 0) { - PG_RETURN_TEXT_P(output_string); - } else { - PG_RETURN_NULL(); - } -} - diff --git a/ruby/extconf.rb b/ruby/extconf.rb deleted file mode 100644 index 6dbb095..0000000 --- a/ruby/extconf.rb +++ /dev/null @@ -1,2 +0,0 @@ -require 'mkmf' -create_makefile("utf8proc_native") diff --git a/ruby/gem/LICENSE b/ruby/gem/LICENSE deleted file mode 100644 index 504e4c5..0000000 --- a/ruby/gem/LICENSE +++ /dev/null @@ -1,64 +0,0 @@ - -Copyright (c) 2009 Public Software Group e. V., Berlin, Germany - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - - -This software distribution contains derived data from a modified version of -the Unicode data files. The following license applies to that data: - -COPYRIGHT AND PERMISSION NOTICE - -Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed -under the Terms of Use in http://www.unicode.org/copyright.html. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of the Unicode data files and any associated documentation (the "Data -Files") or Unicode software and any associated documentation (the -"Software") to deal in the Data Files or Software without restriction, -including without limitation the rights to use, copy, modify, merge, -publish, distribute, and/or sell copies of the Data Files or Software, and -to permit persons to whom the Data Files or Software are furnished to do -so, provided that (a) the above copyright notice(s) and this permission -notice appear with all copies of the Data Files or Software, (b) both the -above copyright notice(s) and this permission notice appear in associated -documentation, and (c) there is clear notice in each modified Data File or -in the Software as well as in the documentation associated with the Data -File(s) or Software that the data or software has been modified. - -THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY -KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF -THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS -INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR -CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF -USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THE DATA FILES OR SOFTWARE. - -Except as contained in this notice, the name of a copyright holder shall -not be used in advertising or otherwise to promote the sale, use or other -dealings in these Data Files or Software without prior written -authorization of the copyright holder. - - -Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be -registered in some jurisdictions. All other trademarks and registered -trademarks mentioned herein are the property of their respective owners. - diff --git a/ruby/gem/utf8proc.gemspec b/ruby/gem/utf8proc.gemspec deleted file mode 100644 index 09f74dc..0000000 --- a/ruby/gem/utf8proc.gemspec +++ /dev/null @@ -1,12 +0,0 @@ -require 'rubygems' -SPEC = Gem::Specification.new do |s| - s.name = 'utf8proc' - s.version = '1.1.6' - s.author = 'Public Software Group e. V., Berlin, Germany' - s.homepage = 'http://www.public-software-group.org/utf8proc' - s.summary = 'UTF-8 Unicode string processing' - s.files = ['LICENSE', 'lib/utf8proc.rb', 'ext/utf8proc_native.c'] - s.require_path = 'lib/' - s.extensions = ['ext/extconf.rb'] - s.has_rdoc = false -end diff --git a/ruby/utf8proc.rb b/ruby/utf8proc.rb deleted file mode 100644 index 83f1649..0000000 --- a/ruby/utf8proc.rb +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2009 Public Software Group e. V., Berlin, Germany -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - - -# -# File name: ruby/utf8proc.rb -# -# Description: -# Part of the ruby wrapper for libutf8proc, which is written in ruby. -# - - -require 'utf8proc_native' - - -module Utf8Proc - - SpecialChars = { - :HT => "\x09", - :LF => "\x0A", - :VT => "\x0B", - :FF => "\x0C", - :CR => "\x0D", - :FS => "\x1C", - :GS => "\x1D", - :RS => "\x1E", - :US => "\x1F", - :LS => "\xE2\x80\xA8", - :PS => "\xE2\x80\xA9", - } - - module StringExtensions - def utf8map(*option_array) - options = 0 - option_array.each do |option| - flag = Utf8Proc::Options[option] - raise ArgumentError, "Unknown argument given to String#utf8map." unless - flag - options |= flag - end - return Utf8Proc::utf8map(self, options) - end - def utf8map!(*option_array) - self.replace(self.utf8map(*option_array)) - end - def utf8nfd; utf8map( :stable, :decompose); end - def utf8nfd!; utf8map!(:stable, :decompose); end - def utf8nfc; utf8map( :stable, :compose); end - def utf8nfc!; utf8map!(:stable, :compose); end - def utf8nfkd; utf8map( :stable, :decompose, :compat); end - def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end - def utf8nfkc; utf8map( :stable, :compose, :compat); end - def utf8nfkc!; utf8map!(:stable, :compose, :compat); end - def utf8chars - result = self.utf8map(:charbound).split("\377") - result.shift if result.first == "" - result - end - def char_ary - # depecated, use String#utf8chars instead - utf8chars - end - end - - module IntegerExtensions - def utf8 - return Utf8Proc::utf8char(self) - end - end - -end - - -class String - include(Utf8Proc::StringExtensions) -end - -class Integer - include(Utf8Proc::IntegerExtensions) -end - diff --git a/ruby/utf8proc_native.c b/ruby/utf8proc_native.c deleted file mode 100644 index 9e702a9..0000000 --- a/ruby/utf8proc_native.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - -/* - * File name: ruby/utf8proc_native.c - * - * Description: - * Native part of the ruby wrapper for libutf8proc. - */ - - -#include "../utf8proc.c" -#include "ruby.h" - -#ifndef RSTRING_PTR -#define RSTRING_PTR(s) (RSTRING(s)->ptr) -#endif -#ifndef RSTRING_LEN -#define RSTRING_LEN(s) (RSTRING(s)->len) -#endif - -typedef struct utf8proc_ruby_mapenv_struct { - int32_t *buffer; -} utf8proc_ruby_mapenv_t; - -void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) { - free(env->buffer); - free(env); -} - -VALUE utf8proc_ruby_module; -VALUE utf8proc_ruby_options; -VALUE utf8proc_ruby_eUnicodeError; -VALUE utf8proc_ruby_eInvalidUtf8Error; -VALUE utf8proc_ruby_eCodeNotAssignedError; - -VALUE utf8proc_ruby_map_error(ssize_t result) { - VALUE excpt_class; - switch (result) { - case UTF8PROC_ERROR_NOMEM: - excpt_class = rb_eNoMemError; break; - case UTF8PROC_ERROR_OVERFLOW: - case UTF8PROC_ERROR_INVALIDOPTS: - excpt_class = rb_eArgError; break; - case UTF8PROC_ERROR_INVALIDUTF8: - excpt_class = utf8proc_ruby_eInvalidUtf8Error; break; - case UTF8PROC_ERROR_NOTASSIGNED: - excpt_class = utf8proc_ruby_eCodeNotAssignedError; break; - default: - excpt_class = rb_eRuntimeError; - } - rb_raise(excpt_class, "%s", utf8proc_errmsg(result)); - return Qnil; -} - -VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) { - VALUE str; - int options; - VALUE env_obj; - utf8proc_ruby_mapenv_t *env; - ssize_t result; - VALUE retval; - str = StringValue(str_param); - options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM; - env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL, - utf8proc_ruby_mapenv_free, env); - result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str), - NULL, 0, options); - if (result < 0) { - utf8proc_ruby_map_error(result); - return Qnil; /* needed to prevent problems with optimization */ - } - env->buffer = ALLOC_N(int32_t, result+1); - result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str), - env->buffer, result, options); - if (result < 0) { - free(env->buffer); - env->buffer = 0; - utf8proc_ruby_map_error(result); - return Qnil; /* needed to prevent problems with optimization */ - } - result = utf8proc_reencode(env->buffer, result, options); - if (result < 0) { - free(env->buffer); - env->buffer = 0; - utf8proc_ruby_map_error(result); - return Qnil; /* needed to prevent problems with optimization */ - } - retval = rb_str_new((char *)env->buffer, result); - free(env->buffer); - env->buffer = 0; - return retval; -} - -static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) { - char buffer[4]; - ssize_t result; - int uc; - uc = NUM2INT(code_param); - if (!utf8proc_codepoint_valid(uc)) - rb_raise(rb_eArgError, "Invalid Unicode code point"); - result = utf8proc_encode_char(uc, buffer); - return rb_str_new(buffer, result); -} - -#define register_utf8proc_option(sym, field) \ - rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(sym)), INT2FIX(field)) - -void Init_utf8proc_native() { - utf8proc_ruby_module = rb_define_module("Utf8Proc"); - rb_define_module_function(utf8proc_ruby_module, "utf8map", - utf8proc_ruby_map, 2); - rb_define_module_function(utf8proc_ruby_module, "utf8char", - utf8proc_ruby_char, 1); - utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module, - "UnicodeError", rb_eStandardError); - utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under( - utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError); - utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under( - utf8proc_ruby_module, "CodeNotAssignedError", - utf8proc_ruby_eUnicodeError); - utf8proc_ruby_options = rb_hash_new(); - register_utf8proc_option("stable", UTF8PROC_STABLE); - register_utf8proc_option("compat", UTF8PROC_COMPAT); - register_utf8proc_option("compose", UTF8PROC_COMPOSE); - register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE); - register_utf8proc_option("ignore", UTF8PROC_IGNORE); - register_utf8proc_option("rejectna", UTF8PROC_REJECTNA); - register_utf8proc_option("nlf2ls", UTF8PROC_NLF2LS); - register_utf8proc_option("nlf2ps", UTF8PROC_NLF2PS); - register_utf8proc_option("nlf2lf", UTF8PROC_NLF2LF); - register_utf8proc_option("stripcc", UTF8PROC_STRIPCC); - register_utf8proc_option("casefold", UTF8PROC_CASEFOLD); - register_utf8proc_option("charbound", UTF8PROC_CHARBOUND); - register_utf8proc_option("lump", UTF8PROC_LUMP); - register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK); - OBJ_FREEZE(utf8proc_ruby_options); - rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options); -} - diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..5e80c79 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,3 @@ +DIR_SOURCES := utf8proc.c + +include $(NSBUILD)/Makefile.subdir diff --git a/utf8proc.c b/src/utf8proc.c index ef2d433..ef2d433 100644 --- a/utf8proc.c +++ b/src/utf8proc.c diff --git a/utf8proc_data.c b/src/utf8proc_data.c index 1426b76..1426b76 100644 --- a/utf8proc_data.c +++ b/src/utf8proc_data.c |