summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorVincent Sanders <vince@kyllikki.org>2020-06-22 10:49:34 +0100
committerVincent Sanders <vince@kyllikki.org>2020-06-27 23:30:54 +0100
commit421d796e9b462939de9e9e116e09e9384a8ba5bc (patch)
tree4b74bebbfa11814c6b58b03e6c45e93cd621b838 /tools
parent1dab82d655b8f54e344f21f062495d5cb35c8c4c (diff)
downloadnetsurf-421d796e9b462939de9e9e116e09e9384a8ba5bc.tar.gz
netsurf-421d796e9b462939de9e9e116e09e9384a8ba5bc.tar.bz2
split tools and utils
Diffstat (limited to 'tools')
-rw-r--r--tools/DerivedJoiningType.txt318
-rw-r--r--tools/Makefile75
-rw-r--r--tools/convert_font.c1215
-rw-r--r--tools/convert_image.c304
-rwxr-xr-xtools/coverity-build.sh72
-rw-r--r--tools/fetch-transifex.pl127
-rwxr-xr-xtools/git-date.sh36
-rw-r--r--tools/git-testament.pl227
-rw-r--r--tools/idna-derived-props-gen.pl182
-rw-r--r--tools/idna-tables-properties.csv2322
-rw-r--r--tools/import-messages.pl326
-rwxr-xr-xtools/jenkins-build.sh486
-rwxr-xr-xtools/memanalyze.pl380
-rw-r--r--tools/split-messages.c549
-rw-r--r--tools/split-messages.pl318
-rwxr-xr-xtools/test-netsurf35
-rw-r--r--tools/valgrind.supp14
-rw-r--r--tools/xxd.c135
18 files changed, 7121 insertions, 0 deletions
diff --git a/tools/DerivedJoiningType.txt b/tools/DerivedJoiningType.txt
new file mode 100644
index 000000000..d4dcc85f6
--- /dev/null
+++ b/tools/DerivedJoiningType.txt
@@ -0,0 +1,318 @@
+# DerivedJoiningType-5.2.0.txt
+# Date: 2009-05-28, 20:37:39 GMT [MD]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2009 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+
+# ================================================
+
+# Type T is derived, as described in ArabicShaping.txt
+
+# All code points not explicitly listed for Joining_Type
+# have the value Non_Joining (U).
+
+# @missing: 0000..10FFFF; Non_Joining
+
+# ================================================
+
+# Joining_Type=Join_Causing
+
+0640 ; C # Lm ARABIC TATWEEL
+07FA ; C # Lm NKO LAJANYALAN
+200D ; C # Cf ZERO WIDTH JOINER
+
+# Total code points: 3
+
+# ================================================
+
+# Joining_Type=Dual_Joining
+
+0626 ; D # Lo ARABIC LETTER YEH WITH HAMZA ABOVE
+0628 ; D # Lo ARABIC LETTER BEH
+062A..062E ; D # Lo [5] ARABIC LETTER TEH..ARABIC LETTER KHAH
+0633..063F ; D # Lo [13] ARABIC LETTER SEEN..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
+0641..0647 ; D # Lo [7] ARABIC LETTER FEH..ARABIC LETTER HEH
+0649..064A ; D # Lo [2] ARABIC LETTER ALEF MAKSURA..ARABIC LETTER YEH
+066E..066F ; D # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
+0678..0687 ; D # Lo [16] ARABIC LETTER HIGH HAMZA YEH..ARABIC LETTER TCHEHEH
+069A..06BF ; D # Lo [38] ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE..ARABIC LETTER TCHEH WITH DOT ABOVE
+06C1..06C2 ; D # Lo [2] ARABIC LETTER HEH GOAL..ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
+06CC ; D # Lo ARABIC LETTER FARSI YEH
+06CE ; D # Lo ARABIC LETTER YEH WITH SMALL V
+06D0..06D1 ; D # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW
+06FA..06FC ; D # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
+06FF ; D # Lo ARABIC LETTER HEH WITH INVERTED V
+0712..0714 ; D # Lo [3] SYRIAC LETTER BETH..SYRIAC LETTER GAMAL GARSHUNI
+071A..071D ; D # Lo [4] SYRIAC LETTER HETH..SYRIAC LETTER YUDH
+071F..0727 ; D # Lo [9] SYRIAC LETTER KAPH..SYRIAC LETTER REVERSED PE
+0729 ; D # Lo SYRIAC LETTER QAPH
+072B ; D # Lo SYRIAC LETTER SHIN
+072D..072E ; D # Lo [2] SYRIAC LETTER PERSIAN BHETH..SYRIAC LETTER PERSIAN GHAMAL
+074E..0758 ; D # Lo [11] SYRIAC LETTER SOGDIAN KHAPH..ARABIC LETTER HAH WITH THREE DOTS POINTING UPWARDS BELOW
+075C..076A ; D # Lo [15] ARABIC LETTER SEEN WITH FOUR DOTS ABOVE..ARABIC LETTER LAM WITH BAR
+076D..0770 ; D # Lo [4] ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER SEEN WITH SMALL ARABIC LETTER TAH AND TWO DOTS
+0772 ; D # Lo ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH ABOVE
+0775..0777 ; D # Lo [3] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW
+077A..077F ; D # Lo [6] ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER KAF WITH TWO DOTS ABOVE
+07CA..07EA ; D # Lo [33] NKO LETTER A..NKO LETTER JONA RA
+
+# Total code points: 188
+
+# ================================================
+
+# Joining_Type=Right_Joining
+
+0622..0625 ; R # Lo [4] ARABIC LETTER ALEF WITH MADDA ABOVE..ARABIC LETTER ALEF WITH HAMZA BELOW
+0627 ; R # Lo ARABIC LETTER ALEF
+0629 ; R # Lo ARABIC LETTER TEH MARBUTA
+062F..0632 ; R # Lo [4] ARABIC LETTER DAL..ARABIC LETTER ZAIN
+0648 ; R # Lo ARABIC LETTER WAW
+0671..0673 ; R # Lo [3] ARABIC LETTER ALEF WASLA..ARABIC LETTER ALEF WITH WAVY HAMZA BELOW
+0675..0677 ; R # Lo [3] ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER U WITH HAMZA ABOVE
+0688..0699 ; R # Lo [18] ARABIC LETTER DDAL..ARABIC LETTER REH WITH FOUR DOTS ABOVE
+06C0 ; R # Lo ARABIC LETTER HEH WITH YEH ABOVE
+06C3..06CB ; R # Lo [9] ARABIC LETTER TEH MARBUTA GOAL..ARABIC LETTER VE
+06CD ; R # Lo ARABIC LETTER YEH WITH TAIL
+06CF ; R # Lo ARABIC LETTER WAW WITH DOT ABOVE
+06D2..06D3 ; R # Lo [2] ARABIC LETTER YEH BARREE..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
+06D5 ; R # Lo ARABIC LETTER AE
+06EE..06EF ; R # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
+0710 ; R # Lo SYRIAC LETTER ALAPH
+0715..0719 ; R # Lo [5] SYRIAC LETTER DALATH..SYRIAC LETTER ZAIN
+071E ; R # Lo SYRIAC LETTER YUDH HE
+0728 ; R # Lo SYRIAC LETTER SADHE
+072A ; R # Lo SYRIAC LETTER RISH
+072C ; R # Lo SYRIAC LETTER TAW
+072F ; R # Lo SYRIAC LETTER PERSIAN DHALATH
+074D ; R # Lo SYRIAC LETTER SOGDIAN ZHAIN
+0759..075B ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW AND SMALL TAH..ARABIC LETTER REH WITH STROKE
+076B..076C ; R # Lo [2] ARABIC LETTER REH WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER REH WITH HAMZA ABOVE
+0771 ; R # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS
+0773..0774 ; R # Lo [2] ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE
+0778..0779 ; R # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE
+
+# Total code points: 74
+
+# ================================================
+
+# Joining_Type=Transparent
+
+00AD ; T # Cf SOFT HYPHEN
+0300..036F ; T # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
+0483..0487 ; T # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
+0488..0489 ; T # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
+0591..05BD ; T # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
+05BF ; T # Mn HEBREW POINT RAFE
+05C1..05C2 ; T # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+05C4..05C5 ; T # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+05C7 ; T # Mn HEBREW POINT QAMATS QATAN
+0610..061A ; T # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+064B..065E ; T # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
+0670 ; T # Mn ARABIC LETTER SUPERSCRIPT ALEF
+06D6..06DC ; T # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
+06DE ; T # Me ARABIC START OF RUB EL HIZB
+06DF..06E4 ; T # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
+06E7..06E8 ; T # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
+06EA..06ED ; T # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
+070F ; T # Cf SYRIAC ABBREVIATION MARK
+0711 ; T # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
+0730..074A ; T # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+07A6..07B0 ; T # Mn [11] THAANA ABAFILI..THAANA SUKUN
+07EB..07F3 ; T # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
+0816..0819 ; T # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
+081B..0823 ; T # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
+0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
+0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
+0900..0902 ; T # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
+093C ; T # Mn DEVANAGARI SIGN NUKTA
+0941..0948 ; T # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+094D ; T # Mn DEVANAGARI SIGN VIRAMA
+0951..0955 ; T # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E
+0962..0963 ; T # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+0981 ; T # Mn BENGALI SIGN CANDRABINDU
+09BC ; T # Mn BENGALI SIGN NUKTA
+09C1..09C4 ; T # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+09CD ; T # Mn BENGALI SIGN VIRAMA
+09E2..09E3 ; T # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
+0A01..0A02 ; T # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
+0A3C ; T # Mn GURMUKHI SIGN NUKTA
+0A41..0A42 ; T # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+0A47..0A48 ; T # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+0A4B..0A4D ; T # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+0A51 ; T # Mn GURMUKHI SIGN UDAAT
+0A70..0A71 ; T # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+0A75 ; T # Mn GURMUKHI SIGN YAKASH
+0A81..0A82 ; T # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
+0ABC ; T # Mn GUJARATI SIGN NUKTA
+0AC1..0AC5 ; T # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+0AC7..0AC8 ; T # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+0ACD ; T # Mn GUJARATI SIGN VIRAMA
+0AE2..0AE3 ; T # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0B01 ; T # Mn ORIYA SIGN CANDRABINDU
+0B3C ; T # Mn ORIYA SIGN NUKTA
+0B3F ; T # Mn ORIYA VOWEL SIGN I
+0B41..0B44 ; T # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
+0B4D ; T # Mn ORIYA SIGN VIRAMA
+0B56 ; T # Mn ORIYA AI LENGTH MARK
+0B62..0B63 ; T # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
+0B82 ; T # Mn TAMIL SIGN ANUSVARA
+0BC0 ; T # Mn TAMIL VOWEL SIGN II
+0BCD ; T # Mn TAMIL SIGN VIRAMA
+0C3E..0C40 ; T # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+0C46..0C48 ; T # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+0C4A..0C4D ; T # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
+0C55..0C56 ; T # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+0C62..0C63 ; T # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
+0CBC ; T # Mn KANNADA SIGN NUKTA
+0CBF ; T # Mn KANNADA VOWEL SIGN I
+0CC6 ; T # Mn KANNADA VOWEL SIGN E
+0CCC..0CCD ; T # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
+0CE2..0CE3 ; T # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+0D41..0D44 ; T # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
+0D4D ; T # Mn MALAYALAM SIGN VIRAMA
+0D62..0D63 ; T # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
+0DCA ; T # Mn SINHALA SIGN AL-LAKUNA
+0DD2..0DD4 ; T # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+0DD6 ; T # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
+0E31 ; T # Mn THAI CHARACTER MAI HAN-AKAT
+0E34..0E3A ; T # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+0E47..0E4E ; T # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
+0EB1 ; T # Mn LAO VOWEL SIGN MAI KAN
+0EB4..0EB9 ; T # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
+0EBB..0EBC ; T # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
+0EC8..0ECD ; T # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
+0F18..0F19 ; T # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+0F35 ; T # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
+0F37 ; T # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
+0F39 ; T # Mn TIBETAN MARK TSA -PHRU
+0F71..0F7E ; T # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+0F80..0F84 ; T # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
+0F86..0F87 ; T # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+0F90..0F97 ; T # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
+0F99..0FBC ; T # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+0FC6 ; T # Mn TIBETAN SYMBOL PADMA GDAN
+102D..1030 ; T # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+1032..1037 ; T # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
+1039..103A ; T # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
+103D..103E ; T # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
+1058..1059 ; T # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+105E..1060 ; T # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
+1071..1074 ; T # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
+1082 ; T # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
+1085..1086 ; T # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
+108D ; T # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
+109D ; T # Mn MYANMAR VOWEL SIGN AITON AI
+135F ; T # Mn ETHIOPIC COMBINING GEMINATION MARK
+1712..1714 ; T # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+1732..1734 ; T # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
+1752..1753 ; T # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+1772..1773 ; T # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+17B4..17B5 ; T # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+17B7..17BD ; T # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+17C6 ; T # Mn KHMER SIGN NIKAHIT
+17C9..17D3 ; T # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
+17DD ; T # Mn KHMER SIGN ATTHACAN
+180B..180D ; T # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+18A9 ; T # Mn MONGOLIAN LETTER ALI GALI DAGALGA
+1920..1922 ; T # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+1927..1928 ; T # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+1932 ; T # Mn LIMBU SMALL LETTER ANUSVARA
+1939..193B ; T # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
+1A17..1A18 ; T # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1A56 ; T # Mn TAI THAM CONSONANT SIGN MEDIAL LA
+1A58..1A5E ; T # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
+1A60 ; T # Mn TAI THAM SIGN SAKOT
+1A62 ; T # Mn TAI THAM VOWEL SIGN MAI SAT
+1A65..1A6C ; T # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
+1A73..1A7C ; T # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
+1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
+1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
+1B34 ; T # Mn BALINESE SIGN REREKAN
+1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3C ; T # Mn BALINESE VOWEL SIGN LA LENGA
+1B42 ; T # Mn BALINESE VOWEL SIGN PEPET
+1B6B..1B73 ; T # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+1B80..1B81 ; T # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
+1BA2..1BA5 ; T # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
+1BA8..1BA9 ; T # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+1C2C..1C33 ; T # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
+1C36..1C37 ; T # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
+1CD0..1CD2 ; T # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+1CD4..1CE0 ; T # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+1CE2..1CE8 ; T # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+1CED ; T # Mn VEDIC SIGN TIRYAK
+1DC0..1DE6 ; T # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
+1DFD..1DFF ; T # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+200B ; T # Cf ZERO WIDTH SPACE
+200E..200F ; T # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
+202A..202E ; T # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+2060..2064 ; T # Cf [5] WORD JOINER..INVISIBLE PLUS
+206A..206F ; T # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+20D0..20DC ; T # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+20DD..20E0 ; T # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
+20E1 ; T # Mn COMBINING LEFT RIGHT ARROW ABOVE
+20E2..20E4 ; T # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
+20E5..20F0 ; T # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
+2CEF..2CF1 ; T # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
+2DE0..2DFF ; T # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
+302A..302F ; T # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
+3099..309A ; T # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+A66F ; T # Mn COMBINING CYRILLIC VZMET
+A670..A672 ; T # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
+A67C..A67D ; T # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
+A6F0..A6F1 ; T # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+A802 ; T # Mn SYLOTI NAGRI SIGN DVISVARA
+A806 ; T # Mn SYLOTI NAGRI SIGN HASANTA
+A80B ; T # Mn SYLOTI NAGRI SIGN ANUSVARA
+A825..A826 ; T # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
+A8C4 ; T # Mn SAURASHTRA SIGN VIRAMA
+A8E0..A8F1 ; T # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
+A926..A92D ; T # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
+A947..A951 ; T # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+A980..A982 ; T # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
+A9B3 ; T # Mn JAVANESE SIGN CECAK TELU
+A9B6..A9B9 ; T # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
+A9BC ; T # Mn JAVANESE VOWEL SIGN PEPET
+AA29..AA2E ; T # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
+AA31..AA32 ; T # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
+AA35..AA36 ; T # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
+AA43 ; T # Mn CHAM CONSONANT SIGN FINAL NG
+AA4C ; T # Mn CHAM CONSONANT SIGN FINAL M
+AAB0 ; T # Mn TAI VIET MAI KANG
+AAB2..AAB4 ; T # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
+AAB7..AAB8 ; T # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
+AABE..AABF ; T # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
+AAC1 ; T # Mn TAI VIET TONE MAI THO
+ABE5 ; T # Mn MEETEI MAYEK VOWEL SIGN ANAP
+ABE8 ; T # Mn MEETEI MAYEK VOWEL SIGN UNAP
+ABED ; T # Mn MEETEI MAYEK APUN IYEK
+FB1E ; T # Mn HEBREW POINT JUDEO-SPANISH VARIKA
+FE00..FE0F ; T # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+FE20..FE26 ; T # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
+FEFF ; T # Cf ZERO WIDTH NO-BREAK SPACE
+FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
+101FD ; T # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
+10A01..10A03 ; T # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
+10A05..10A06 ; T # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
+10A0C..10A0F ; T # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
+10A38..10A3A ; T # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
+10A3F ; T # Mn KHAROSHTHI VIRAMA
+11080..11081 ; T # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
+110B3..110B6 ; T # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
+110B9..110BA ; T # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
+110BD ; T # Cf KAITHI NUMBER SIGN
+1D167..1D169 ; T # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
+1D173..1D17A ; T # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
+1D17B..1D182 ; T # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
+1D185..1D18B ; T # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
+1D1AA..1D1AD ; T # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
+1D242..1D244 ; T # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
+E0001 ; T # Cf LANGUAGE TAG
+E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG
+E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
+
+# Total code points: 1308
+
+# EOF
diff --git a/tools/Makefile b/tools/Makefile
new file mode 100644
index 000000000..5dc4a1000
--- /dev/null
+++ b/tools/Makefile
@@ -0,0 +1,75 @@
+# Tools: rules for generated build data and the helper programs built from tools/
+
+# testament rules: testament.h is written by tools/git-testament.pl
+
+clean-testament:
+ $(VQ)echo " CLEAN: testament.h"
+ $(Q)$(RM) $(OBJROOT)/testament.h
+CLEANS += clean-testament
+
+.PHONY: testament
+
+testament $(OBJROOT)/testament.h:
+ $(Q)$(PERL) tools/git-testament.pl $(CURDIR) $(OBJROOT)/testament.h
+
+
+# libpng compiler and linker flags for the build tools (OpenBSD and FreeBSD ask pkg-config, others just link -lpng)
+ifeq ($(HOST),OpenBSD)
+ BUILD_LIBPNG_CFLAGS += $(shell $(PKG_CONFIG) --cflags libpng)
+ BUILD_LIBPNG_LDFLAGS += $(shell $(PKG_CONFIG) --libs libpng)
+else
+ ifeq ($(HOST),FreeBSD)
+ BUILD_LIBPNG_CFLAGS += $(shell $(PKG_CONFIG) --cflags libpng)
+ BUILD_LIBPNG_LDFLAGS += $(shell $(PKG_CONFIG) --libs libpng)
+ else
+ BUILD_LIBPNG_CFLAGS +=
+ BUILD_LIBPNG_LDFLAGS += -lpng
+ endif
+endif
+
+
+# Build tool to convert file to compiled data
+#
+$(TOOLROOT)/xxd: tools/xxd.c $(TOOLROOT)/created
+ $(VQ)echo "BUILD CC: $@"
+ $(Q)$(BUILD_CC) $(BUILD_CFLAGS) -o $@ $< $(BUILD_LDFLAGS)
+
+
+# Build tool to filter messages
+#
+$(TOOLROOT)/split-messages: tools/split-messages.c $(TOOLROOT)/created
+ $(VQ)echo "BUILD CC: $@"
+ $(Q)$(BUILD_CC) $(BUILD_CFLAGS) -I. -o $@ $< $(BUILD_LDFLAGS) -lz
+
+
+# Build tool to convert image bitmaps to source code.
+#
+$(TOOLROOT)/convert_image: tools/convert_image.c $(TOOLROOT)/created
+ $(VQ)echo "BUILD CC: $@"
+ $(Q)$(BUILD_CC) $(BUILD_CFLAGS) $(BUILD_LIBPNG_CFLAGS) -o $@ $< $(BUILD_LDFLAGS) $(BUILD_LIBPNG_LDFLAGS)
+
+
+# Build tool to perform font conversion
+$(TOOLROOT)/convert_font: tools/convert_font.c $(TOOLROOT)/created
+ $(VQ)echo "BUILD CC: $@"
+ $(Q)$(BUILD_CC) $(BUILD_CFLAGS) -o $@ $<
+
+# idna: rules to download the Unicode and IANA data files and regenerate utils/idna_props.h
+#
+IDNA_UNICODE_MAJOR=11
+
+tools/DerivedJoiningType.txt:
+ curl -o $@ "https://www.unicode.org/Public/$(IDNA_UNICODE_MAJOR).0.0/ucd/extracted/DerivedJoiningType.txt"
+
+tools/IdnaMappingTable.txt:
+ curl -o $@ "https://www.unicode.org/Public/idna/$(IDNA_UNICODE_MAJOR).0.0/IdnaMappingTable.txt"
+
+tools/idna-tables-properties.csv:
+ curl -o $@ "https://www.iana.org/assignments/idna-tables-$(IDNA_UNICODE_MAJOR).0.0/idna-tables-properties.csv"
+
+# the idna props header must be explicitly rebuilt: this rule is only defined when utils/idna_props.h is named as a make goal
+ifneq ($(filter $(MAKECMDGOALS),utils/idna_props.h),)
+utils/idna_props.h: tools/DerivedJoiningType.txt tools/idna-tables-properties.csv
+ $(VQ)echo " IDNA: $@"
+ $(Q)$(PERL) tools/idna-derived-props-gen.pl -o $@ -p tools/idna-tables-properties.csv -j tools/DerivedJoiningType.txt
+endif
diff --git a/tools/convert_font.c b/tools/convert_font.c
new file mode 100644
index 000000000..9f5734b71
--- /dev/null
+++ b/tools/convert_font.c
@@ -0,0 +1,1215 @@
+/*
+ * Copyright 2014 Michael Drake <tlsa@netsurf-browser.org>
+ * Copyright 2014 Vincent Sanders <vince@netsurf-browser.org>
+ *
+ * This file is part of the convert_font tool used to convert font
+ * glyph data into a compilable representation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+#include <getopt.h>
+
+#define GLYPH_LEN 16 /* Bytes of data held for each glyph */
+#define BUCKETS 512 /* Number of chains in the glyph hash table */
+#define CHUNK_SIZE (64 * 1024) /* NOTE(review): not used in this part of the file; presumably an input read size — confirm */
+#define HEADER_MAX 2000 /* Capacity of the buffer holding the font's header text */
+
+#define SECTION_SIZE (sizeof(uint16_t) * 256) /* Bytes in one section of 256 uint16_t entries */
+
+const char *labels[4] = { /* Long style names; NOTE(review): presumably indexed by style for reporting — confirm */
+ " Regular",
+ " Italic",
+ " Bold",
+ "Bold & Italic"
+};
+
+const char *var_lables[4] = { /* Per-style prefixes of the identifiers written to the generated files */
+ "fb_regular",
+ "fb_italic",
+ "fb_bold",
+ "fb_bold_italic"
+};
+
+const char *short_labels[4] = { /* Short style tags, indexed by style when logging duplicate glyphs */
+ " ",
+ " i",
+ "b ",
+ "bi"
+};
+
+enum font_style { /* Font style values */
+ REGULAR = 0,
+ ITALIC = (1 << 0),
+ BOLD = (1 << 1),
+ ITALIC_BOLD = (1 << 2) /* NOTE(review): this is 4, not ITALIC | BOLD (3), while the 4-entry label tables only cover 0-3 — confirm intended use */
+};
+
+enum log_level { /* Message severities in increasing order; LOG() compares them numerically */
+ LOG_DEBUG,
+ LOG_INFO,
+ LOG_RESULT,
+ LOG_WARNING,
+ LOG_ERROR
+};
+
+enum log_level level; /* Lowest severity that LOG() prints; file-scope, so it starts as LOG_DEBUG (0) */
+
+typedef struct glyph_entry { /* One glyph's data, kept as a node in a hash chain */
+ union {
+ uint32_t u32[GLYPH_LEN / 4];
+ uint8_t u8[GLYPH_LEN];
+ } data; /* The same GLYPH_LEN bytes, viewable as 32-bit words or as bytes */
+ uint32_t index; /* Number given to a unique glyph by add_glyph_to_data(); counted from 1 */
+ struct glyph_entry *next; /* Next entry in the same hash chain, or NULL at the end */
+} glyph_entry;
+
+/** Scratch glyph for generated code points */
+uint8_t code_point[GLYPH_LEN];
+
+/** Hash table of glyphs: BUCKETS chains, selected by glyph_hash() % BUCKETS */
+glyph_entry *ht[BUCKETS];
+
+#define LOG(lev, fmt, ...) \
+ if (lev >= level) \
+ printf(fmt, ##__VA_ARGS__);
+
+/**
+ * Compute the hash used to place a glyph in the hash table
+ *
+ * Starting from 0x811c9dc5, each data byte in turn is folded in by
+ * multiplying the running value by 0x01000193 and then XORing in the byte.
+ *
+ * \param g  Glyph data (GLYPH_LEN bytes)
+ * \return 32-bit hash of the glyph data
+ */
+static inline uint32_t glyph_hash(const uint8_t *g)
+{
+	const uint8_t *const end = g + GLYPH_LEN;
+	uint32_t h = 0x811c9dc5;
+
+	for (; g != end; g++) {
+		h = (h * 0x01000193) ^ *g;
+	}
+
+	return h;
+}
+
+
+/**
+ * Compare the data of two glyphs
+ *
+ * \param g1  Data of the first glyph (GLYPH_LEN bytes)
+ * \param g2  Data of the second glyph (GLYPH_LEN bytes)
+ * \return true if all GLYPH_LEN bytes are equal, false otherwise
+ */
+static inline bool glyphs_match(const uint8_t *g1, const uint8_t *g2)
+{
+	int diff = memcmp(g1, g2, GLYPH_LEN);
+
+	return diff == 0;
+}
+
+
+/**
+ * Insert a glyph into a hash chain unless an identical glyph is already there
+ *
+ * The chain is searched from its head.  If an entry with identical data is
+ * found, the new glyph is freed and the existing entry is returned.  If not,
+ * the new glyph is appended to the end of the chain and returned.
+ *
+ * \param head  Head of hash chain
+ * \param new   New glyph to add (may be freed)
+ * \return pointer to glyph in hash chain
+ */
+static glyph_entry * glyph_add_to_chain(glyph_entry **head, glyph_entry *new)
+{
+	/* Walk the links themselves, so an empty chain needs no special case */
+	glyph_entry **link = head;
+
+	while (*link != NULL) {
+		glyph_entry *existing = *link;
+
+		if (glyphs_match(new->data.u8, existing->data.u8)) {
+			free(new);
+			return existing;
+		}
+		link = &existing->next;
+	}
+
+	/* No match: link now refers to the terminating NULL pointer */
+	new->next = NULL;
+	*link = new;
+	return new;
+}
+
+
+/**
+ * Free every glyph entry in a hash chain
+ *
+ * \param head  Head of hash chain (may be NULL, in which case nothing is done)
+ */
+static void free_chain(glyph_entry *head)
+{
+	glyph_entry *next;
+
+	for (; head != NULL; head = next) {
+		/* Read the link before the entry holding it is released */
+		next = head->next;
+		free(head);
+	}
+}
+
+
+/**
+ * Insert a glyph into the hash table unless an identical glyph is present
+ *
+ * The glyph's hash selects one of the BUCKETS chains.  If that chain already
+ * holds a glyph with identical data, the new glyph is freed and the existing
+ * one is returned; otherwise the new glyph is added and returned.
+ *
+ * \param new  New glyph to add (may be freed)
+ * \return pointer to glyph in hash table
+ */
+static glyph_entry * glyph_add_to_table(glyph_entry *new)
+{
+	glyph_entry **bucket = &ht[glyph_hash(new->data.u8) % BUCKETS];
+
+	return glyph_add_to_chain(bucket, new);
+}
+
+
+/**
+ * Free glyph table.
+ *
+ * Frees every chain and clears each bucket, so the table is left empty
+ * rather than holding pointers to freed entries.  This makes a repeated
+ * call, or a later insertion, safe.
+ */
+static void free_table(void)
+{
+	int i;
+
+	for (i = 0; i < BUCKETS; i++) {
+		free_chain(ht[i]);
+		ht[i] = NULL;
+	}
+}
+
+struct parse_context { /* Parser context: current state, that state's scratch data, and running totals */
+ enum {
+ START,
+ IN_HEADER,
+ BEFORE_ID,
+ GLYPH_ID,
+ BEFORE_GLYPH_DATA,
+ IN_GLYPH_DATA
+ } state; /**< Current parser state */
+
+ union {
+ struct {
+ bool new_line;
+ } in_header; /* presumably used in state IN_HEADER — confirm */
+ struct {
+ bool new_line;
+ bool u;
+ } before_id; /* presumably used in state BEFORE_ID — confirm */
+ struct {
+ int c;
+ } g_id; /* presumably used in state GLYPH_ID — confirm */
+ struct {
+ bool new_line;
+ bool prev_h;
+ bool prev_s;
+ int c;
+ } before_gd; /* presumably used in state BEFORE_GLYPH_DATA — confirm */
+ struct {
+ int line;
+ int pos;
+ int styles;
+ int line_styles;
+ glyph_entry *e[4]; /* presumably one glyph under construction per style — confirm */
+ } in_gd; /* presumably used in state IN_GLYPH_DATA — confirm */
+ } data; /**< The state specific data */
+
+ int id; /**< Current ID */
+
+ int codepoints; /**< Glyphs containing codepoints */
+ int count[4]; /**< Count of glyphs in file */
+};
+
+struct font_data { /* Font contents gathered for writing to the generated files */
+ char header[HEADER_MAX]; /* Header text, copied into a comment at the top of each generated file */
+ int header_len; /* Number of bytes of header[] that are written out */
+
+ uint8_t section_table[4][256]; /* Per style: index into sections[] for each id / 256 */
+ uint8_t sec_count[4]; /* Per style: number of 256-entry sections allocated */
+ uint16_t *sections[4]; /* Per style: sec_count * 256 glyph index values */
+
+ glyph_entry *e[0xffff]; /* Unique glyphs, in the order they were first added */
+ int glyphs; /* Number of entries used in e[] */
+};
+
+/**
+ * Write the generated font header file
+ *
+ * The file holds the font's header text inside a comment, then the pointer
+ * declarations for each style's section table and sections, and for the
+ * glyph data.
+ *
+ * \param path  Path of the header file to create
+ * \param data  Font data supplying the header text
+ * \return true on success, false if the file could not be opened or if
+ *         writing or closing it failed
+ */
+static bool generate_font_header(const char *path, struct font_data *data)
+{
+	FILE *fp;
+	bool ok;
+	int s;
+
+	fp = fopen(path, "wb");
+	if (fp == NULL) {
+		LOG(LOG_ERROR, "Couldn't open header file \"%s\"\n", path);
+		return false;
+	}
+
+	fprintf(fp, "/*\n");
+	fwrite(data->header, 1, data->header_len, fp);
+	fprintf(fp, " */\n\n");
+	fprintf(fp, "/* Don't edit this file, it was generated from the "
+			"plain text source data. */\n\n");
+
+	for (s = 0; s < 4; s++) {
+		fprintf(fp, "const uint8_t *%s_section_table;\n",
+				var_lables[s]);
+		fprintf(fp, "const uint16_t *%s_sections;\n",
+				var_lables[s]);
+	}
+
+	fprintf(fp, "const uint8_t *font_glyph_data;\n");
+
+	fprintf(fp, "\n\n");
+
+	/* Output is buffered: the stream's error flag records any failed
+	 * write so far, and fclose() reports a failure of the final flush. */
+	ok = (ferror(fp) == 0);
+	if (fclose(fp) != 0) {
+		ok = false;
+	}
+	if (!ok) {
+		LOG(LOG_ERROR, "Couldn't write header file \"%s\"\n", path);
+	}
+
+	return ok;
+}
+
+/**
+ * Write the generated font source file
+ *
+ * After the header comment, each of the four styles gets its 256-entry
+ * section table and its sections array (sec_count * 256 glyph index values),
+ * each emitted as a static array plus a pointer to its first element.  The
+ * glyph data array follows: one all-0xff glyph, then every unique glyph in
+ * data->e.
+ *
+ * \param path  Path of the source file to create
+ * \param data  Font data to write out
+ * \return true on success, false if the file could not be opened or if
+ *         writing or closing it failed
+ */
+static bool generate_font_source(const char *path, struct font_data *data)
+{
+	int s, i, y;
+	int limit;
+	bool ok;
+	FILE *fp;
+
+	fp = fopen(path, "wb");
+	if (fp == NULL) {
+		LOG(LOG_ERROR, "Couldn't open output file \"%s\"\n", path);
+		return false;
+	}
+
+	fprintf(fp, "/*\n");
+	fwrite(data->header, 1, data->header_len, fp);
+	fprintf(fp, " */\n\n");
+	fprintf(fp, "/* Don't edit this file, it was generated from the "
+			"plain text source data. */\n\n");
+
+	fprintf(fp, "#include <stdint.h>\n");
+	fprintf(fp, "\n");
+
+	for (s = 0; s < 4; s++) {
+
+		fprintf(fp, "static const uint8_t %s_section_table_c[256] = {\n",
+				var_lables[s]);
+
+		/* Eight values per line; the final value takes no comma */
+		for (i = 0; i < 256; i++) {
+			if (i == 255)
+				fprintf(fp, "0x%.2X\n",
+						data->section_table[s][i]);
+			else if (i % 8 == 7)
+				fprintf(fp, "0x%.2X,\n",
+						data->section_table[s][i]);
+			else if (i % 8 == 0)
+				fprintf(fp, "\t0x%.2X, ",
+						data->section_table[s][i]);
+			else
+				fprintf(fp, "0x%.2X, ",
+						data->section_table[s][i]);
+		}
+
+		fprintf(fp, "};\nconst uint8_t *%s_section_table = &%s_section_table_c[0];\n\n",
+				var_lables[s], var_lables[s]);
+		fprintf(fp, "static const uint16_t %s_sections_c[%i] = {\n",
+				var_lables[s], data->sec_count[s] * 256);
+
+		/* Four values per line; the final value takes no comma */
+		limit = data->sec_count[s] * 256;
+		for (i = 0; i < limit; i++) {
+			uint16_t offset = data->sections[s][i];
+			if (i == limit - 1)
+				fprintf(fp, "0x%.4X\n", offset);
+			else if (i % 4 == 3)
+				fprintf(fp, "0x%.4X,\n", offset);
+			else if (i % 4 == 0)
+				fprintf(fp, "\t0x%.4X, ", offset);
+			else
+				fprintf(fp, "0x%.4X, ", offset);
+		}
+
+		fprintf(fp, "};\nconst uint16_t *%s_sections = &%s_sections_c[0];\n\n", var_lables[s], var_lables[s]);
+	}
+
+	fprintf(fp, "static const uint8_t font_glyph_data_c[%i] = {\n",
+			(data->glyphs + 1) * 16);
+
+	/* Slot 0 is an all-0xff glyph; the unique glyphs follow it */
+	fprintf(fp, "\t0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,\n"
+			"\t0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,\n");
+
+	limit = data->glyphs;
+	for (i = 0; i < limit; i++) {
+		glyph_entry *e = data->e[i];
+
+		for (y = 0; y < 16; y++) {
+			if (i == limit - 1 && y == 15)
+				fprintf(fp, "0x%.2X\n", e->data.u8[y]);
+			else if (y % 8 == 7)
+				fprintf(fp, "0x%.2X,\n", e->data.u8[y]);
+			else if (y % 8 == 0)
+				fprintf(fp, "\t0x%.2X, ", e->data.u8[y]);
+			else
+				fprintf(fp, "0x%.2X, ", e->data.u8[y]);
+		}
+	}
+
+	fprintf(fp, "};\n");
+	fprintf(fp, "const uint8_t *font_glyph_data = &font_glyph_data_c[0];\n\n");
+
+	/* Output is buffered: the stream's error flag records any failed
+	 * write so far, and fclose() reports a failure of the final flush. */
+	ok = (ferror(fp) == 0);
+	if (fclose(fp) != 0) {
+		ok = false;
+	}
+	if (!ok) {
+		LOG(LOG_ERROR, "Couldn't write output file \"%s\"\n", path);
+	}
+
+	return ok;
+}
+
+/**
+ * Record a parsed glyph against a codepoint in one style's lookup data.
+ *
+ * The glyph is first offered to the glyph table; whichever entry comes back
+ * (either 'add' itself, or a different entry when 'add' is a duplicate) is
+ * the one whose index gets stored for the codepoint.
+ *
+ * Note: a section table value of 0 is used as the "not yet allocated"
+ * marker for non-zero section numbers, while section 0 is treated as
+ * allocated as soon as any section storage exists for the style.
+ *
+ * \param add    Newly parsed glyph
+ * \param id     Codepoint the glyph belongs to
+ * \param style  Style index selecting the section table and section data
+ * \param d      Font data being built
+ * \return true on success, false if the glyph limit was reached or memory
+ *         could not be allocated
+ */
+static bool add_glyph_to_data(glyph_entry *add, int id, int style,
+		struct font_data *d)
+{
+	const int section = id / 256;
+	glyph_entry *entry;
+	bool need_section;
+	int slot;
+
+	/* The table hands back 'add' itself only when the glyph is new */
+	entry = glyph_add_to_table(add);
+	if (entry != add) {
+		/* Duplicate glyph */
+		LOG(LOG_DEBUG, " U+%.4X (%s) is duplicate\n",
+				id, short_labels[style]);
+	} else {
+		/* Unique glyph: its index is the glyph count after appending */
+		d->e[d->glyphs] = entry;
+		d->glyphs++;
+		entry->index = d->glyphs;
+		if (d->glyphs >= 0xfffd) {
+			LOG(LOG_ERROR, " Too many glyphs for internal data "
+					"representation\n");
+			return false;
+		}
+	}
+
+	/* Decide whether this codepoint's section still needs storage */
+	if (section == 0) {
+		need_section = (d->sections[style] == NULL);
+	} else {
+		need_section = (d->section_table[style][section] == 0);
+	}
+
+	if (need_section) {
+		const size_t bytes = (d->sec_count[style] + 1) * SECTION_SIZE;
+		uint16_t *grown = realloc(d->sections[style], bytes);
+
+		if (grown == NULL) {
+			LOG(LOG_ERROR, " Couldn't increase sections "
+					"allocation\n");
+			return false;
+		}
+
+		/* Zero only the newly appended section */
+		memset(grown + d->sec_count[style] * 256, 0,
+				SECTION_SIZE);
+		d->sections[style] = grown;
+		d->section_table[style][section] = d->sec_count[style];
+		d->sec_count[style]++;
+	}
+
+	slot = d->section_table[style][section] * 256 + (id & 0xff);
+	d->sections[style][slot] = entry->index;
+
+	return true;
+}
+
+
+/**
+ * Check that a character is acceptable at a given position of a glyph
+ * data line.
+ *
+ * A line is treated as repeating 11-character cells: the first three
+ * characters of each cell are separators and the remaining eight carry
+ * pixel data ('.' or '#').  Position 44 must hold the line's newline.
+ *
+ * \param pos  Position of the character within the current line
+ * \param c    Character found at that position
+ * \return true if the character is allowed there, false (after logging
+ *         an error) otherwise
+ */
+static bool check_glyph_data_valid(int pos, char c)
+{
+	const int column = pos % 11;
+
+	if (pos == 44) {
+		/* Hard end of line */
+		if (c == '\n') {
+			return true;
+		}
+		LOG(LOG_ERROR, " Invalid glyph data: "
+				"expecting '\\n', got '%c' (%i)\n",
+				c, c);
+		return false;
+	}
+
+	if (pos < 3 || (column != 0 && column < 3)) {
+		/* Leading indent, or the rest of a separator between cells */
+		if (c == ' ') {
+			return true;
+		}
+		LOG(LOG_ERROR, " Invalid glyph data: "
+				"expecting ' ', got '%c' (%i)\n",
+				c, c);
+		return false;
+	}
+
+	if (column == 0) {
+		/* Start of a later cell: the line may also end here */
+		if (c == '\n' || c == ' ') {
+			return true;
+		}
+		LOG(LOG_ERROR, " Invalid glyph data: "
+				"expecting '\\n' or ' ', "
+				"got '%c' (%i)\n",
+				c, c);
+		return false;
+	}
+
+	/* From here on column is always >= 3, i.e. inside pixel data */
+	if (pos < 11) {
+		/* First cell: pixel data is mandatory */
+		if (c == '.' || c == '#') {
+			return true;
+		}
+		LOG(LOG_ERROR, " Invalid glyph data: "
+				"expecting '.' or '#', "
+				"got '%c' (%i)\n",
+				c, c);
+		return false;
+	}
+
+	/* Later cells: pixel data, or blank when the style is absent */
+	if (c == '.' || c == '#' || c == ' ') {
+		return true;
+	}
+	LOG(LOG_ERROR, " Invalid glyph data: "
+			"expecting '.', '#', or ' ', "
+			"got '%c' (%i)\n",
+			c, c);
+	return false;
+}
+
+#define SEVEN_SET ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | \
+ (1 << 4) | (1 << 5) | (1 << 6))
+
+#define THREE_SSS ((1 << 0) | (1 << 1) | (1 << 2))
+#define THREE_S_S ((1 << 0) | (1 << 2))
+#define THREE__SS ((1 << 0) | (1 << 1) )
+#define THREE_SS_ ( (1 << 1) | (1 << 2))
+#define THREE_S__ (1 << 2)
+#define THREE__S_ (1 << 1)
+#define THREE___S (1 << 0)
+
+/**
+ * 3x5 pixel shapes of the hexadecimal digits 0-F, indexed by digit value.
+ *
+ * Each element is one row of the shape, built from the THREE_* bit
+ * patterns (an 'S' in the macro name is a set pixel).  build_codepoint()
+ * reads these rows to draw the four hex digits of a code point.  The
+ * table is lookup data only, so it is file-local and read-only.
+ */
+static const uint8_t frag[16][5] = {
+	/* 0 */ { THREE_SSS, THREE_S_S, THREE_S_S, THREE_S_S, THREE_SSS },
+	/* 1 */ { THREE__S_, THREE_SS_, THREE__S_, THREE__S_, THREE_SSS },
+	/* 2 */ { THREE_SS_, THREE___S, THREE__S_, THREE_S__, THREE_SSS },
+	/* 3 */ { THREE_SS_, THREE___S, THREE_SS_, THREE___S, THREE_SS_ },
+	/* 4 */ { THREE_S_S, THREE_S_S, THREE_SSS, THREE___S, THREE___S },
+	/* 5 */ { THREE_SSS, THREE_S__, THREE_SSS, THREE___S, THREE_SSS },
+	/* 6 */ { THREE__SS, THREE_S__, THREE_SSS, THREE_S_S, THREE_SSS },
+	/* 7 */ { THREE_SSS, THREE___S, THREE__S_, THREE__S_, THREE__S_ },
+	/* 8 */ { THREE_SSS, THREE_S_S, THREE_SSS, THREE_S_S, THREE_SSS },
+	/* 9 */ { THREE_SSS, THREE_S_S, THREE_SSS, THREE___S, THREE___S },
+	/* A */ { THREE__S_, THREE_S_S, THREE_SSS, THREE_S_S, THREE_S_S },
+	/* B */ { THREE_SS_, THREE_S_S, THREE_SS_, THREE_S_S, THREE_SS_ },
+	/* C */ { THREE__S_, THREE_S_S, THREE_S__, THREE_S_S, THREE__S_ },
+	/* D */ { THREE_SS_, THREE_S_S, THREE_S_S, THREE_S_S, THREE_SS_ },
+	/* E */ { THREE_SSS, THREE_S__, THREE_SS_, THREE_S__, THREE_SSS },
+	/* F */ { THREE_SSS, THREE_S__, THREE_SS_, THREE_S__, THREE_S__ }
+};
+
+/**
+ * Draw the 16-row "boxed hex digits" glyph for a code point.
+ *
+ * Rows 1 and 15 are horizontal bars, rows 3-7 show the two most
+ * significant hex digits and rows 9-13 the two least significant ones,
+ * using the shapes in frag[].  The upper half (top bar and upper digit
+ * pair) is shifted left by one bit when not italic; the lower half is
+ * always shifted by one bit.
+ *
+ * \param id          Code point value (four hex digits are drawn)
+ * \param italic      Whether to build the italic variant
+ * \param code_point  Output buffer of at least 16 bytes, one per row
+ */
+static void build_codepoint(int id, bool italic, uint8_t *code_point)
+{
+	const int upper_shift = italic ? 0 : 1;
+	const int lower_shift = 1;
+	const int upper_l = (id >> 12);
+	const int upper_r = 0xf & (id >> 8);
+	const int lower_l = 0xf & (id >> 4);
+	const int lower_r = 0xf & id;
+	int line;
+
+	/* Top bar with blank rows either side */
+	code_point[0] = 0;
+	code_point[1] = SEVEN_SET << upper_shift;
+	code_point[2] = 0;
+
+	/* Both digit pairs are five rows tall; fill them in step */
+	for (line = 0; line < 5; line++) {
+		code_point[3 + line] =
+				(frag[upper_l][line] << (4 + upper_shift)) |
+				(frag[upper_r][line] << upper_shift);
+		code_point[9 + line] =
+				(frag[lower_l][line] << (4 + lower_shift)) |
+				(frag[lower_r][line] << lower_shift);
+	}
+
+	/* Blank row between the digit pairs */
+	code_point[8] = 0;
+
+	/* Blank row and bottom bar */
+	code_point[14] = 0;
+	code_point[15] = SEVEN_SET << lower_shift;
+}
+
+#undef SEVEN_SET
+#undef THREE_SSS
+#undef THREE_S_S
+#undef THREE__SS
+#undef THREE_SS_
+#undef THREE_S__
+#undef THREE__S_
+#undef THREE___S
+
+/**
+ * Test whether a glyph is just the generated code point glyph for its id.
+ *
+ * \param e      Glyph to test
+ * \param id     Code point the glyph was supplied for
+ * \param style  Style index; styles 1 and 3 use the italic variant
+ * \return true if the glyph matches the generated code point glyph
+ */
+static bool glyph_is_codepoint(const glyph_entry *e, int id, int style)
+{
+	const bool italic = (style == 1) || (style == 3);
+
+	/* Render the reference glyph into the shared scratch buffer */
+	build_codepoint(id, italic, code_point);
+
+	return glyphs_match(code_point, e->data.u8);
+}
+
+
+/**
+ * Consume one character of glyph bitmap data.
+ *
+ * Glyph data is 16 lines; each line holds up to four 11-character cells,
+ * one per style.  Entries are allocated while parsing the first line, and
+ * every later line must use exactly the styles present on the first line.
+ * Once all 16 lines are read, each glyph is either discarded (when it is
+ * just the generated code point glyph) or passed to add_glyph_to_data(),
+ * and the parser returns to the BEFORE_ID state.
+ *
+ * \param ctx  Parser context, in the IN_GLYPH_DATA state
+ * \param c    Character to consume
+ * \param d    Font data being built
+ * \return true on success, false on error; on error any glyph entries
+ *         still held by the context have been freed
+ */
+static bool parse_glyph_data(struct parse_context *ctx, char c,
+		struct font_data *d)
+{
+	int glyph = ctx->data.in_gd.pos / 11;
+	int g_pos = ctx->data.in_gd.pos % 11 - 3;
+	uint8_t *row;
+	bool ok;
+	int i;
+
+	/* Check that character is valid */
+	if (check_glyph_data_valid(ctx->data.in_gd.pos, c) == false) {
+		LOG(LOG_ERROR, " Error in U+%.4X data: "
+				"glyph line: %i, pos: %i\n",
+				ctx->id,
+				ctx->data.in_gd.line,
+				ctx->data.in_gd.pos);
+		goto error;
+	}
+
+	/* Allocate glyph data if needed */
+	if (ctx->data.in_gd.line == 0 &&
+			(c == '.' || c == '#')) {
+		if (ctx->data.in_gd.e[glyph] == NULL) {
+			ctx->data.in_gd.e[glyph] =
+				calloc(1, sizeof(*ctx->data.in_gd.e[glyph]));
+			if (ctx->data.in_gd.e[glyph] == NULL) {
+				LOG(LOG_ERROR, " Couldn't allocate memory for "
+						"glyph entry\n");
+				goto error;
+			}
+
+			ctx->data.in_gd.styles |= 1 << glyph;
+		}
+	}
+
+	/* Build glyph data */
+	if (c == '#') {
+		/* Entries only exist for styles seen on the first line, so a
+		 * set pixel in any other style has nowhere to be stored.
+		 * Report it now rather than dereferencing NULL below. */
+		if (ctx->data.in_gd.e[glyph] == NULL) {
+			LOG(LOG_ERROR, " Error in U+%.4X data: "
+					"glyph line: %i "
+					"styles don't match first line\n",
+					ctx->id,
+					ctx->data.in_gd.line);
+			goto error;
+		}
+
+		row = &ctx->data.in_gd.e[glyph]->data.u8[ctx->data.in_gd.line];
+		*row += 1 << (7 - g_pos);
+
+		ctx->data.in_gd.line_styles |= 1 << glyph;
+	} else if (c == '.') {
+		ctx->data.in_gd.line_styles |= 1 << glyph;
+	}
+
+	/* Deal with current position */
+	if (c == '\n') {
+		if (ctx->data.in_gd.line == 0) {
+			if (ctx->data.in_gd.e[0] == NULL) {
+				LOG(LOG_ERROR, " Error in U+%.4X data: "
+						"\"Regular\" glyph style must "
+						"be present\n", ctx->id);
+				goto error;
+			}
+		} else if (ctx->data.in_gd.styles !=
+				ctx->data.in_gd.line_styles) {
+			LOG(LOG_ERROR, " Error in U+%.4X data: "
+					"glyph line: %i "
+					"styles don't match first line\n",
+					ctx->id,
+					ctx->data.in_gd.line);
+			goto error;
+		}
+
+		ctx->data.in_gd.pos = 0;
+		ctx->data.in_gd.line++;
+		ctx->data.in_gd.line_styles = 0;
+	} else {
+		ctx->data.in_gd.pos++;
+	}
+
+	/* If we've got all the glyph data, tidy up and advance state */
+	if (ctx->data.in_gd.line == 16) {
+		for (i = 0; i < 4; i++) {
+			glyph_entry *entry = ctx->data.in_gd.e[i];
+
+			if (entry == NULL) {
+				continue;
+			}
+
+			ctx->count[i] += 1;
+
+			/* The entry is either freed here or handed over to
+			 * add_glyph_to_data(), which stores unique glyphs in
+			 * the font data.  Drop our reference first so the
+			 * error path below can never free it a second time.
+			 * NOTE(review): assumes glyph_add_to_table() takes
+			 * care of duplicate entries - confirm. */
+			ctx->data.in_gd.e[i] = NULL;
+
+			if (glyph_is_codepoint(entry, ctx->id, i)) {
+				LOG(LOG_DEBUG, " U+%.4X (%s) is "
+						"codepoint\n",
+						ctx->id,
+						short_labels[i]);
+				ctx->codepoints += 1;
+				free(entry);
+				continue;
+			}
+
+			ok = add_glyph_to_data(entry, ctx->id, i, d);
+			if (!ok) {
+				goto error;
+			}
+		}
+
+		ctx->data.before_id.new_line = false;
+		ctx->data.before_id.u = false;
+		ctx->state = BEFORE_ID;
+	}
+
+	return true;
+
+error:
+
+	/* Release whatever the context still owns */
+	for (i = 0; i < 4; i++) {
+		free(ctx->data.in_gd.e[i]);
+		ctx->data.in_gd.e[i] = NULL;
+	}
+
+	return false;
+}
+
+/**
+ * Reset a parse context to its all-zero initial state.
+ *
+ * \param ctx  Context to initialise
+ */
+static void parse_init(struct parse_context *ctx)
+{
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+/**
+ * Convert one hexadecimal digit character to its value.
+ *
+ * Only '0'-'9' and upper case 'A'-'F' are accepted.
+ *
+ * \param c  Character to convert
+ * \param v  Updated to the digit's value on success, untouched on failure
+ * \return true on success, false (after logging) for any other character
+ */
+static bool get_hex_digit_value(char c, int *v)
+{
+	if (c >= '0' && c <= '9') {
+		*v = (c - '0');
+		return true;
+	}
+
+	if (c >= 'A' && c <= 'F') {
+		*v = (10 + c - 'A');
+		return true;
+	}
+
+	LOG(LOG_ERROR, "Invalid hex digit '%c' (%i)\n", c, c);
+	return false;
+}
+
+/**
+ * Fold the n-th hex digit of a four digit code point into its value.
+ *
+ * \param c   Pointer to the digit character
+ * \param n   Digit position, 0 being the most significant of the four
+ * \param id  Code point accumulator; the digit's contribution is added
+ * \return true on success, false if the character is not a valid digit
+ */
+static bool assemble_codepoint(const char* c, int n, int *id)
+{
+	int digit;
+
+	if (!get_hex_digit_value(*c, &digit)) {
+		return false;
+	}
+
+	/* Digit 0 lands in bits 12-15, digit 3 in bits 0-3 */
+	*id += digit << (4 * (3 - n));
+
+	return true;
+}
+
+/**
+ * Append one byte to the stored header text, never writing past
+ * HEADER_MAX bytes.
+ * NOTE(review): assumes d->header holds at least HEADER_MAX bytes - confirm.
+ *
+ * \param d  Font data holding the header buffer
+ * \param c  Byte to append
+ * \return true on success, false (after logging) if the header is full
+ */
+static bool header_append(struct font_data *d, char c)
+{
+	if (d->header_len >= HEADER_MAX) {
+		LOG(LOG_ERROR, " Header too long "
+				"(>%i bytes)\n",
+				d->header_len);
+		return false;
+	}
+
+	d->header[d->header_len++] = c;
+
+	return true;
+}
+
+/**
+ * Feed one chunk of the input file through the parser state machine.
+ *
+ * State is carried in ctx between calls, so chunks may split the input
+ * at any byte.  The states are:
+ *  - START / IN_HEADER: lines starting with '*' are copied to the header
+ *  - BEFORE_ID: skip input until "U+" is seen at the start of a line
+ *  - GLYPH_ID: read four hex digits of code point
+ *  - BEFORE_GLYPH_DATA: skip until a line of 53 alternating '-' and ' '
+ *  - IN_GLYPH_DATA: handled by parse_glyph_data()
+ *
+ * \param ctx  Parser context
+ * \param buf  Chunk data
+ * \param len  Number of bytes in buf
+ * \param d    Font data being built
+ * \return true on success, false on any parse error
+ */
+static bool parse_chunk(struct parse_context *ctx, const char *buf, size_t len,
+		struct font_data *d)
+{
+	int i;
+	bool ok;
+	int count[4];
+	const char *pos = buf;
+	const char *end = buf + len;
+
+	/* Remember the totals so this chunk's contribution can be logged */
+	for (i = 0; i < 4; i++) {
+		count[i] = ctx->count[i];
+	}
+
+	while (pos < end) {
+		if (*pos == '\r') {
+			LOG(LOG_ERROR, "Detected \'\\r\': Bad line ending\n");
+			return false;
+		}
+
+		switch (ctx->state) {
+		case START:
+			if (*pos != '*') {
+				LOG(LOG_ERROR, "First character must be '*'\n");
+				printf("Got: %c (%i)\n", *pos, *pos);
+				return false;
+			}
+			d->header_len = 0;
+			ctx->data.in_header.new_line = true;
+			ctx->state = IN_HEADER;
+
+			/* Fall through */
+		case IN_HEADER:
+			if (ctx->data.in_header.new_line == true) {
+				if (*pos != '*') {
+					/* First line not starting with '*'
+					 * ends the header */
+					LOG(LOG_INFO, " Got header "
+							"(%i bytes)\n",
+							d->header_len);
+					LOG(LOG_DEBUG, " Header:\n\n%.*s\n",
+							d->header_len,
+							d->header);
+					ctx->data.before_id.new_line = false;
+					ctx->data.before_id.u = false;
+					ctx->state = BEFORE_ID;
+					/* Re-examine this character in the
+					 * new state without advancing */
+					continue;
+				}
+
+				/* Each stored header line starts with a space;
+				 * this write is bounds checked too */
+				if (!header_append(d, ' ')) {
+					return false;
+				}
+				ctx->data.in_header.new_line = false;
+
+			} else if (*pos == '\n') {
+				ctx->data.in_header.new_line = true;
+			}
+
+			if (!header_append(d, *pos)) {
+				return false;
+			}
+			break;
+
+		case BEFORE_ID:
+			if (*pos == '+' &&
+					ctx->data.before_id.new_line == true &&
+					ctx->data.before_id.u == true) {
+				ctx->data.g_id.c = 0;
+				ctx->id = 0;
+				ctx->state = GLYPH_ID;
+				break;
+
+			} else if (*pos == 'U' &&
+					ctx->data.before_id.new_line == true) {
+				ctx->data.before_id.u = true;
+
+			} else if (*pos == '\n') {
+				ctx->data.before_id.new_line = true;
+				ctx->data.before_id.u = false;
+
+			} else {
+				ctx->data.before_id.new_line = false;
+				ctx->data.before_id.u = false;
+			}
+			break;
+
+		case GLYPH_ID:
+			ok = assemble_codepoint(pos, ctx->data.g_id.c++,
+					&ctx->id);
+			if (!ok) {
+				LOG(LOG_ERROR, " Invalid glyph ID\n");
+				return false;
+			}
+
+			if (ctx->data.g_id.c == 4) {
+				/* All four digits read */
+				ctx->data.before_gd.new_line = false;
+				ctx->data.before_gd.prev_h = false;
+				ctx->data.before_gd.prev_s = false;
+				ctx->data.before_gd.c = 0;
+				ctx->state = BEFORE_GLYPH_DATA;
+			}
+			break;
+
+		case BEFORE_GLYPH_DATA:
+			/* Skip until end of dashed line */
+			if (*pos == '\n' && ctx->data.before_gd.c == 53) {
+				ctx->state = IN_GLYPH_DATA;
+				ctx->data.in_gd.e[0] = NULL;
+				ctx->data.in_gd.e[1] = NULL;
+				ctx->data.in_gd.e[2] = NULL;
+				ctx->data.in_gd.e[3] = NULL;
+				ctx->data.in_gd.line = 0;
+				ctx->data.in_gd.pos = 0;
+				ctx->data.in_gd.line_styles = 0;
+				ctx->data.in_gd.styles = 0;
+				break;
+
+			} else if (*pos == '\n') {
+				ctx->data.before_gd.new_line = true;
+				ctx->data.before_gd.prev_h = false;
+				ctx->data.before_gd.prev_s = false;
+				ctx->data.before_gd.c = 0;
+			} else if (*pos == '-' &&
+					ctx->data.before_gd.new_line == true) {
+				assert(ctx->data.before_gd.c == 0);
+				ctx->data.before_gd.new_line = false;
+				ctx->data.before_gd.c++;
+				ctx->data.before_gd.prev_h = true;
+			} else if (*pos == ' ' &&
+					ctx->data.before_gd.prev_h == true) {
+				assert(ctx->data.before_gd.prev_s == false);
+				ctx->data.before_gd.c++;
+				ctx->data.before_gd.prev_h = false;
+				ctx->data.before_gd.prev_s = true;
+			} else if (*pos == '-' &&
+					ctx->data.before_gd.prev_s == true) {
+				assert(ctx->data.before_gd.prev_h == false);
+				ctx->data.before_gd.c++;
+				ctx->data.before_gd.prev_h = true;
+				ctx->data.before_gd.prev_s = false;
+			} else {
+				/* Not part of a dashed line; start over */
+				ctx->data.before_gd.new_line = false;
+				ctx->data.before_gd.prev_h = false;
+				ctx->data.before_gd.prev_s = false;
+				ctx->data.before_gd.c = 0;
+			}
+			break;
+
+		case IN_GLYPH_DATA:
+			ok = parse_glyph_data(ctx, *pos, d);
+			if (!ok) {
+				return false;
+			}
+
+			break;
+		}
+
+		pos++;
+	}
+
+	for (i = 0; i < 4; i++) {
+		LOG(LOG_DEBUG, " %s: %i glyphs\n", labels[i],
+				ctx->count[i] - count[i]);
+	}
+
+	return true;
+}
+
+
+/**
+ * Load and parse a plain text font source file.
+ *
+ * The file is read in CHUNK_SIZE pieces which are fed to parse_chunk().
+ * Parsing must finish in the BEFORE_ID state, i.e. not part way through
+ * a glyph.
+ *
+ * \param path  Path of the font source file
+ * \param data  Updated to the newly allocated font data on success, or
+ *              NULL on failure.  The caller frees the section arrays and
+ *              the structure itself.
+ * \return true on success, false on error (everything allocated here has
+ *         then been released again)
+ */
+static bool load_font(const char *path, struct font_data **data)
+{
+	struct parse_context ctx;
+	struct font_data *d = NULL;
+	char *buf = NULL;
+	size_t file_len;
+	size_t done;
+	size_t len;
+	long end_pos;
+	int count;
+	FILE *fp;
+	bool ok;
+	int i;
+
+	*data = NULL;
+
+	fp = fopen(path, "rb");
+	if (fp == NULL) {
+		LOG(LOG_ERROR, "Couldn't open font data file\n");
+		return false;
+	}
+
+	d = calloc(1, sizeof(*d));
+	if (d == NULL) {
+		LOG(LOG_ERROR, "Couldn't allocate memory for font data\n");
+		goto fail;
+	}
+
+	/* Find filesize; check ftell()'s result while it is still signed */
+	if (fseek(fp, 0L, SEEK_END) != 0 ||
+			(end_pos = ftell(fp)) < 0 ||
+			fseek(fp, 0L, SEEK_SET) != 0) {
+		LOG(LOG_ERROR, "Could not size input file\n");
+		goto fail;
+	}
+	file_len = (size_t)end_pos;
+	LOG(LOG_DEBUG, "Input size: %zu bytes\n", file_len);
+
+	/* Allocate buffer for data chunks */
+	buf = malloc(CHUNK_SIZE);
+	if (buf == NULL) {
+		LOG(LOG_ERROR, "Couldn't allocate memory for input buffer\n");
+		goto fail;
+	}
+
+	/* Initialise parser */
+	parse_init(&ctx);
+
+	LOG(LOG_DEBUG, "Using chunk size of %i bytes\n", CHUNK_SIZE);
+
+	/* Parse the input file in chunks */
+	for (done = 0; done < file_len; done += CHUNK_SIZE) {
+		LOG(LOG_INFO, "Parsing input chunk %zu\n", done / CHUNK_SIZE);
+
+		/* Read chunk */
+		len = fread(buf, 1, CHUNK_SIZE, fp);
+		if (file_len - done < CHUNK_SIZE &&
+				len != file_len - done) {
+			LOG(LOG_WARNING, "Last chunk has suspicious size\n");
+		} else if (file_len - done >= CHUNK_SIZE &&
+				len != CHUNK_SIZE) {
+			LOG(LOG_ERROR, "Problem reading file\n");
+			goto fail;
+		}
+
+		/* Parse chunk */
+		ok = parse_chunk(&ctx, buf, len, d);
+		if (!ok) {
+			goto fail;
+		}
+		LOG(LOG_DEBUG, "Parsed %zu bytes\n", done + len);
+	}
+
+	fclose(fp);
+	fp = NULL;
+
+	if (ctx.state != BEFORE_ID) {
+		LOG(LOG_ERROR, "Unexpected end of file\n");
+		goto fail;
+	}
+
+	LOG(LOG_INFO, "Parsing complete:\n");
+	count = 0;
+	for (i = 0; i < 4; i++) {
+		LOG(LOG_INFO, " %s: %i glyphs\n", labels[i], ctx.count[i]);
+		count += ctx.count[i];
+	}
+
+	LOG(LOG_RESULT, " Total %i glyphs "
+			"(of which %i unique, %i codepoints, %i duplicates)\n",
+			count, d->glyphs, ctx.codepoints,
+			count - d->glyphs - ctx.codepoints);
+
+	free(buf);
+
+	*data = d;
+	return true;
+
+fail:
+	free(buf);
+	if (d != NULL) {
+		/* Parsing may already have allocated section arrays */
+		for (i = 0; i < 4; i++) {
+			free(d->sections[i]);
+		}
+		free(d);
+	}
+	if (fp != NULL) {
+		fclose(fp);
+	}
+
+	return false;
+}
+
+/**
+ * Print the command line usage text.
+ *
+ * The log level is forced to LOG_INFO first so the text is always shown,
+ * whatever level was selected on the command line.
+ *
+ * \param argv0  Program name to show in the usage line
+ */
+static void log_usage(const char *argv0)
+{
+	level = LOG_INFO;
+	LOG(LOG_INFO,
+			"Usage:\n"
+			"\t%s [options] <in_file> <out_file>\n"
+			"\n"
+			"Options:\n"
+			"\t--help -h Display this text\n"
+			"\t--quiet -q Don't show warnings\n"
+			"\t--verbose -v Verbose output\n"
+			"\t--debug -d Full debug output\n"
+			"\t--header -H <path> Also generate a header file at <path>\n",
+			argv0);
+}
+
+/**
+ * Program entry point: convert a plain text font source into C source,
+ * optionally also generating a header file.
+ *
+ * \param argc  Argument count
+ * \param argv  Argument vector: [options] <in_file> <out_file>
+ * \return EXIT_SUCCESS on success (or when help was requested),
+ *         EXIT_FAILURE otherwise
+ */
+int main(int argc, char** argv)
+{
+	const char *in_path = NULL;
+	const char *out_path = NULL;
+	char *header_path = NULL;
+	struct font_data *data;
+	bool ok;
+	int i;
+	int opt;
+
+	level = LOG_RESULT;
+
+	/* Handle program arguments.  getopt_long() requires the array to
+	 * end with an all-zero element. */
+	struct option long_options[] = {
+		{ "help", no_argument, NULL, 'h' },
+		{ "quiet", no_argument, NULL, 'q' },
+		{ "verbose", no_argument, NULL, 'v' },
+		{ "debug", no_argument, NULL, 'd' },
+		{ "header", required_argument, NULL, 'H' },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	while ((opt = getopt_long(argc, argv, "hqvdH:", long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'q':
+			level = LOG_WARNING;
+			break;
+
+		case 'v':
+			level = LOG_INFO;
+			break;
+
+		case 'd':
+			level = LOG_DEBUG;
+			break;
+
+		case 'H':
+			/* The last -H wins; don't leak an earlier copy */
+			free(header_path);
+			header_path = strdup(optarg);
+			if (header_path == NULL) {
+				LOG(LOG_ERROR, "Couldn't allocate memory for "
+						"header path\n");
+				return EXIT_FAILURE;
+			}
+			break;
+
+		case 'h':
+			log_usage(argv[0]);
+			free(header_path);
+			return EXIT_SUCCESS;
+
+		default:
+			log_usage(argv[0]);
+			free(header_path);
+			return EXIT_FAILURE;
+		}
+	}
+
+	if ((argc - optind) < 2) {
+		log_usage(argv[0]);
+		free(header_path);
+		return EXIT_FAILURE;
+	}
+
+	in_path = argv[optind];
+	out_path = argv[optind + 1];
+
+	LOG(LOG_DEBUG, "Using input path: \"%s\"\n", in_path);
+	LOG(LOG_DEBUG, "Using output path: \"%s\"\n", out_path);
+
+	ok = load_font(in_path, &data);
+	if (!ok) {
+		free_table();
+		free(header_path);
+		return EXIT_FAILURE;
+	}
+
+	ok = generate_font_source(out_path, data);
+	if (ok && (header_path != NULL)) {
+		ok = generate_font_header(header_path, data);
+	}
+	free(header_path);
+	free_table();
+	for (i = 0; i < 4; i++) {
+		free(data->sections[i]);
+	}
+	free(data);
+	if (!ok) {
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/tools/convert_image.c b/tools/convert_image.c
new file mode 100644
index 000000000..de772fc29
--- /dev/null
+++ b/tools/convert_image.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2009 Daniel Silverstone <dsilvers@netsurf-browser.org>
+ *
+ * This file is part of NetSurf, http://www.netsurf-browser.org/
+ *
+ * NetSurf is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * NetSurf is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdbool.h>
+#include <errno.h>
+#include <stdio.h>
+#include <png.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if PNG_LIBPNG_VER < 10209
+#define png_set_expand_gray_1_2_4_to_8(png) png_set_gray_1_2_4_to_8(png)
+#endif
+
+static png_structp png;
+static png_infop info;
+static int interlace;
+static size_t rowbytes;
+static int raw_width, raw_height;
+static int rowstride;
+static unsigned char *bitmap_data;
+static bool is_cursor = true;
+static int raw_hot_x, raw_hot_y;
+
+/* Output dimensions: cursor images lose their first row and column,
+ * which only carry the hotspot markers */
+#define WIDTH (is_cursor ? raw_width - 1 : raw_width)
+#define HEIGHT (is_cursor ? raw_height - 1 : raw_height)
+
+/* Hotspot relative to the output image; zero for non-cursor images */
+#define HOT_X (is_cursor ? raw_hot_x - 1 : 0)
+#define HOT_Y (is_cursor ? raw_hot_y - 1 : 0)
+
+/* Map an output coordinate back to a coordinate in the raw bitmap */
+#define REAL(v) (is_cursor ? (v) + 1 : (v))
+
+/* Address of the 4-byte pixel at (x, y) in the raw bitmap.  Arguments
+ * are parenthesised so expressions may be passed safely. */
+#define PPIX_AT(x,y) ((bitmap_data + (rowstride * (y))) + ((x) * 4))
+
+/* Byte offset of each channel within a pixel */
+#define R_OFF 2
+#define G_OFF 1
+#define B_OFF 0
+#define A_OFF 3
+
+/* Single channel value of the pixel at (x, y) */
+#define R_AT(x,y) (*(PPIX_AT(x,y) + R_OFF))
+#define G_AT(x,y) (*(PPIX_AT(x,y) + G_OFF))
+#define B_AT(x,y) (*(PPIX_AT(x,y) + B_OFF))
+#define A_AT(x,y) (*(PPIX_AT(x,y) + A_OFF))
+
+
+/* Print the command line synopsis to stderr. */
+static void
+usage(void)
+{
+	fputs("usage: fb_convert_image input.png output.inc varname\n",
+			stderr);
+}
+
+
+/*
+ * Scan the first row (along_row) or first column of the raw bitmap for
+ * hotspot marker pixels.
+ *
+ * Every pixel on the line must be either fully transparent, or fully
+ * opaque with no red or blue.  Each fully opaque pixel whose green is
+ * 255 is counted and its position stored through hot.
+ *
+ * Returns the number of such green pixels, or -1 if any pixel on the
+ * line breaks the rules.
+ */
+static int
+scan_hotspot_line(int length, bool along_row, int *hot)
+{
+	int found = 0;
+	int n;
+
+	for (n = 0; n < length; ++n) {
+		const unsigned char *pixel =
+				along_row ? PPIX_AT(n, 0) : PPIX_AT(0, n);
+
+		if (pixel[A_OFF] == 255) {
+			if (pixel[G_OFF] == 255) {
+				found++;
+				*hot = n;
+			}
+			if ((pixel[B_OFF] != 0) || (pixel[R_OFF] != 0)) {
+				return -1;
+			}
+		} else if (pixel[A_OFF] != 0) {
+			return -1;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Decide whether the loaded image is a pointer (cursor) image.
+ *
+ * It is one when the first row and the first column each pass the
+ * marker scan with exactly one green pixel; their positions give the raw
+ * hotspot.  Otherwise is_cursor is cleared and the image is treated as
+ * a plain bitmap.
+ */
+static void
+detect_hotspot(void)
+{
+	/* The column is only scanned once the row has been accepted */
+	if (scan_hotspot_line(raw_width, true, &raw_hot_x) != 1 ||
+			scan_hotspot_line(raw_height, false, &raw_hot_y) != 1) {
+		is_cursor = false;
+		return;
+	}
+
+	printf(" Pointer detected. Adjusted hotspot at %d, %d (0-based)\n",
+			raw_hot_x - 1, raw_hot_y - 1);
+}
+
+
+/*
+ * libpng progressive reader callback, invoked once the image header is
+ * known.
+ *
+ * Sets up transformations so every row arrives as four 8-bit channels
+ * per pixel, records the image geometry in the file-scope variables and
+ * allocates the bitmap the rows are decoded into.
+ */
+static void
+info_callback(png_structp png, png_infop info)
+{
+	/* Named so that it does not hide the file-scope 'interlace' flag
+	 * which row_callback() consults */
+	int bit_depth, color_type, interlace_type, intent;
+	double gamma;
+	png_uint_32 width, height;
+
+	/* Read the PNG details */
+	png_get_IHDR(png, info, &width, &height, &bit_depth,
+			&color_type, &interlace_type, 0, 0);
+
+	/* Set up our transformations */
+	if (color_type == PNG_COLOR_TYPE_PALETTE)
+		png_set_palette_to_rgb(png);
+	if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8)
+		png_set_expand_gray_1_2_4_to_8(png);
+	if (png_get_valid(png, info, PNG_INFO_tRNS))
+		png_set_tRNS_to_alpha(png);
+	if (bit_depth == 16)
+		png_set_strip_16(png);
+	if (color_type == PNG_COLOR_TYPE_GRAY ||
+			color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+		png_set_gray_to_rgb(png);
+	if (!(color_type & PNG_COLOR_MASK_ALPHA))
+		png_set_filler(png, 0xff, PNG_FILLER_AFTER);
+	/* gamma correction - we use 2.2 as our screen gamma
+	 * this appears to be correct (at least in respect to !Browse)
+	 * see http://www.w3.org/Graphics/PNG/all_seven.html for a test case
+	 */
+	if (png_get_sRGB(png, info, &intent))
+		png_set_gamma(png, 2.2, 0.45455);
+	else {
+		if (png_get_gAMA(png, info, &gamma))
+			png_set_gamma(png, 2.2, gamma);
+		else
+			png_set_gamma(png, 2.2, 0.45455);
+	}
+
+
+	png_read_update_info(png, info);
+
+	rowbytes = png_get_rowbytes(png, info);
+	/* Store into the file-scope flag, not a local */
+	interlace = (interlace_type == PNG_INTERLACE_ADAM7);
+	raw_width = width;
+	raw_height = height;
+
+	rowstride = raw_width * 4;
+	/* Do the size multiplication in size_t to avoid int overflow */
+	bitmap_data = malloc((size_t)rowstride * (size_t)raw_height);
+	if (bitmap_data == NULL) {
+		fprintf(stderr, "Unable to allocate memory for bitmap\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+/* Per-pass lookup tables used by row_callback() for interlaced images,
+ * indexed by pass number.
+ *  - interlace_start:     byte offset in the output row of a pass's first pixel
+ *  - interlace_step:      extra bytes skipped after each copied 4-byte pixel
+ *  - interlace_row_start: first output row of a pass
+ *  - interlace_row_step:  output row distance between a pass's rows
+ * The values match the Adam7 pass layout at 4 bytes per pixel. */
+static unsigned int interlace_start[8] = {0, 16, 0, 8, 0, 4, 0};
+static unsigned int interlace_step[8] = {28, 28, 12, 12, 4, 4, 0};
+static unsigned int interlace_row_start[8] = {0, 0, 4, 0, 2, 0, 1};
+static unsigned int interlace_row_step[8] = {8, 8, 8, 4, 4, 2, 2};
+
+/* libpng progressive reader callback, invoked for each decoded row.
+ *
+ * Copies the row into bitmap_data.  When the file-scope 'interlace' flag
+ * is set, row_num and the pixels in new_row are treated as belonging to
+ * the given pass and are scattered to their final positions using the
+ * tables above; otherwise the row is copied as-is.  A NULL new_row is
+ * ignored. */
+static void
+row_callback(png_structp png, png_bytep new_row,
+ png_uint_32 row_num, int pass)
+{
+ unsigned long i, j;
+ unsigned int start, step;
+ unsigned char *row = bitmap_data + (rowstride * row_num);
+
+ if (new_row == 0)
+ return;
+
+ if (interlace) {
+ start = interlace_start[pass];
+ step = interlace_step[pass];
+ /* Translate the row number to a row of the full image */
+ row_num = interlace_row_start[pass] +
+ interlace_row_step[pass] * row_num;
+
+ /* Copy the data to our current row taking interlacing
+ * into consideration */
+ row = bitmap_data + (rowstride * row_num);
+ /* Each iteration copies one 4-byte pixel (advancing i by 4) and
+ * then skips 'step' more bytes to the pass's next pixel */
+ for (j = 0, i = start; i < rowbytes; i += step) {
+ row[i++] = new_row[j++];
+ row[i++] = new_row[j++];
+ row[i++] = new_row[j++];
+ row[i++] = new_row[j++];
+ }
+ } else {
+ memcpy(row, new_row, rowbytes);
+ }
+}
+
+/* libpng progressive reader end-of-image callback.  Nothing is done
+ * here; it is only supplied as the end callback when the reader is set
+ * up in main(). */
+static void
+end_callback(png_structp png, png_infop info)
+{
+}
+
+
+/*
+ * Convert a PNG image into a C source fragment defining its pixel data
+ * and a struct fbtk_bitmap describing it.
+ *
+ * Arguments: input.png output.inc varname
+ *
+ * Returns 0 on success, 1 for usage or input problems and 2 for output
+ * problems.
+ */
+int
+main(int argc, char **argv)
+{
+	FILE *f;
+	unsigned char buffer[1024];
+	size_t br;
+	int x, y, c;
+
+	if (argc != 4) {
+		usage();
+		return 1;
+	}
+
+	printf(" CONVERT: %s (%s)\n", argv[1], argv[3]);
+
+	png = png_create_read_struct(PNG_LIBPNG_VER_STRING, 0, 0, 0);
+	if (png == NULL) {
+		printf(" Unable to create PNG read structure\n");
+		return 1;
+	}
+	info = png_create_info_struct(png);
+	if (info == NULL) {
+		printf(" Unable to create PNG info structure\n");
+		png_destroy_read_struct(&png, NULL, NULL);
+		return 1;
+	}
+
+	png_set_progressive_read_fn(png, NULL, info_callback, row_callback, end_callback);
+
+	f = fopen(argv[1], "rb");
+	if (f == NULL) {
+		printf(" Unable to open %s\n", argv[1]);
+		png_destroy_read_struct(&png, &info, NULL);
+		return 1;
+	}
+
+	/* Feed the file to the progressive reader.  fread() returns a
+	 * size_t and never a negative value, so errors are detected with
+	 * ferror() once the loop has ended. */
+	do {
+		br = fread(buffer, 1, sizeof(buffer), f);
+		if (br > 0) {
+			png_process_data(png, info, buffer, br);
+		}
+	} while (br > 0);
+
+	if (ferror(f)) {
+		printf("Error reading input: %s\n", strerror(errno));
+		fclose(f);
+		png_destroy_read_struct(&png, &info, NULL);
+		return 1;
+	}
+
+	fclose(f);
+	png_destroy_read_struct(&png, &info, NULL);
+
+	/* The bitmap is only allocated once the image header was decoded */
+	if (bitmap_data == NULL) {
+		printf(" No image data found in %s\n", argv[1]);
+		return 1;
+	}
+
+	detect_hotspot();
+
+	f = fopen(argv[2], "w");
+	if (f == NULL) {
+		printf(" Unable to open %s\n", argv[2]);
+		free(bitmap_data);
+		return 2;
+	}
+
+	fprintf(f, "/* This file is auto-generated from %s\n", argv[1]);
+	fprintf(f, " *\n * Do not edit this file directly.\n */\n\n");
+	fprintf(f, "#include <sys/types.h>\n\n");
+	fprintf(f, "#include <stdint.h>\n\n");
+	fprintf(f, "#include <stdbool.h>\n\n");
+	fprintf(f, "#include <libnsfb.h>\n\n");
+	fprintf(f, "#include \"netsurf/plot_style.h\"\n");
+	fprintf(f, "#include \"framebuffer/gui.h\"\n");
+	fprintf(f, "#include \"framebuffer/fbtk.h\"\n\n");
+
+	fprintf(f, "static uint8_t %s_pixdata[] = {\n", argv[3]);
+	for (y = 0; y < HEIGHT; ++y) {
+		unsigned char *rowptr = bitmap_data + (rowstride * y);
+		if (is_cursor) {
+			/* If it's a cursor, skip one row and one column */
+			rowptr += rowstride + 4;
+		}
+		fprintf(f, "\t");
+		for (x = 0; x < WIDTH; ++x) {
+			for (c = 0; c < 4; ++c) {
+				unsigned char b = *rowptr++;
+				fprintf(f, "0x%02x, ", b);
+			}
+		}
+		fprintf(f, "\n");
+	}
+	fprintf(f, "};\n\n");
+
+	fprintf(f, "struct fbtk_bitmap %s = {\n", argv[3]);
+	fprintf(f, "\t.width\t\t= %d,\n", WIDTH);
+	fprintf(f, "\t.height\t\t= %d,\n", HEIGHT);
+	fprintf(f, "\t.hot_x\t\t= %d,\n", HOT_X);
+	fprintf(f, "\t.hot_y\t\t= %d,\n", HOT_Y);
+	fprintf(f, "\t.pixdata\t= %s_pixdata,\n", argv[3]);
+
+	fprintf(f, "};\n\n");
+
+	free(bitmap_data);
+	bitmap_data = NULL;
+
+	/* Buffered output is flushed on close, so this can still fail */
+	if (fclose(f) != 0) {
+		printf(" Error writing %s\n", argv[2]);
+		return 2;
+	}
+
+	return 0;
+}
+
+/*
+ * Local Variables:
+ * c-basic-offset:8
+ * End:
+ */
diff --git a/tools/coverity-build.sh b/tools/coverity-build.sh
new file mode 100755
index 000000000..24dafd16a
--- /dev/null
+++ b/tools/coverity-build.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Copyright © 2013 Vincent Sanders <vince@netsurf-browser.org>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# * The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# CI system coverity build and submission script
+#
+# Usage: coverity-build.sh
+#
+
+# environment variables
+#
+# HOST The ABI to be compiled for
+# COVERITY_TOKEN
+# COVERITY_USER
+# COVERITY_PREFIX path to tools else default is used
+#
+# either PREFIX or JENKINS_HOME
+
+COVERITY_PROJECT="NetSurf+Browser"
+
+# build gtk2, framebuffer and monkey frontend by default
+TARGETS="gtk2 framebuffer monkey"
+
+# setup build environment
+export PREFIX="${PREFIX:-${JENKINS_HOME}/artifacts-${HOST}}"
+export PKG_CONFIG_PATH="${PREFIX}/lib/pkgconfig"
+# Only add the separator when LD_LIBRARY_PATH already has content; a
+# leading empty element would put the current directory on the search path
+export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${PREFIX}/lib"
+export PATH="${PATH}:${PREFIX}/bin"
+
+# Coverity tools location
+COVERITY_PREFIX="${COVERITY_PREFIX:-/opt/coverity/cov-analysis-linux64-7.5.0}"
+COVERITY_VERSION="$(git rev-parse HEAD)"
+
+export PATH="${PATH}:${COVERITY_PREFIX}/bin"
+
+COVERITY_TAR=coverity-scan.tar
+
+# cleanup before we start
+rm -rf cov-int/ "${COVERITY_TAR}" "${COVERITY_TAR}.gz"
+
+# TARGETS is deliberately left unquoted below so it splits into words
+for TARGET in ${TARGETS}; do
+    make clean TARGET="${TARGET}"
+done
+
+# Do the builds using coverity data gathering tool
+for TARGET in ${TARGETS}; do
+    cov-build --dir cov-int make CCACHE= TARGET="${TARGET}"
+done
+
+tar cf "${COVERITY_TAR}" cov-int
+
+gzip -9 "${COVERITY_TAR}"
+
+# Submit the compressed results to the Coverity Scan service
+curl --form "project=${COVERITY_PROJECT}" \
+     --form "token=${COVERITY_TOKEN}" \
+     --form "email=${COVERITY_USER}" \
+     --form "file=@${COVERITY_TAR}.gz" \
+     --form "version=${COVERITY_VERSION}" \
+     --form "description=Git Head build" \
+     "https://scan.coverity.com/builds?project=${COVERITY_PROJECT}"
diff --git a/tools/fetch-transifex.pl b/tools/fetch-transifex.pl
new file mode 100644
index 000000000..4d40062c9
--- /dev/null
+++ b/tools/fetch-transifex.pl
@@ -0,0 +1,127 @@
+#!/usr/bin/perl
+#
+# Copyright © 2013 Vincent Sanders <vince@netsurf-browser.org>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# * The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+=head1
+
+Retrieve a resource from the Transifex service.
+
+=cut
+
+use strict;
+use Getopt::Long ();
+use LWP::UserAgent;
+use JSON qw( decode_json );
+use Data::Dumper;
+use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY );
+
+use constant GETOPT_OPTS => qw( auto_abbrev no_getopt_compat bundling );
+use constant GETOPT_SPEC =>
+ qw( output|o=s
+ lang|l=s
+ resource|res|r=s
+ project|prj|p=s
+ user|u=s
+ password|w=s
+ help|h|? );
+
+# ensure no locale translation is applied and leave it all in UTF-8
+use bytes;
+
+# default option values:
+my %opt = qw( resource messagesany project netsurf user netsurf );
+
+sub output_stream ();
+sub usage ();
+
+# Fetch one translation of a resource from Transifex and print its
+# content to the selected output stream.
+sub main ()
+{
+    my $output;
+    my $opt_ok;
+
+    # option parsing:
+    Getopt::Long::Configure( GETOPT_OPTS );
+    $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC );
+
+    # double check the options are sane (and we weren't asked for the help)
+    # This is done before the output is opened: output_stream() creates
+    # the file exclusively, so opening it first would leave an empty file
+    # behind on a usage error and make the next run fail.
+    if( !$opt_ok || $opt{help} ||
+        !defined( $opt{lang} ) || $opt{lang} !~ /^[a-z]{2}$/ )
+    {
+        usage();
+    }
+
+    $output = output_stream();
+
+    my $transifexurl = "https://www.transifex.com/api/2/project/" . $opt{project} . "/resource/" . $opt{resource} . "/translation/" . $opt{lang} . "/";
+
+    my $ua = LWP::UserAgent->new;
+    $ua->credentials(
+        'www.transifex.com:443',
+        'Transifex API',
+        $opt{user} => $opt{password}
+        );
+
+    my $response = $ua->get( $transifexurl );
+    if (!$response->is_success) {
+        die $response->status_line . " When fetching " . $transifexurl;
+    }
+
+    # Decode the entire JSON
+    my $decoded_json = decode_json( $response->decoded_content );
+
+    print ( $output $decoded_json->{'content'} );
+}
+
+main();
+
+# Print the usage text to STDERR and exit with status 1.
+sub usage ()
+{
+    print(STDERR <<TXT );
+usage:
+ $0 -l lang-code \
+ [-o output-file] [-r resource] [-p project] [-u user] [-w password]
+
+ lang-code : en fr ko ... (no default)
+ project : transifex project (default 'netsurf')
+ resource : transifex resource (default 'messagesany')
+ user : transifex user name (default 'netsurf')
+ password : transifex password (no default)
+ output-file: defaults to standard output
+TXT
+    exit(1);
+}
+
+# Return the handle to write the result to: a newly created file when an
+# output path was given (an existing file is never overwritten), standard
+# output otherwise.
+sub output_stream ()
+{
+    return \*STDOUT unless $opt{output};
+
+    my $ofh;
+    my $mode = O_CREAT|O_EXCL|O_APPEND|O_WRONLY;
+
+    unless( sysopen( $ofh, $opt{output}, $mode ) )
+    {
+        die( "$0: Failed to open output file $opt{output}: $!\n" );
+    }
+
+    return $ofh;
+}
diff --git a/tools/git-date.sh b/tools/git-date.sh
new file mode 100755
index 000000000..d9a9059a0
--- /dev/null
+++ b/tools/git-date.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#
+# Copyright © 2016 Michael Drake <tlsa@netsurf-browser.org>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# * The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Sets the timestamp of files to the last time a commit touched them in
+# the given subtree.
+#
+# Usage: git-date.sh <PATH>
+#
+# Example: tools/git-date.sh riscos/distribution
+
+# Abort on the first failing command, including a failing `git ls-files`
+# on the left-hand side of the pipeline below.
+set -e
+set -o pipefail
+
+# List the tracked files NUL-delimited and read them one by one, so paths
+# containing whitespace, glob characters or non-ASCII bytes are passed
+# through unmodified. With no argument the path is omitted and git lists
+# everything below the current directory.
+git ls-files -z -- ${1:+"$1"} |
+while IFS= read -r -d '' FILE
+do
+    # Commit date of the most recent commit that touched this file
+    TIME=$(git log --pretty=format:%cd -n 1 --date=iso -- "$FILE")
+    touch -m -d "$TIME" -- "$FILE"
+done
diff --git a/tools/git-testament.pl b/tools/git-testament.pl
new file mode 100644
index 000000000..5a71a16df
--- /dev/null
+++ b/tools/git-testament.pl
@@ -0,0 +1,227 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+=head1
+
+Generate a testament describing the current Git status. This gets written
+out in a C form which can be used to construct the NetSurf Git testament
+file for signon notification.
+
+If there is no Git in place, the data is invented arbitrarily.
+
+=cut
+
+# Run child commands in the C locale
+$ENV{LC_ALL} = 'C';
+
+# First argument: root path; second argument: file to generate.
+# Anything left in @ARGV afterwards is consumed further down.
+my ($root, $targetfile) = splice(@ARGV, 0, 2);
+
+my %gitinfo; # The Git information
+
+# Make sure the root ends in a slash
+$root = "$root/" if ($root !~ m@/$@);
+
+# Git is considered present when a .git directory exists in the
+# current directory
+my $git_present = ( -d ".git" ) ? 1 : 0;
+
+# Produce a name for a scratch file: File::Temp::tmpnam() when File::Temp
+# can be loaded, otherwise "<pid>.gitt" in the current directory.
+sub compat_tmpnam {
+    # File::Temp was introduced in Perl 5.6.1
+    if ( eval { require File::Temp } ) {
+        return File::Temp::tmpnam();
+    }
+
+    return "$$.gitt";
+}
+
+# Return the MD5 checksum of the given string as lower-case hex.
+#
+# Uses Digest::MD5 when it can be loaded and falls back to the older MD5
+# module otherwise. Dies with an explicit message when neither is available.
+sub compat_md5_hex {
+    my $data = shift;
+
+    # Digest::MD5 was introduced in Perl 5.7.1
+    if ( eval { require Digest::MD5 } ) {
+        return Digest::MD5->new->add($data)->hexdigest;
+    }
+
+    # Only try the legacy module when it is actually needed
+    if ( eval { require MD5 } ) {
+        return MD5->hexhash($data);
+    }
+
+    die "$0: neither Digest::MD5 nor MD5 could be loaded\n";
+}
+
+# Run a shell command with its standard output redirected to a scratch
+# file and return everything the command wrote as one string.
+#
+# Always returns a defined value: the empty string is returned when the
+# scratch file cannot be opened, so callers can split or chomp the result
+# without first checking it.
+sub gather_output {
+    my $cmd = shift;
+    my $tmpfile = compat_tmpnam();
+    my $ret = "";
+
+    # Slurp mode: read the whole file in a single read
+    local $/ = undef();
+
+    system("$cmd > $tmpfile");
+
+    if (open(my $CMDH, "<", $tmpfile)) {
+        $ret = <$CMDH>;
+        close($CMDH);
+    }
+    unlink($tmpfile);
+
+    return defined($ret) ? $ret : "";
+}
+
+# Fill %gitinfo (url, revision, branch, tag) from git when it is present,
+# otherwise with fixed placeholder values.
+if ( $git_present ) {
+    my @bits = split /\s+/, `git config --get-regexp "^remote.*.url\$"`;
+    # Without a configured remote there is no second field; use an empty
+    # URL rather than carrying an undefined value through the script.
+    $gitinfo{url} = defined($bits[1]) ? $bits[1] : "";
+    chomp $gitinfo{url};
+    $gitinfo{revision} = `git rev-parse HEAD`;
+    chomp $gitinfo{revision};
+    $gitinfo{branch} = `git for-each-ref --format="\%(refname:short)" \$(git symbolic-ref HEAD 2>/dev/null || git show-ref -s HEAD)`;
+    chomp $gitinfo{branch};
+    @bits = split /\s+/, `git describe --tags --exact-match HEAD 2>/dev/null`;
+    $bits[0] = "" unless exists $bits[0];
+    $gitinfo{tag} = $bits[0];
+    # When HEAD is exactly on a tag, the tag name replaces the branch name
+    $gitinfo{branch} = $gitinfo{tag} if ($gitinfo{tag} =~ m@.@);
+} else {
+    $gitinfo{url} = "http://nowhere/tarball/";
+    $gitinfo{revision} = "unknown";
+    $gitinfo{branch} = "tarball";
+    $gitinfo{tag} = "";
+}
+
+my %gitstatus; # The Git status output
+
+# Record the two status characters of every path reported by
+# `git status --porcelain` that care_about_file() accepts.
+if ( $git_present ) {
+    foreach my $line (split(/\n/, gather_output("git status --porcelain"))) {
+        chomp $line;
+        my ($X, $Y, $fp) = ($line =~ /^(.)(.) (.+)$/);
+        # Skip anything that is not a "XY path" status line
+        next unless (defined($fp));
+        my $fn = $fp;
+        # For "a -> b" entries keep the part before the arrow. The list
+        # assignment matters: assigning the match to a plain scalar stores
+        # the match's success flag instead of the captured name.
+        ($fn) = ($fp =~ /(.+) ->/) if ($fp =~ / -> /);
+        next unless (care_about_file($fn));
+        # $X and $Y are stored exactly as reported
+
+        $gitstatus{$fn} = "$X$Y";
+    }
+}
+
+my %userinfo; # The information about the current user
+
+# Look up the passwd entry of the real user id and keep the login name and
+# a sanitised copy of the GECOS field.
+{
+    my @pwent = getpwuid($<);
+    # Missing fields become empty strings so the substitutions and the
+    # later string interpolation never operate on undefined values.
+    $userinfo{USERNAME} = defined($pwent[0]) ? $pwent[0] : "";
+    my $gecos = defined($pwent[6]) ? $pwent[6] : "";
+    $gecos =~ s/,.+//g;     # drop everything from the first comma on
+    $gecos =~ s/"/'/g;      # a double quote would end the C string
+    $gecos =~ s/\\/\\\\/g;  # escape backslashes for the C string
+    $userinfo{GECOS} = $gecos;
+}
+
+# The current date, in AmigaOS version friendly format (dd.mm.yyyy)
+
+# Only day, month (0-based) and year (offset from 1900) are needed
+my ($mday, $mon, $year) = (localtime())[3, 4, 5];
+my $compiledate = sprintf("%02d.%02d.%d", $mday, $mon + 1, $year + 1900);
+
+# Build the testament text: a sequence of #define lines
+
+my $testament = join("",
+    "#define USERNAME \"$userinfo{USERNAME}\"\n",
+    "#define GECOS \"$userinfo{GECOS}\"\n");
+
+# Root path with double quotes escaped for use inside a C string
+(my $qroot = $root) =~ s/"/\\"/g;
+
+# Host name: HOSTNAME from the environment, then the output of the
+# hostname command, then a fixed placeholder
+my $hostname = $ENV{HOSTNAME};
+
+if ( !defined($hostname) || $hostname eq "" ) {
+    # Try hostname command if env-var empty
+    $hostname = gather_output("hostname");
+    chomp $hostname;
+}
+
+if ( !defined($hostname) || $hostname eq "" ) {
+    $hostname = "unknown-host";
+}
+$hostname =~ s/"/\\"/g;
+
+$testament .= "#define WT_ROOT \"$qroot\"\n"
+            . "#define WT_HOSTNAME \"$hostname\"\n"
+            . "#define WT_COMPILEDATE \"$compiledate\"\n";
+
+# CI_BUILD is only emitted when set to a non-empty value
+my $cibuild = $ENV{CI_BUILD};
+if (defined($cibuild) && length($cibuild) > 0) {
+    $testament .= "#define CI_BUILD \"$cibuild\"\n";
+}
+
+$testament .= "#define WT_BRANCHPATH \"$gitinfo{branch}\"\n";
+
+$testament .= "#define WT_BRANCHISMASTER 1\n"
+    if ($gitinfo{branch} =~ m@^master$@);
+
+if ($gitinfo{tag} =~ m@.@) {
+    $testament .= "#define WT_BRANCHISTAG 1\n"
+                . "#define WT_TAGIS \"$gitinfo{tag}\"\n";
+}
+
+$testament .= "#define WT_NO_GIT 1\n"
+    if ($gitinfo{url} =~ m@/tarball/@);
+
+$testament .= "#define WT_REVID \"$gitinfo{revision}\"\n";
+$testament .= "#define WT_MODIFIED " . scalar(keys %gitstatus) . "\n";
+
+# One { "file", "status" } initialiser per recorded path, sorted by name
+# and separated by a comma plus line continuation
+my @entries = map { " { \"$_\", \"$gitstatus{$_}\" }" } sort keys %gitstatus;
+$testament .= "#define WT_MODIFICATIONS {\\\n"
+            . join(", \\\n", @entries)
+            . " \\\n}\n";
+
+# Checksum recorded in the existing target file. Stays empty when the file
+# is missing, cannot be opened, or holds no "MD5:<hex>" marker, so the
+# comparison that follows treats the file as out of date.
+my $oldcsum = "";
+if ( (-e $targetfile) && open(my $OLDVALUES, "<", $targetfile) ) {
+    while (my $line = <$OLDVALUES>) {
+        # The last marker in the file wins
+        if ($line =~ /MD5:([0-9a-f]+)/) {
+            $oldcsum = $1;
+        }
+    }
+    close($OLDVALUES);
+}
+
+my $newcsum = compat_md5_hex($testament);
+
+# Rewrite the target file only when the testament text changed, then
+# remove any of the remaining command line arguments that exist as files.
+if ($oldcsum ne $newcsum) {
+    print "TESTAMENT: $targetfile\n";
+    open(my $NEWVALUES, ">", $targetfile)
+        or die "$0: Failed to open $targetfile: $!\n";
+    print $NEWVALUES "/* ", $targetfile,"\n";
+    print $NEWVALUES <<'EOS';
+ *
+ * Revision testament.
+ *
+ * *WARNING* this file is automatically generated by git-testament.pl
+ *
+ * Copyright 2012 NetSurf Browser Project
+ */
+
+EOS
+
+    print $NEWVALUES "#ifndef NETSURF_REVISION_TESTAMENT\n";
+    print $NEWVALUES "#define NETSURF_REVISION_TESTAMENT \"$newcsum\"\n\n";
+    print $NEWVALUES "/* Revision testament checksum:\n";
+    print $NEWVALUES " * MD5:", $newcsum,"\n */\n\n";
+    print $NEWVALUES "/* Revision testament: */\n";
+    print $NEWVALUES $testament;
+    print $NEWVALUES "\n#endif\n";
+    # Buffered write errors only show up when the handle is closed
+    close($NEWVALUES)
+        or die "$0: Failed to write $targetfile: $!\n";
+    foreach my $unwanted (@ARGV) {
+        next unless(-e $unwanted);
+        print "TESTAMENT: Removing $unwanted\n";
+        # Best effort, as before: a failed removal is not an error
+        unlink($unwanted);
+    }
+} else {
+    print "TESTAMENT: unchanged\n";
+}
+
+exit 0;
+
+# Decide whether a path should be recorded as a modification.
+# Returns 0 for names ending in .d (DEP files), .a (archives), .md5
+# (md5sum files), .map (map files) or .gitt (testament temp files),
+# and 1 for everything else.
+sub care_about_file {
+    my ($fn) = @_;
+
+    foreach my $ext (qw( d a md5 map gitt )) {
+        return 0 if ($fn =~ /\.\Q$ext\E$/);
+    }
+
+    return 1;
+}
diff --git a/tools/idna-derived-props-gen.pl b/tools/idna-derived-props-gen.pl
new file mode 100644
index 000000000..a9e9b4b53
--- /dev/null
+++ b/tools/idna-derived-props-gen.pl
@@ -0,0 +1,182 @@
+#!/usr/bin/perl
+#
+# Copyright 2014 Chris Young <chris@unsatisfactorysoftware.co.uk>
+#
+# This file is part of NetSurf, http://www.netsurf-browser.org/
+#
+# NetSurf is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# NetSurf is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use strict;
+
+use Getopt::Long ();
+use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY );
+
+use constant GETOPT_OPTS => qw( auto_abbrev no_getopt_compat bundling );
+use constant GETOPT_SPEC =>
+ qw( output|o=s
+ properties|p=s
+ joining|j=s
+ help|h|? );
+
+# default option values:
+# (written as a plain list because qw() keeps quote characters as part of
+# each word, which would turn them into part of the file name)
+my %opt = (
+    properties => 'idna-tables-properties.csv',
+    joining    => 'DerivedJoiningType.txt',
+);
+
+# Print the command line synopsis to standard error and exit with status 1.
+sub usage
+{
+    print(STDERR <<TXT );
+usage:
+ $0 [-o output-file] -p properties-file -j joining-file
+
+ output-file : defaults to standard output
+TXT
+    exit(1);
+}
+
+# Return the handle the generated header is written to: a newly created
+# file when the 'output' option is set, otherwise standard output.
+# Dies when the output file cannot be opened.
+sub output_stream
+{
+    # no output file requested: write to standard output
+    return \*STDOUT unless( $opt{output} );
+
+    # O_EXCL together with O_CREAT refuses to reuse an existing file
+    my $flags = O_CREAT | O_EXCL | O_APPEND | O_WRONLY;
+    my $handle;
+
+    unless( sysopen( $handle, $opt{output}, $flags ) )
+    {
+        die( "$0: Failed to open output file $opt{output}: $!\n" );
+    }
+
+    return $handle;
+}
+
+# Open the input file configured for the given option name.
+#
+# Parameter: the option name ("properties" or "joining" in this script).
+# Returns a read-only file handle. Dies when no file is configured for the
+# option or when the file cannot be opened.
+sub input_stream
+{
+    my $stream = $_[0];
+    my $path = $opt{$stream};
+
+    if( $path )
+    {
+        my $ifh;
+
+        # Report the path that failed, not just the option it came from
+        sysopen( $ifh, $path, O_RDONLY ) ||
+            die( "$0: Failed to open input file $path ($stream): $!\n" );
+
+        return $ifh;
+    }
+    die( "$0: No input file for $stream");
+}
+
+# Entry point: read the properties CSV and the joining-type table and emit
+# a C header holding one initialiser table for each.
+#
+# Calls usage() (which exits) when option parsing fails or help was
+# requested. The input files are opened before the output is created.
+sub main
+{
+    my $output;
+    my $properties;
+    my $joining;
+    my $opt_ok;
+
+    # option parsing:
+    Getopt::Long::Configure( GETOPT_OPTS );
+    $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC );
+
+    # double check the options are sane (and we weren't asked for the help)
+    if( !$opt_ok || $opt{help} )
+    {
+        usage();
+    }
+
+    # open the appropriate files
+    $properties = input_stream("properties");
+    $joining = input_stream("joining");
+    $output = output_stream();
+
+
+    print { $output } <<HEADER;
+/* This file is generated by idna-derived-props-gen.pl
+ * DO NOT EDIT BY HAND
+ */
+#ifndef _NETSURF_UTILS_IDNA_PROPS_H_
+#define _NETSURF_UTILS_IDNA_PROPS_H_
+
+typedef enum idna_property {
+ IDNA_P_PVALID = 1,
+ IDNA_P_CONTEXTJ = 2,
+ IDNA_P_CONTEXTO = 3,
+ IDNA_P_DISALLOWED = 4,
+ IDNA_P_UNASSIGNED = 5
+} idna_property;
+
+typedef enum idna_unicode_jt {
+ IDNA_UNICODE_JT_U = 0,
+ IDNA_UNICODE_JT_C = 1,
+ IDNA_UNICODE_JT_D = 2,
+ IDNA_UNICODE_JT_R = 3,
+ IDNA_UNICODE_JT_T = 4,
+ IDNA_UNICODE_JT_L = 5
+} idna_unicode_jt;
+
+
+typedef struct idna_table {
+ int32_t start;
+ int32_t end;
+ union p {
+ idna_property property;
+ idna_unicode_jt jt;
+ } p;
+} idna_table;
+
+idna_table idna_derived[] = {
+HEADER
+
+
+    my $line = <$properties>; # discard header line
+
+    # Each remaining row is "first[-last],PROPERTY,description"
+    while( $line = <$properties> ) {
+        chomp( $line );
+        # a blank or short row would otherwise emit a broken initialiser
+        next if( $line =~ /^\s*$/ );
+        my ( $range, $property ) = split( /,/, $line );
+        next unless( defined( $property ) );
+        $range =~ s/\s+//g;
+        $property =~ s/^\s+|\s+$//g;
+        my ( $first, $last ) = split( /-/, $range );
+        # a single code point is a range of one
+        $last = $first unless( defined( $last ) );
+        print { $output } "\t{ 0x$first, 0x$last, .p.property = IDNA_P_$property },\n";
+    }
+
+    close($properties);
+
+    print { $output } <<HEADER;
+ { 0, 0, .p.property = 0}
+};
+
+idna_table idna_joiningtype[] = {
+HEADER
+
+
+    # Data rows are "first[..last] ; T", where T is the one-letter joining
+    # type; comment lines start with '#'
+    while( $line = <$joining> ) {
+        # chomp, not chop: a final line without a newline must keep its
+        # last character
+        chomp( $line );
+        next if( $line =~ /^\s*(?:#.*)?$/ );
+        my ( $range, $type ) = split( /;/, $line );
+        # take the first word character after the ';' rather than relying
+        # on an exact column position
+        next unless( defined( $type ) && $type =~ /^\s*(\w)/ );
+        my $jt = $1;
+        $range =~ s/\s+//g;
+        my ( $first, $last ) = split( /\.\./, $range );
+        $last = $first unless( defined( $last ) );
+        print { $output } "\t{ 0x$first, 0x$last, .p.jt = IDNA_UNICODE_JT_$jt },\n";
+    }
+
+    close($joining);
+
+    print { $output } <<HEADER;
+ { 0, 0, .p.jt = 0}
+};
+#endif
+HEADER
+
+
+}
+
+main();
diff --git a/tools/idna-tables-properties.csv b/tools/idna-tables-properties.csv
new file mode 100644
index 000000000..a74547bc9
--- /dev/null
+++ b/tools/idna-tables-properties.csv
@@ -0,0 +1,2322 @@
+Codepoint,Property,Description
+0000-002C,DISALLOWED,NULL..COMMA
+002D,PVALID,HYPHEN-MINUS
+002E-002F,DISALLOWED,FULL STOP..SOLIDUS
+0030-0039,PVALID,DIGIT ZERO..DIGIT NINE
+003A-0060,DISALLOWED,COLON..GRAVE ACCENT
+0061-007A,PVALID,LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+007B-00B6,DISALLOWED,LEFT CURLY BRACKET..PILCROW SIGN
+00B7,CONTEXTO,MIDDLE DOT
+00B8-00DE,DISALLOWED,CEDILLA..LATIN CAPITAL LETTER THORN
+00DF-00F6,PVALID,LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS
+00F7,DISALLOWED,DIVISION SIGN
+00F8-00FF,PVALID,LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS
+0100,DISALLOWED,LATIN CAPITAL LETTER A WITH MACRON
+0101,PVALID,LATIN SMALL LETTER A WITH MACRON
+0102,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE
+0103,PVALID,LATIN SMALL LETTER A WITH BREVE
+0104,DISALLOWED,LATIN CAPITAL LETTER A WITH OGONEK
+0105,PVALID,LATIN SMALL LETTER A WITH OGONEK
+0106,DISALLOWED,LATIN CAPITAL LETTER C WITH ACUTE
+0107,PVALID,LATIN SMALL LETTER C WITH ACUTE
+0108,DISALLOWED,LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+0109,PVALID,LATIN SMALL LETTER C WITH CIRCUMFLEX
+010A,DISALLOWED,LATIN CAPITAL LETTER C WITH DOT ABOVE
+010B,PVALID,LATIN SMALL LETTER C WITH DOT ABOVE
+010C,DISALLOWED,LATIN CAPITAL LETTER C WITH CARON
+010D,PVALID,LATIN SMALL LETTER C WITH CARON
+010E,DISALLOWED,LATIN CAPITAL LETTER D WITH CARON
+010F,PVALID,LATIN SMALL LETTER D WITH CARON
+0110,DISALLOWED,LATIN CAPITAL LETTER D WITH STROKE
+0111,PVALID,LATIN SMALL LETTER D WITH STROKE
+0112,DISALLOWED,LATIN CAPITAL LETTER E WITH MACRON
+0113,PVALID,LATIN SMALL LETTER E WITH MACRON
+0114,DISALLOWED,LATIN CAPITAL LETTER E WITH BREVE
+0115,PVALID,LATIN SMALL LETTER E WITH BREVE
+0116,DISALLOWED,LATIN CAPITAL LETTER E WITH DOT ABOVE
+0117,PVALID,LATIN SMALL LETTER E WITH DOT ABOVE
+0118,DISALLOWED,LATIN CAPITAL LETTER E WITH OGONEK
+0119,PVALID,LATIN SMALL LETTER E WITH OGONEK
+011A,DISALLOWED,LATIN CAPITAL LETTER E WITH CARON
+011B,PVALID,LATIN SMALL LETTER E WITH CARON
+011C,DISALLOWED,LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011D,PVALID,LATIN SMALL LETTER G WITH CIRCUMFLEX
+011E,DISALLOWED,LATIN CAPITAL LETTER G WITH BREVE
+011F,PVALID,LATIN SMALL LETTER G WITH BREVE
+0120,DISALLOWED,LATIN CAPITAL LETTER G WITH DOT ABOVE
+0121,PVALID,LATIN SMALL LETTER G WITH DOT ABOVE
+0122,DISALLOWED,LATIN CAPITAL LETTER G WITH CEDILLA
+0123,PVALID,LATIN SMALL LETTER G WITH CEDILLA
+0124,DISALLOWED,LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0125,PVALID,LATIN SMALL LETTER H WITH CIRCUMFLEX
+0126,DISALLOWED,LATIN CAPITAL LETTER H WITH STROKE
+0127,PVALID,LATIN SMALL LETTER H WITH STROKE
+0128,DISALLOWED,LATIN CAPITAL LETTER I WITH TILDE
+0129,PVALID,LATIN SMALL LETTER I WITH TILDE
+012A,DISALLOWED,LATIN CAPITAL LETTER I WITH MACRON
+012B,PVALID,LATIN SMALL LETTER I WITH MACRON
+012C,DISALLOWED,LATIN CAPITAL LETTER I WITH BREVE
+012D,PVALID,LATIN SMALL LETTER I WITH BREVE
+012E,DISALLOWED,LATIN CAPITAL LETTER I WITH OGONEK
+012F,PVALID,LATIN SMALL LETTER I WITH OGONEK
+0130,DISALLOWED,LATIN CAPITAL LETTER I WITH DOT ABOVE
+0131,PVALID,LATIN SMALL LETTER DOTLESS I
+0132-0134,DISALLOWED,LATIN CAPITAL LIGATURE IJ..LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0135,PVALID,LATIN SMALL LETTER J WITH CIRCUMFLEX
+0136,DISALLOWED,LATIN CAPITAL LETTER K WITH CEDILLA
+0137-0138,PVALID,LATIN SMALL LETTER K WITH CEDILLA..LATIN SMALL LETTER KRA
+0139,DISALLOWED,LATIN CAPITAL LETTER L WITH ACUTE
+013A,PVALID,LATIN SMALL LETTER L WITH ACUTE
+013B,DISALLOWED,LATIN CAPITAL LETTER L WITH CEDILLA
+013C,PVALID,LATIN SMALL LETTER L WITH CEDILLA
+013D,DISALLOWED,LATIN CAPITAL LETTER L WITH CARON
+013E,PVALID,LATIN SMALL LETTER L WITH CARON
+013F-0141,DISALLOWED,LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN CAPITAL LETTER L WITH STROKE
+0142,PVALID,LATIN SMALL LETTER L WITH STROKE
+0143,DISALLOWED,LATIN CAPITAL LETTER N WITH ACUTE
+0144,PVALID,LATIN SMALL LETTER N WITH ACUTE
+0145,DISALLOWED,LATIN CAPITAL LETTER N WITH CEDILLA
+0146,PVALID,LATIN SMALL LETTER N WITH CEDILLA
+0147,DISALLOWED,LATIN CAPITAL LETTER N WITH CARON
+0148,PVALID,LATIN SMALL LETTER N WITH CARON
+0149-014A,DISALLOWED,LATIN SMALL LETTER N PRECEDED BY APOSTROPHE..LATIN CAPITAL LETTER ENG
+014B,PVALID,LATIN SMALL LETTER ENG
+014C,DISALLOWED,LATIN CAPITAL LETTER O WITH MACRON
+014D,PVALID,LATIN SMALL LETTER O WITH MACRON
+014E,DISALLOWED,LATIN CAPITAL LETTER O WITH BREVE
+014F,PVALID,LATIN SMALL LETTER O WITH BREVE
+0150,DISALLOWED,LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0151,PVALID,LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0152,DISALLOWED,LATIN CAPITAL LIGATURE OE
+0153,PVALID,LATIN SMALL LIGATURE OE
+0154,DISALLOWED,LATIN CAPITAL LETTER R WITH ACUTE
+0155,PVALID,LATIN SMALL LETTER R WITH ACUTE
+0156,DISALLOWED,LATIN CAPITAL LETTER R WITH CEDILLA
+0157,PVALID,LATIN SMALL LETTER R WITH CEDILLA
+0158,DISALLOWED,LATIN CAPITAL LETTER R WITH CARON
+0159,PVALID,LATIN SMALL LETTER R WITH CARON
+015A,DISALLOWED,LATIN CAPITAL LETTER S WITH ACUTE
+015B,PVALID,LATIN SMALL LETTER S WITH ACUTE
+015C,DISALLOWED,LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015D,PVALID,LATIN SMALL LETTER S WITH CIRCUMFLEX
+015E,DISALLOWED,LATIN CAPITAL LETTER S WITH CEDILLA
+015F,PVALID,LATIN SMALL LETTER S WITH CEDILLA
+0160,DISALLOWED,LATIN CAPITAL LETTER S WITH CARON
+0161,PVALID,LATIN SMALL LETTER S WITH CARON
+0162,DISALLOWED,LATIN CAPITAL LETTER T WITH CEDILLA
+0163,PVALID,LATIN SMALL LETTER T WITH CEDILLA
+0164,DISALLOWED,LATIN CAPITAL LETTER T WITH CARON
+0165,PVALID,LATIN SMALL LETTER T WITH CARON
+0166,DISALLOWED,LATIN CAPITAL LETTER T WITH STROKE
+0167,PVALID,LATIN SMALL LETTER T WITH STROKE
+0168,DISALLOWED,LATIN CAPITAL LETTER U WITH TILDE
+0169,PVALID,LATIN SMALL LETTER U WITH TILDE
+016A,DISALLOWED,LATIN CAPITAL LETTER U WITH MACRON
+016B,PVALID,LATIN SMALL LETTER U WITH MACRON
+016C,DISALLOWED,LATIN CAPITAL LETTER U WITH BREVE
+016D,PVALID,LATIN SMALL LETTER U WITH BREVE
+016E,DISALLOWED,LATIN CAPITAL LETTER U WITH RING ABOVE
+016F,PVALID,LATIN SMALL LETTER U WITH RING ABOVE
+0170,DISALLOWED,LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0171,PVALID,LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0172,DISALLOWED,LATIN CAPITAL LETTER U WITH OGONEK
+0173,PVALID,LATIN SMALL LETTER U WITH OGONEK
+0174,DISALLOWED,LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0175,PVALID,LATIN SMALL LETTER W WITH CIRCUMFLEX
+0176,DISALLOWED,LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0177,PVALID,LATIN SMALL LETTER Y WITH CIRCUMFLEX
+0178-0179,DISALLOWED,LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE
+017A,PVALID,LATIN SMALL LETTER Z WITH ACUTE
+017B,DISALLOWED,LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017C,PVALID,LATIN SMALL LETTER Z WITH DOT ABOVE
+017D,DISALLOWED,LATIN CAPITAL LETTER Z WITH CARON
+017E,PVALID,LATIN SMALL LETTER Z WITH CARON
+017F,DISALLOWED,LATIN SMALL LETTER LONG S
+0180,PVALID,LATIN SMALL LETTER B WITH STROKE
+0181-0182,DISALLOWED,LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR
+0183,PVALID,LATIN SMALL LETTER B WITH TOPBAR
+0184,DISALLOWED,LATIN CAPITAL LETTER TONE SIX
+0185,PVALID,LATIN SMALL LETTER TONE SIX
+0186-0187,DISALLOWED,LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK
+0188,PVALID,LATIN SMALL LETTER C WITH HOOK
+0189-018B,DISALLOWED,LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR
+018C-018D,PVALID,LATIN SMALL LETTER D WITH TOPBAR..LATIN SMALL LETTER TURNED DELTA
+018E-0191,DISALLOWED,LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK
+0192,PVALID,LATIN SMALL LETTER F WITH HOOK
+0193-0194,DISALLOWED,LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA
+0195,PVALID,LATIN SMALL LETTER HV
+0196-0198,DISALLOWED,LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK
+0199-019B,PVALID,LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE
+019C-019D,DISALLOWED,LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK
+019E,PVALID,LATIN SMALL LETTER N WITH LONG RIGHT LEG
+019F-01A0,DISALLOWED,LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN CAPITAL LETTER O WITH HORN
+01A1,PVALID,LATIN SMALL LETTER O WITH HORN
+01A2,DISALLOWED,LATIN CAPITAL LETTER OI
+01A3,PVALID,LATIN SMALL LETTER OI
+01A4,DISALLOWED,LATIN CAPITAL LETTER P WITH HOOK
+01A5,PVALID,LATIN SMALL LETTER P WITH HOOK
+01A6-01A7,DISALLOWED,LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO
+01A8,PVALID,LATIN SMALL LETTER TONE TWO
+01A9,DISALLOWED,LATIN CAPITAL LETTER ESH
+01AA-01AB,PVALID,LATIN LETTER REVERSED ESH LOOP..LATIN SMALL LETTER T WITH PALATAL HOOK
+01AC,DISALLOWED,LATIN CAPITAL LETTER T WITH HOOK
+01AD,PVALID,LATIN SMALL LETTER T WITH HOOK
+01AE-01AF,DISALLOWED,LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN
+01B0,PVALID,LATIN SMALL LETTER U WITH HORN
+01B1-01B3,DISALLOWED,LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK
+01B4,PVALID,LATIN SMALL LETTER Y WITH HOOK
+01B5,DISALLOWED,LATIN CAPITAL LETTER Z WITH STROKE
+01B6,PVALID,LATIN SMALL LETTER Z WITH STROKE
+01B7-01B8,DISALLOWED,LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED
+01B9-01BB,PVALID,LATIN SMALL LETTER EZH REVERSED..LATIN LETTER TWO WITH STROKE
+01BC,DISALLOWED,LATIN CAPITAL LETTER TONE FIVE
+01BD-01C3,PVALID,LATIN SMALL LETTER TONE FIVE..LATIN LETTER RETROFLEX CLICK
+01C4-01CD,DISALLOWED,LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER A WITH CARON
+01CE,PVALID,LATIN SMALL LETTER A WITH CARON
+01CF,DISALLOWED,LATIN CAPITAL LETTER I WITH CARON
+01D0,PVALID,LATIN SMALL LETTER I WITH CARON
+01D1,DISALLOWED,LATIN CAPITAL LETTER O WITH CARON
+01D2,PVALID,LATIN SMALL LETTER O WITH CARON
+01D3,DISALLOWED,LATIN CAPITAL LETTER U WITH CARON
+01D4,PVALID,LATIN SMALL LETTER U WITH CARON
+01D5,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D6,PVALID,LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
+01D7,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D8,PVALID,LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
+01D9,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DA,PVALID,LATIN SMALL LETTER U WITH DIAERESIS AND CARON
+01DB,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DC-01DD,PVALID,LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E
+01DE,DISALLOWED,LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01DF,PVALID,LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
+01E0,DISALLOWED,LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E1,PVALID,LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
+01E2,DISALLOWED,LATIN CAPITAL LETTER AE WITH MACRON
+01E3,PVALID,LATIN SMALL LETTER AE WITH MACRON
+01E4,DISALLOWED,LATIN CAPITAL LETTER G WITH STROKE
+01E5,PVALID,LATIN SMALL LETTER G WITH STROKE
+01E6,DISALLOWED,LATIN CAPITAL LETTER G WITH CARON
+01E7,PVALID,LATIN SMALL LETTER G WITH CARON
+01E8,DISALLOWED,LATIN CAPITAL LETTER K WITH CARON
+01E9,PVALID,LATIN SMALL LETTER K WITH CARON
+01EA,DISALLOWED,LATIN CAPITAL LETTER O WITH OGONEK
+01EB,PVALID,LATIN SMALL LETTER O WITH OGONEK
+01EC,DISALLOWED,LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01ED,PVALID,LATIN SMALL LETTER O WITH OGONEK AND MACRON
+01EE,DISALLOWED,LATIN CAPITAL LETTER EZH WITH CARON
+01EF-01F0,PVALID,LATIN SMALL LETTER EZH WITH CARON..LATIN SMALL LETTER J WITH CARON
+01F1-01F4,DISALLOWED,LATIN CAPITAL LETTER DZ..LATIN CAPITAL LETTER G WITH ACUTE
+01F5,PVALID,LATIN SMALL LETTER G WITH ACUTE
+01F6-01F8,DISALLOWED,LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE
+01F9,PVALID,LATIN SMALL LETTER N WITH GRAVE
+01FA,DISALLOWED,LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FB,PVALID,LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
+01FC,DISALLOWED,LATIN CAPITAL LETTER AE WITH ACUTE
+01FD,PVALID,LATIN SMALL LETTER AE WITH ACUTE
+01FE,DISALLOWED,LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+01FF,PVALID,LATIN SMALL LETTER O WITH STROKE AND ACUTE
+0200,DISALLOWED,LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0201,PVALID,LATIN SMALL LETTER A WITH DOUBLE GRAVE
+0202,DISALLOWED,LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0203,PVALID,LATIN SMALL LETTER A WITH INVERTED BREVE
+0204,DISALLOWED,LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0205,PVALID,LATIN SMALL LETTER E WITH DOUBLE GRAVE
+0206,DISALLOWED,LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0207,PVALID,LATIN SMALL LETTER E WITH INVERTED BREVE
+0208,DISALLOWED,LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+0209,PVALID,LATIN SMALL LETTER I WITH DOUBLE GRAVE
+020A,DISALLOWED,LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020B,PVALID,LATIN SMALL LETTER I WITH INVERTED BREVE
+020C,DISALLOWED,LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020D,PVALID,LATIN SMALL LETTER O WITH DOUBLE GRAVE
+020E,DISALLOWED,LATIN CAPITAL LETTER O WITH INVERTED BREVE
+020F,PVALID,LATIN SMALL LETTER O WITH INVERTED BREVE
+0210,DISALLOWED,LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0211,PVALID,LATIN SMALL LETTER R WITH DOUBLE GRAVE
+0212,DISALLOWED,LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0213,PVALID,LATIN SMALL LETTER R WITH INVERTED BREVE
+0214,DISALLOWED,LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0215,PVALID,LATIN SMALL LETTER U WITH DOUBLE GRAVE
+0216,DISALLOWED,LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0217,PVALID,LATIN SMALL LETTER U WITH INVERTED BREVE
+0218,DISALLOWED,LATIN CAPITAL LETTER S WITH COMMA BELOW
+0219,PVALID,LATIN SMALL LETTER S WITH COMMA BELOW
+021A,DISALLOWED,LATIN CAPITAL LETTER T WITH COMMA BELOW
+021B,PVALID,LATIN SMALL LETTER T WITH COMMA BELOW
+021C,DISALLOWED,LATIN CAPITAL LETTER YOGH
+021D,PVALID,LATIN SMALL LETTER YOGH
+021E,DISALLOWED,LATIN CAPITAL LETTER H WITH CARON
+021F,PVALID,LATIN SMALL LETTER H WITH CARON
+0220,DISALLOWED,LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+0221,PVALID,LATIN SMALL LETTER D WITH CURL
+0222,DISALLOWED,LATIN CAPITAL LETTER OU
+0223,PVALID,LATIN SMALL LETTER OU
+0224,DISALLOWED,LATIN CAPITAL LETTER Z WITH HOOK
+0225,PVALID,LATIN SMALL LETTER Z WITH HOOK
+0226,DISALLOWED,LATIN CAPITAL LETTER A WITH DOT ABOVE
+0227,PVALID,LATIN SMALL LETTER A WITH DOT ABOVE
+0228,DISALLOWED,LATIN CAPITAL LETTER E WITH CEDILLA
+0229,PVALID,LATIN SMALL LETTER E WITH CEDILLA
+022A,DISALLOWED,LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022B,PVALID,LATIN SMALL LETTER O WITH DIAERESIS AND MACRON
+022C,DISALLOWED,LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022D,PVALID,LATIN SMALL LETTER O WITH TILDE AND MACRON
+022E,DISALLOWED,LATIN CAPITAL LETTER O WITH DOT ABOVE
+022F,PVALID,LATIN SMALL LETTER O WITH DOT ABOVE
+0230,DISALLOWED,LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0231,PVALID,LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON
+0232,DISALLOWED,LATIN CAPITAL LETTER Y WITH MACRON
+0233-0239,PVALID,LATIN SMALL LETTER Y WITH MACRON..LATIN SMALL LETTER QP DIGRAPH
+023A-023B,DISALLOWED,LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE
+023C,PVALID,LATIN SMALL LETTER C WITH STROKE
+023D-023E,DISALLOWED,LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+023F-0240,PVALID,LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL
+0241,DISALLOWED,LATIN CAPITAL LETTER GLOTTAL STOP
+0242,PVALID,LATIN SMALL LETTER GLOTTAL STOP
+0243-0246,DISALLOWED,LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE
+0247,PVALID,LATIN SMALL LETTER E WITH STROKE
+0248,DISALLOWED,LATIN CAPITAL LETTER J WITH STROKE
+0249,PVALID,LATIN SMALL LETTER J WITH STROKE
+024A,DISALLOWED,LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
+024B,PVALID,LATIN SMALL LETTER Q WITH HOOK TAIL
+024C,DISALLOWED,LATIN CAPITAL LETTER R WITH STROKE
+024D,PVALID,LATIN SMALL LETTER R WITH STROKE
+024E,DISALLOWED,LATIN CAPITAL LETTER Y WITH STROKE
+024F-02AF,PVALID,LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
+02B0-02B8,DISALLOWED,MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
+02B9-02C1,PVALID,MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP
+02C2-02C5,DISALLOWED,MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
+02C6-02D1,PVALID,MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
+02D2-02EB,DISALLOWED,MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER YANG DEPARTING TONE MARK
+02EC,PVALID,MODIFIER LETTER VOICING
+02ED,DISALLOWED,MODIFIER LETTER UNASPIRATED
+02EE,PVALID,MODIFIER LETTER DOUBLE APOSTROPHE
+02EF-02FF,DISALLOWED,MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
+0300-033F,PVALID,COMBINING GRAVE ACCENT..COMBINING DOUBLE OVERLINE
+0340-0341,DISALLOWED,COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
+0342,PVALID,COMBINING GREEK PERISPOMENI
+0343-0345,DISALLOWED,COMBINING GREEK KORONIS..COMBINING GREEK YPOGEGRAMMENI
+0346-034E,PVALID,COMBINING BRIDGE ABOVE..COMBINING UPWARDS ARROW BELOW
+034F,DISALLOWED,COMBINING GRAPHEME JOINER
+0350-036F,PVALID,COMBINING RIGHT ARROWHEAD ABOVE..COMBINING LATIN SMALL LETTER X
+0370,DISALLOWED,GREEK CAPITAL LETTER HETA
+0371,PVALID,GREEK SMALL LETTER HETA
+0372,DISALLOWED,GREEK CAPITAL LETTER ARCHAIC SAMPI
+0373,PVALID,GREEK SMALL LETTER ARCHAIC SAMPI
+0374,DISALLOWED,GREEK NUMERAL SIGN
+0375,CONTEXTO,GREEK LOWER NUMERAL SIGN
+0376,DISALLOWED,GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+0377,PVALID,GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
+0378-0379,UNASSIGNED,<RESERVED>..<RESERVED>
+037A,DISALLOWED,GREEK YPOGEGRAMMENI
+037B-037D,PVALID,GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
+037E,DISALLOWED,GREEK QUESTION MARK
+037F-0383,UNASSIGNED,<RESERVED>..<RESERVED>
+0384-038A,DISALLOWED,GREEK TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
+038B,UNASSIGNED,<RESERVED>
+038C,DISALLOWED,GREEK CAPITAL LETTER OMICRON WITH TONOS
+038D,UNASSIGNED,<RESERVED>
+038E-038F,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS
+0390,PVALID,GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391-03A1,DISALLOWED,GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO
+03A2,UNASSIGNED,<RESERVED>
+03A3-03AB,DISALLOWED,GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03AC-03CE,PVALID,GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS
+03CF-03D6,DISALLOWED,GREEK CAPITAL KAI SYMBOL..GREEK PI SYMBOL
+03D7,PVALID,GREEK KAI SYMBOL
+03D8,DISALLOWED,GREEK LETTER ARCHAIC KOPPA
+03D9,PVALID,GREEK SMALL LETTER ARCHAIC KOPPA
+03DA,DISALLOWED,GREEK LETTER STIGMA
+03DB,PVALID,GREEK SMALL LETTER STIGMA
+03DC,DISALLOWED,GREEK LETTER DIGAMMA
+03DD,PVALID,GREEK SMALL LETTER DIGAMMA
+03DE,DISALLOWED,GREEK LETTER KOPPA
+03DF,PVALID,GREEK SMALL LETTER KOPPA
+03E0,DISALLOWED,GREEK LETTER SAMPI
+03E1,PVALID,GREEK SMALL LETTER SAMPI
+03E2,DISALLOWED,COPTIC CAPITAL LETTER SHEI
+03E3,PVALID,COPTIC SMALL LETTER SHEI
+03E4,DISALLOWED,COPTIC CAPITAL LETTER FEI
+03E5,PVALID,COPTIC SMALL LETTER FEI
+03E6,DISALLOWED,COPTIC CAPITAL LETTER KHEI
+03E7,PVALID,COPTIC SMALL LETTER KHEI
+03E8,DISALLOWED,COPTIC CAPITAL LETTER HORI
+03E9,PVALID,COPTIC SMALL LETTER HORI
+03EA,DISALLOWED,COPTIC CAPITAL LETTER GANGIA
+03EB,PVALID,COPTIC SMALL LETTER GANGIA
+03EC,DISALLOWED,COPTIC CAPITAL LETTER SHIMA
+03ED,PVALID,COPTIC SMALL LETTER SHIMA
+03EE,DISALLOWED,COPTIC CAPITAL LETTER DEI
+03EF,PVALID,COPTIC SMALL LETTER DEI
+03F0-03F2,DISALLOWED,GREEK KAPPA SYMBOL..GREEK LUNATE SIGMA SYMBOL
+03F3,PVALID,GREEK LETTER YOT
+03F4-03F7,DISALLOWED,GREEK CAPITAL THETA SYMBOL..GREEK CAPITAL LETTER SHO
+03F8,PVALID,GREEK SMALL LETTER SHO
+03F9-03FA,DISALLOWED,GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN
+03FB-03FC,PVALID,GREEK SMALL LETTER SAN..GREEK RHO WITH STROKE SYMBOL
+03FD-042F,DISALLOWED,GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA
+0430-045F,PVALID,CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE
+0460,DISALLOWED,CYRILLIC CAPITAL LETTER OMEGA
+0461,PVALID,CYRILLIC SMALL LETTER OMEGA
+0462,DISALLOWED,CYRILLIC CAPITAL LETTER YAT
+0463,PVALID,CYRILLIC SMALL LETTER YAT
+0464,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED E
+0465,PVALID,CYRILLIC SMALL LETTER IOTIFIED E
+0466,DISALLOWED,CYRILLIC CAPITAL LETTER LITTLE YUS
+0467,PVALID,CYRILLIC SMALL LETTER LITTLE YUS
+0468,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+0469,PVALID,CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
+046A,DISALLOWED,CYRILLIC CAPITAL LETTER BIG YUS
+046B,PVALID,CYRILLIC SMALL LETTER BIG YUS
+046C,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046D,PVALID,CYRILLIC SMALL LETTER IOTIFIED BIG YUS
+046E,DISALLOWED,CYRILLIC CAPITAL LETTER KSI
+046F,PVALID,CYRILLIC SMALL LETTER KSI
+0470,DISALLOWED,CYRILLIC CAPITAL LETTER PSI
+0471,PVALID,CYRILLIC SMALL LETTER PSI
+0472,DISALLOWED,CYRILLIC CAPITAL LETTER FITA
+0473,PVALID,CYRILLIC SMALL LETTER FITA
+0474,DISALLOWED,CYRILLIC CAPITAL LETTER IZHITSA
+0475,PVALID,CYRILLIC SMALL LETTER IZHITSA
+0476,DISALLOWED,CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0477,PVALID,CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478,DISALLOWED,CYRILLIC CAPITAL LETTER UK
+0479,PVALID,CYRILLIC SMALL LETTER UK
+047A,DISALLOWED,CYRILLIC CAPITAL LETTER ROUND OMEGA
+047B,PVALID,CYRILLIC SMALL LETTER ROUND OMEGA
+047C,DISALLOWED,CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047D,PVALID,CYRILLIC SMALL LETTER OMEGA WITH TITLO
+047E,DISALLOWED,CYRILLIC CAPITAL LETTER OT
+047F,PVALID,CYRILLIC SMALL LETTER OT
+0480,DISALLOWED,CYRILLIC CAPITAL LETTER KOPPA
+0481,PVALID,CYRILLIC SMALL LETTER KOPPA
+0482,DISALLOWED,CYRILLIC THOUSANDS SIGN
+0483-0487,PVALID,COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
+0488-048A,DISALLOWED,COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+048B,PVALID,CYRILLIC SMALL LETTER SHORT I WITH TAIL
+048C,DISALLOWED,CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048D,PVALID,CYRILLIC SMALL LETTER SEMISOFT SIGN
+048E,DISALLOWED,CYRILLIC CAPITAL LETTER ER WITH TICK
+048F,PVALID,CYRILLIC SMALL LETTER ER WITH TICK
+0490,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0491,PVALID,CYRILLIC SMALL LETTER GHE WITH UPTURN
+0492,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0493,PVALID,CYRILLIC SMALL LETTER GHE WITH STROKE
+0494,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0495,PVALID,CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
+0496,DISALLOWED,CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0497,PVALID,CYRILLIC SMALL LETTER ZHE WITH DESCENDER
+0498,DISALLOWED,CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+0499,PVALID,CYRILLIC SMALL LETTER ZE WITH DESCENDER
+049A,DISALLOWED,CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049B,PVALID,CYRILLIC SMALL LETTER KA WITH DESCENDER
+049C,DISALLOWED,CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049D,PVALID,CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
+049E,DISALLOWED,CYRILLIC CAPITAL LETTER KA WITH STROKE
+049F,PVALID,CYRILLIC SMALL LETTER KA WITH STROKE
+04A0,DISALLOWED,CYRILLIC CAPITAL LETTER BASHKIR KA
+04A1,PVALID,CYRILLIC SMALL LETTER BASHKIR KA
+04A2,DISALLOWED,CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A3,PVALID,CYRILLIC SMALL LETTER EN WITH DESCENDER
+04A4,DISALLOWED,CYRILLIC CAPITAL LIGATURE EN GHE
+04A5,PVALID,CYRILLIC SMALL LIGATURE EN GHE
+04A6,DISALLOWED,CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A7,PVALID,CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
+04A8,DISALLOWED,CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04A9,PVALID,CYRILLIC SMALL LETTER ABKHASIAN HA
+04AA,DISALLOWED,CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AB,PVALID,CYRILLIC SMALL LETTER ES WITH DESCENDER
+04AC,DISALLOWED,CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AD,PVALID,CYRILLIC SMALL LETTER TE WITH DESCENDER
+04AE,DISALLOWED,CYRILLIC CAPITAL LETTER STRAIGHT U
+04AF,PVALID,CYRILLIC SMALL LETTER STRAIGHT U
+04B0,DISALLOWED,CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B1,PVALID,CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
+04B2,DISALLOWED,CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B3,PVALID,CYRILLIC SMALL LETTER HA WITH DESCENDER
+04B4,DISALLOWED,CYRILLIC CAPITAL LIGATURE TE TSE
+04B5,PVALID,CYRILLIC SMALL LIGATURE TE TSE
+04B6,DISALLOWED,CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B7,PVALID,CYRILLIC SMALL LETTER CHE WITH DESCENDER
+04B8,DISALLOWED,CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04B9,PVALID,CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
+04BA,DISALLOWED,CYRILLIC CAPITAL LETTER SHHA
+04BB,PVALID,CYRILLIC SMALL LETTER SHHA
+04BC,DISALLOWED,CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BD,PVALID,CYRILLIC SMALL LETTER ABKHASIAN CHE
+04BE,DISALLOWED,CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04BF,PVALID,CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
+04C0-04C1,DISALLOWED,CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C2,PVALID,CYRILLIC SMALL LETTER ZHE WITH BREVE
+04C3,DISALLOWED,CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C4,PVALID,CYRILLIC SMALL LETTER KA WITH HOOK
+04C5,DISALLOWED,CYRILLIC CAPITAL LETTER EL WITH TAIL
+04C6,PVALID,CYRILLIC SMALL LETTER EL WITH TAIL
+04C7,DISALLOWED,CYRILLIC CAPITAL LETTER EN WITH HOOK
+04C8,PVALID,CYRILLIC SMALL LETTER EN WITH HOOK
+04C9,DISALLOWED,CYRILLIC CAPITAL LETTER EN WITH TAIL
+04CA,PVALID,CYRILLIC SMALL LETTER EN WITH TAIL
+04CB,DISALLOWED,CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04CC,PVALID,CYRILLIC SMALL LETTER KHAKASSIAN CHE
+04CD,DISALLOWED,CYRILLIC CAPITAL LETTER EM WITH TAIL
+04CE-04CF,PVALID,CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA
+04D0,DISALLOWED,CYRILLIC CAPITAL LETTER A WITH BREVE
+04D1,PVALID,CYRILLIC SMALL LETTER A WITH BREVE
+04D2,DISALLOWED,CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D3,PVALID,CYRILLIC SMALL LETTER A WITH DIAERESIS
+04D4,DISALLOWED,CYRILLIC CAPITAL LIGATURE A IE
+04D5,PVALID,CYRILLIC SMALL LIGATURE A IE
+04D6,DISALLOWED,CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D7,PVALID,CYRILLIC SMALL LETTER IE WITH BREVE
+04D8,DISALLOWED,CYRILLIC CAPITAL LETTER SCHWA
+04D9,PVALID,CYRILLIC SMALL LETTER SCHWA
+04DA,DISALLOWED,CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DB,PVALID,CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS
+04DC,DISALLOWED,CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DD,PVALID,CYRILLIC SMALL LETTER ZHE WITH DIAERESIS
+04DE,DISALLOWED,CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04DF,PVALID,CYRILLIC SMALL LETTER ZE WITH DIAERESIS
+04E0,DISALLOWED,CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E1,PVALID,CYRILLIC SMALL LETTER ABKHASIAN DZE
+04E2,DISALLOWED,CYRILLIC CAPITAL LETTER I WITH MACRON
+04E3,PVALID,CYRILLIC SMALL LETTER I WITH MACRON
+04E4,DISALLOWED,CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E5,PVALID,CYRILLIC SMALL LETTER I WITH DIAERESIS
+04E6,DISALLOWED,CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E7,PVALID,CYRILLIC SMALL LETTER O WITH DIAERESIS
+04E8,DISALLOWED,CYRILLIC CAPITAL LETTER BARRED O
+04E9,PVALID,CYRILLIC SMALL LETTER BARRED O
+04EA,DISALLOWED,CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EB,PVALID,CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS
+04EC,DISALLOWED,CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04ED,PVALID,CYRILLIC SMALL LETTER E WITH DIAERESIS
+04EE,DISALLOWED,CYRILLIC CAPITAL LETTER U WITH MACRON
+04EF,PVALID,CYRILLIC SMALL LETTER U WITH MACRON
+04F0,DISALLOWED,CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F1,PVALID,CYRILLIC SMALL LETTER U WITH DIAERESIS
+04F2,DISALLOWED,CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F3,PVALID,CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE
+04F4,DISALLOWED,CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F5,PVALID,CYRILLIC SMALL LETTER CHE WITH DIAERESIS
+04F6,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
+04F7,PVALID,CYRILLIC SMALL LETTER GHE WITH DESCENDER
+04F8,DISALLOWED,CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+04F9,PVALID,CYRILLIC SMALL LETTER YERU WITH DIAERESIS
+04FA,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
+04FB,PVALID,CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK
+04FC,DISALLOWED,CYRILLIC CAPITAL LETTER HA WITH HOOK
+04FD,PVALID,CYRILLIC SMALL LETTER HA WITH HOOK
+04FE,DISALLOWED,CYRILLIC CAPITAL LETTER HA WITH STROKE
+04FF,PVALID,CYRILLIC SMALL LETTER HA WITH STROKE
+0500,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI DE
+0501,PVALID,CYRILLIC SMALL LETTER KOMI DE
+0502,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI DJE
+0503,PVALID,CYRILLIC SMALL LETTER KOMI DJE
+0504,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI ZJE
+0505,PVALID,CYRILLIC SMALL LETTER KOMI ZJE
+0506,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI DZJE
+0507,PVALID,CYRILLIC SMALL LETTER KOMI DZJE
+0508,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI LJE
+0509,PVALID,CYRILLIC SMALL LETTER KOMI LJE
+050A,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI NJE
+050B,PVALID,CYRILLIC SMALL LETTER KOMI NJE
+050C,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI SJE
+050D,PVALID,CYRILLIC SMALL LETTER KOMI SJE
+050E,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI TJE
+050F,PVALID,CYRILLIC SMALL LETTER KOMI TJE
+0510,DISALLOWED,CYRILLIC CAPITAL LETTER REVERSED ZE
+0511,PVALID,CYRILLIC SMALL LETTER REVERSED ZE
+0512,DISALLOWED,CYRILLIC CAPITAL LETTER EL WITH HOOK
+0513,PVALID,CYRILLIC SMALL LETTER EL WITH HOOK
+0514,DISALLOWED,CYRILLIC CAPITAL LETTER LHA
+0515,PVALID,CYRILLIC SMALL LETTER LHA
+0516,DISALLOWED,CYRILLIC CAPITAL LETTER RHA
+0517,PVALID,CYRILLIC SMALL LETTER RHA
+0518,DISALLOWED,CYRILLIC CAPITAL LETTER YAE
+0519,PVALID,CYRILLIC SMALL LETTER YAE
+051A,DISALLOWED,CYRILLIC CAPITAL LETTER QA
+051B,PVALID,CYRILLIC SMALL LETTER QA
+051C,DISALLOWED,CYRILLIC CAPITAL LETTER WE
+051D,PVALID,CYRILLIC SMALL LETTER WE
+051E,DISALLOWED,CYRILLIC CAPITAL LETTER ALEUT KA
+051F,PVALID,CYRILLIC SMALL LETTER ALEUT KA
+0520,DISALLOWED,CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
+0521,PVALID,CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK
+0522,DISALLOWED,CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
+0523,PVALID,CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK
+0524,DISALLOWED,CYRILLIC CAPITAL LETTER PE WITH DESCENDER
+0525,PVALID,CYRILLIC SMALL LETTER PE WITH DESCENDER
+0526-0530,UNASSIGNED,<RESERVED>..<RESERVED>
+0531-0556,DISALLOWED,ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
+0557-0558,UNASSIGNED,<RESERVED>..<RESERVED>
+0559,PVALID,ARMENIAN MODIFIER LETTER LEFT HALF RING
+055A-055F,DISALLOWED,ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
+0560,UNASSIGNED,<RESERVED>
+0561-0586,PVALID,ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LETTER FEH
+0587,DISALLOWED,ARMENIAN SMALL LIGATURE ECH YIWN
+0588,UNASSIGNED,<RESERVED>
+0589-058A,DISALLOWED,ARMENIAN FULL STOP..ARMENIAN HYPHEN
+058B-0590,UNASSIGNED,<RESERVED>..<RESERVED>
+0591-05BD,PVALID,HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
+05BE,DISALLOWED,HEBREW PUNCTUATION MAQAF
+05BF,PVALID,HEBREW POINT RAFE
+05C0,DISALLOWED,HEBREW PUNCTUATION PASEQ
+05C1-05C2,PVALID,HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+05C3,DISALLOWED,HEBREW PUNCTUATION SOF PASUQ
+05C4-05C5,PVALID,HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+05C6,DISALLOWED,HEBREW PUNCTUATION NUN HAFUKHA
+05C7,PVALID,HEBREW POINT QAMATS QATAN
+05C8-05CF,UNASSIGNED,<RESERVED>..<RESERVED>
+05D0-05EA,PVALID,HEBREW LETTER ALEF..HEBREW LETTER TAV
+05EB-05EF,UNASSIGNED,<RESERVED>..<RESERVED>
+05F0-05F2,PVALID,HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
+05F3-05F4,CONTEXTO,HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
+05F5-05FF,UNASSIGNED,<RESERVED>..<RESERVED>
+0600-0603,DISALLOWED,ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
+0604-0605,UNASSIGNED,<RESERVED>..<RESERVED>
+0606-060F,DISALLOWED,ARABIC-INDIC CUBE ROOT..ARABIC SIGN MISRA
+0610-061A,PVALID,ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+061B,DISALLOWED,ARABIC SEMICOLON
+061C-061D,UNASSIGNED,<RESERVED>..<RESERVED>
+061E-061F,DISALLOWED,ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
+0620,UNASSIGNED,<RESERVED>
+0621-063F,PVALID,ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
+0640,DISALLOWED,ARABIC TATWEEL
+0641-065E,PVALID,ARABIC LETTER FEH..ARABIC FATHA WITH TWO DOTS
+065F,UNASSIGNED,<RESERVED>
+0660-0669,CONTEXTO,ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
+066A-066D,DISALLOWED,ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
+066E-0674,PVALID,ARABIC LETTER DOTLESS BEH..ARABIC LETTER HIGH HAMZA
+0675-0678,DISALLOWED,ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER HIGH HAMZA YEH
+0679-06D3,PVALID,ARABIC LETTER TTEH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
+06D4,DISALLOWED,ARABIC FULL STOP
+06D5-06DC,PVALID,ARABIC LETTER AE..ARABIC SMALL HIGH SEEN
+06DD-06DE,DISALLOWED,ARABIC END OF AYAH..ARABIC START OF RUB EL HIZB
+06DF-06E8,PVALID,ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH NOON
+06E9,DISALLOWED,ARABIC PLACE OF SAJDAH
+06EA-06EF,PVALID,ARABIC EMPTY CENTRE LOW STOP..ARABIC LETTER REH WITH INVERTED V
+06F0-06F9,CONTEXTO,EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
+06FA-06FF,PVALID,ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER HEH WITH INVERTED V
+0700-070D,DISALLOWED,SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
+070E,UNASSIGNED,<RESERVED>
+070F,DISALLOWED,SYRIAC ABBREVIATION MARK
+0710-074A,PVALID,SYRIAC LETTER ALAPH..SYRIAC BARREKH
+074B-074C,UNASSIGNED,<RESERVED>..<RESERVED>
+074D-07B1,PVALID,SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER NAA
+07B2-07BF,UNASSIGNED,<RESERVED>..<RESERVED>
+07C0-07F5,PVALID,NKO DIGIT ZERO..NKO LOW TONE APOSTROPHE
+07F6-07FA,DISALLOWED,NKO SYMBOL OO DENNEN..NKO LAJANYALAN
+07FB-07FF,UNASSIGNED,<RESERVED>..<RESERVED>
+0800-082D,PVALID,SAMARITAN LETTER ALAF..SAMARITAN MARK NEQUDAA
+082E-082F,UNASSIGNED,<RESERVED>..<RESERVED>
+0830-083E,DISALLOWED,SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
+083F-08FF,UNASSIGNED,<RESERVED>..<RESERVED>
+0900-0939,PVALID,DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI LETTER HA
+093A-093B,UNASSIGNED,<RESERVED>..<RESERVED>
+093C-094E,PVALID,DEVANAGARI SIGN NUKTA..DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
+094F,UNASSIGNED,<RESERVED>
+0950-0955,PVALID,DEVANAGARI OM..DEVANAGARI VOWEL SIGN CANDRA LONG E
+0956-0957,UNASSIGNED,<RESERVED>..<RESERVED>
+0958-095F,DISALLOWED,DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA
+0960-0963,PVALID,DEVANAGARI LETTER VOCALIC RR..DEVANAGARI VOWEL SIGN VOCALIC LL
+0964-0965,DISALLOWED,DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
+0966-096F,PVALID,DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
+0970,DISALLOWED,DEVANAGARI ABBREVIATION SIGN
+0971-0972,PVALID,DEVANAGARI SIGN HIGH SPACING DOT..DEVANAGARI LETTER CANDRA A
+0973-0978,UNASSIGNED,<RESERVED>..<RESERVED>
+0979-097F,PVALID,DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
+0980,UNASSIGNED,<RESERVED>
+0981-0983,PVALID,BENGALI SIGN CANDRABINDU..BENGALI SIGN VISARGA
+0984,UNASSIGNED,<RESERVED>
+0985-098C,PVALID,BENGALI LETTER A..BENGALI LETTER VOCALIC L
+098D-098E,UNASSIGNED,<RESERVED>..<RESERVED>
+098F-0990,PVALID,BENGALI LETTER E..BENGALI LETTER AI
+0991-0992,UNASSIGNED,<RESERVED>..<RESERVED>
+0993-09A8,PVALID,BENGALI LETTER O..BENGALI LETTER NA
+09A9,UNASSIGNED,<RESERVED>
+09AA-09B0,PVALID,BENGALI LETTER PA..BENGALI LETTER RA
+09B1,UNASSIGNED,<RESERVED>
+09B2,PVALID,BENGALI LETTER LA
+09B3-09B5,UNASSIGNED,<RESERVED>..<RESERVED>
+09B6-09B9,PVALID,BENGALI LETTER SHA..BENGALI LETTER HA
+09BA-09BB,UNASSIGNED,<RESERVED>..<RESERVED>
+09BC-09C4,PVALID,BENGALI SIGN NUKTA..BENGALI VOWEL SIGN VOCALIC RR
+09C5-09C6,UNASSIGNED,<RESERVED>..<RESERVED>
+09C7-09C8,PVALID,BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
+09C9-09CA,UNASSIGNED,<RESERVED>..<RESERVED>
+09CB-09CE,PVALID,BENGALI VOWEL SIGN O..BENGALI LETTER KHANDA TA
+09CF-09D6,UNASSIGNED,<RESERVED>..<RESERVED>
+09D7,PVALID,BENGALI AU LENGTH MARK
+09D8-09DB,UNASSIGNED,<RESERVED>..<RESERVED>
+09DC-09DD,DISALLOWED,BENGALI LETTER RRA..BENGALI LETTER RHA
+09DE,UNASSIGNED,<RESERVED>
+09DF,DISALLOWED,BENGALI LETTER YYA
+09E0-09E3,PVALID,BENGALI LETTER VOCALIC RR..BENGALI VOWEL SIGN VOCALIC LL
+09E4-09E5,UNASSIGNED,<RESERVED>..<RESERVED>
+09E6-09F1,PVALID,BENGALI DIGIT ZERO..BENGALI LETTER RA WITH LOWER DIAGONAL
+09F2-09FB,DISALLOWED,BENGALI RUPEE MARK..BENGALI GANDA MARK
+09FC-0A00,UNASSIGNED,<RESERVED>..<RESERVED>
+0A01-0A03,PVALID,GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN VISARGA
+0A04,UNASSIGNED,<RESERVED>
+0A05-0A0A,PVALID,GURMUKHI LETTER A..GURMUKHI LETTER UU
+0A0B-0A0E,UNASSIGNED,<RESERVED>..<RESERVED>
+0A0F-0A10,PVALID,GURMUKHI LETTER EE..GURMUKHI LETTER AI
+0A11-0A12,UNASSIGNED,<RESERVED>..<RESERVED>
+0A13-0A28,PVALID,GURMUKHI LETTER OO..GURMUKHI LETTER NA
+0A29,UNASSIGNED,<RESERVED>
+0A2A-0A30,PVALID,GURMUKHI LETTER PA..GURMUKHI LETTER RA
+0A31,UNASSIGNED,<RESERVED>
+0A32,PVALID,GURMUKHI LETTER LA
+0A33,DISALLOWED,GURMUKHI LETTER LLA
+0A34,UNASSIGNED,<RESERVED>
+0A35,PVALID,GURMUKHI LETTER VA
+0A36,DISALLOWED,GURMUKHI LETTER SHA
+0A37,UNASSIGNED,<RESERVED>
+0A38-0A39,PVALID,GURMUKHI LETTER SA..GURMUKHI LETTER HA
+0A3A-0A3B,UNASSIGNED,<RESERVED>..<RESERVED>
+0A3C,PVALID,GURMUKHI SIGN NUKTA
+0A3D,UNASSIGNED,<RESERVED>
+0A3E-0A42,PVALID,GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN UU
+0A43-0A46,UNASSIGNED,<RESERVED>..<RESERVED>
+0A47-0A48,PVALID,GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+0A49-0A4A,UNASSIGNED,<RESERVED>..<RESERVED>
+0A4B-0A4D,PVALID,GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+0A4E-0A50,UNASSIGNED,<RESERVED>..<RESERVED>
+0A51,PVALID,GURMUKHI SIGN UDAAT
+0A52-0A58,UNASSIGNED,<RESERVED>..<RESERVED>
+0A59-0A5B,DISALLOWED,GURMUKHI LETTER KHHA..GURMUKHI LETTER ZA
+0A5C,PVALID,GURMUKHI LETTER RRA
+0A5D,UNASSIGNED,<RESERVED>
+0A5E,DISALLOWED,GURMUKHI LETTER FA
+0A5F-0A65,UNASSIGNED,<RESERVED>..<RESERVED>
+0A66-0A75,PVALID,GURMUKHI DIGIT ZERO..GURMUKHI SIGN YAKASH
+0A76-0A80,UNASSIGNED,<RESERVED>..<RESERVED>
+0A81-0A83,PVALID,GUJARATI SIGN CANDRABINDU..GUJARATI SIGN VISARGA
+0A84,UNASSIGNED,<RESERVED>
+0A85-0A8D,PVALID,GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
+0A8E,UNASSIGNED,<RESERVED>
+0A8F-0A91,PVALID,GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
+0A92,UNASSIGNED,<RESERVED>
+0A93-0AA8,PVALID,GUJARATI LETTER O..GUJARATI LETTER NA
+0AA9,UNASSIGNED,<RESERVED>
+0AAA-0AB0,PVALID,GUJARATI LETTER PA..GUJARATI LETTER RA
+0AB1,UNASSIGNED,<RESERVED>
+0AB2-0AB3,PVALID,GUJARATI LETTER LA..GUJARATI LETTER LLA
+0AB4,UNASSIGNED,<RESERVED>
+0AB5-0AB9,PVALID,GUJARATI LETTER VA..GUJARATI LETTER HA
+0ABA-0ABB,UNASSIGNED,<RESERVED>..<RESERVED>
+0ABC-0AC5,PVALID,GUJARATI SIGN NUKTA..GUJARATI VOWEL SIGN CANDRA E
+0AC6,UNASSIGNED,<RESERVED>
+0AC7-0AC9,PVALID,GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN CANDRA O
+0ACA,UNASSIGNED,<RESERVED>
+0ACB-0ACD,PVALID,GUJARATI VOWEL SIGN O..GUJARATI SIGN VIRAMA
+0ACE-0ACF,UNASSIGNED,<RESERVED>..<RESERVED>
+0AD0,PVALID,GUJARATI OM
+0AD1-0ADF,UNASSIGNED,<RESERVED>..<RESERVED>
+0AE0-0AE3,PVALID,GUJARATI LETTER VOCALIC RR..GUJARATI VOWEL SIGN VOCALIC LL
+0AE4-0AE5,UNASSIGNED,<RESERVED>..<RESERVED>
+0AE6-0AEF,PVALID,GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
+0AF0,UNASSIGNED,<RESERVED>
+0AF1,DISALLOWED,GUJARATI RUPEE SIGN
+0AF2-0B00,UNASSIGNED,<RESERVED>..<RESERVED>
+0B01-0B03,PVALID,ORIYA SIGN CANDRABINDU..ORIYA SIGN VISARGA
+0B04,UNASSIGNED,<RESERVED>
+0B05-0B0C,PVALID,ORIYA LETTER A..ORIYA LETTER VOCALIC L
+0B0D-0B0E,UNASSIGNED,<RESERVED>..<RESERVED>
+0B0F-0B10,PVALID,ORIYA LETTER E..ORIYA LETTER AI
+0B11-0B12,UNASSIGNED,<RESERVED>..<RESERVED>
+0B13-0B28,PVALID,ORIYA LETTER O..ORIYA LETTER NA
+0B29,UNASSIGNED,<RESERVED>
+0B2A-0B30,PVALID,ORIYA LETTER PA..ORIYA LETTER RA
+0B31,UNASSIGNED,<RESERVED>
+0B32-0B33,PVALID,ORIYA LETTER LA..ORIYA LETTER LLA
+0B34,UNASSIGNED,<RESERVED>
+0B35-0B39,PVALID,ORIYA LETTER VA..ORIYA LETTER HA
+0B3A-0B3B,UNASSIGNED,<RESERVED>..<RESERVED>
+0B3C-0B44,PVALID,ORIYA SIGN NUKTA..ORIYA VOWEL SIGN VOCALIC RR
+0B45-0B46,UNASSIGNED,<RESERVED>..<RESERVED>
+0B47-0B48,PVALID,ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
+0B49-0B4A,UNASSIGNED,<RESERVED>..<RESERVED>
+0B4B-0B4D,PVALID,ORIYA VOWEL SIGN O..ORIYA SIGN VIRAMA
+0B4E-0B55,UNASSIGNED,<RESERVED>..<RESERVED>
+0B56-0B57,PVALID,ORIYA AI LENGTH MARK..ORIYA AU LENGTH MARK
+0B58-0B5B,UNASSIGNED,<RESERVED>..<RESERVED>
+0B5C-0B5D,DISALLOWED,ORIYA LETTER RRA..ORIYA LETTER RHA
+0B5E,UNASSIGNED,<RESERVED>
+0B5F-0B63,PVALID,ORIYA LETTER YYA..ORIYA VOWEL SIGN VOCALIC LL
+0B64-0B65,UNASSIGNED,<RESERVED>..<RESERVED>
+0B66-0B6F,PVALID,ORIYA DIGIT ZERO..ORIYA DIGIT NINE
+0B70,DISALLOWED,ORIYA ISSHAR
+0B71,PVALID,ORIYA LETTER WA
+0B72-0B81,UNASSIGNED,<RESERVED>..<RESERVED>
+0B82-0B83,PVALID,TAMIL SIGN ANUSVARA..TAMIL SIGN VISARGA
+0B84,UNASSIGNED,<RESERVED>
+0B85-0B8A,PVALID,TAMIL LETTER A..TAMIL LETTER UU
+0B8B-0B8D,UNASSIGNED,<RESERVED>..<RESERVED>
+0B8E-0B90,PVALID,TAMIL LETTER E..TAMIL LETTER AI
+0B91,UNASSIGNED,<RESERVED>
+0B92-0B95,PVALID,TAMIL LETTER O..TAMIL LETTER KA
+0B96-0B98,UNASSIGNED,<RESERVED>..<RESERVED>
+0B99-0B9A,PVALID,TAMIL LETTER NGA..TAMIL LETTER CA
+0B9B,UNASSIGNED,<RESERVED>
+0B9C,PVALID,TAMIL LETTER JA
+0B9D,UNASSIGNED,<RESERVED>
+0B9E-0B9F,PVALID,TAMIL LETTER NYA..TAMIL LETTER TTA
+0BA0-0BA2,UNASSIGNED,<RESERVED>..<RESERVED>
+0BA3-0BA4,PVALID,TAMIL LETTER NNA..TAMIL LETTER TA
+0BA5-0BA7,UNASSIGNED,<RESERVED>..<RESERVED>
+0BA8-0BAA,PVALID,TAMIL LETTER NA..TAMIL LETTER PA
+0BAB-0BAD,UNASSIGNED,<RESERVED>..<RESERVED>
+0BAE-0BB9,PVALID,TAMIL LETTER MA..TAMIL LETTER HA
+0BBA-0BBD,UNASSIGNED,<RESERVED>..<RESERVED>
+0BBE-0BC2,PVALID,TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN UU
+0BC3-0BC5,UNASSIGNED,<RESERVED>..<RESERVED>
+0BC6-0BC8,PVALID,TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
+0BC9,UNASSIGNED,<RESERVED>
+0BCA-0BCD,PVALID,TAMIL VOWEL SIGN O..TAMIL SIGN VIRAMA
+0BCE-0BCF,UNASSIGNED,<RESERVED>..<RESERVED>
+0BD0,PVALID,TAMIL OM
+0BD1-0BD6,UNASSIGNED,<RESERVED>..<RESERVED>
+0BD7,PVALID,TAMIL AU LENGTH MARK
+0BD8-0BE5,UNASSIGNED,<RESERVED>..<RESERVED>
+0BE6-0BEF,PVALID,TAMIL DIGIT ZERO..TAMIL DIGIT NINE
+0BF0-0BFA,DISALLOWED,TAMIL NUMBER TEN..TAMIL NUMBER SIGN
+0BFB-0C00,UNASSIGNED,<RESERVED>..<RESERVED>
+0C01-0C03,PVALID,TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
+0C04,UNASSIGNED,<RESERVED>
+0C05-0C0C,PVALID,TELUGU LETTER A..TELUGU LETTER VOCALIC L
+0C0D,UNASSIGNED,<RESERVED>
+0C0E-0C10,PVALID,TELUGU LETTER E..TELUGU LETTER AI
+0C11,UNASSIGNED,<RESERVED>
+0C12-0C28,PVALID,TELUGU LETTER O..TELUGU LETTER NA
+0C29,UNASSIGNED,<RESERVED>
+0C2A-0C33,PVALID,TELUGU LETTER PA..TELUGU LETTER LLA
+0C34,UNASSIGNED,<RESERVED>
+0C35-0C39,PVALID,TELUGU LETTER VA..TELUGU LETTER HA
+0C3A-0C3C,UNASSIGNED,<RESERVED>..<RESERVED>
+0C3D-0C44,PVALID,TELUGU SIGN AVAGRAHA..TELUGU VOWEL SIGN VOCALIC RR
+0C45,UNASSIGNED,<RESERVED>
+0C46-0C48,PVALID,TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+0C49,UNASSIGNED,<RESERVED>
+0C4A-0C4D,PVALID,TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
+0C4E-0C54,UNASSIGNED,<RESERVED>..<RESERVED>
+0C55-0C56,PVALID,TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+0C57,UNASSIGNED,<RESERVED>
+0C58-0C59,PVALID,TELUGU LETTER TSA..TELUGU LETTER DZA
+0C5A-0C5F,UNASSIGNED,<RESERVED>..<RESERVED>
+0C60-0C63,PVALID,TELUGU LETTER VOCALIC RR..TELUGU VOWEL SIGN VOCALIC LL
+0C64-0C65,UNASSIGNED,<RESERVED>..<RESERVED>
+0C66-0C6F,PVALID,TELUGU DIGIT ZERO..TELUGU DIGIT NINE
+0C70-0C77,UNASSIGNED,<RESERVED>..<RESERVED>
+0C78-0C7F,DISALLOWED,TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU SIGN TUUMU
+0C80-0C81,UNASSIGNED,<RESERVED>..<RESERVED>
+0C82-0C83,PVALID,KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
+0C84,UNASSIGNED,<RESERVED>
+0C85-0C8C,PVALID,KANNADA LETTER A..KANNADA LETTER VOCALIC L
+0C8D,UNASSIGNED,<RESERVED>
+0C8E-0C90,PVALID,KANNADA LETTER E..KANNADA LETTER AI
+0C91,UNASSIGNED,<RESERVED>
+0C92-0CA8,PVALID,KANNADA LETTER O..KANNADA LETTER NA
+0CA9,UNASSIGNED,<RESERVED>
+0CAA-0CB3,PVALID,KANNADA LETTER PA..KANNADA LETTER LLA
+0CB4,UNASSIGNED,<RESERVED>
+0CB5-0CB9,PVALID,KANNADA LETTER VA..KANNADA LETTER HA
+0CBA-0CBB,UNASSIGNED,<RESERVED>..<RESERVED>
+0CBC-0CC4,PVALID,KANNADA SIGN NUKTA..KANNADA VOWEL SIGN VOCALIC RR
+0CC5,UNASSIGNED,<RESERVED>
+0CC6-0CC8,PVALID,KANNADA VOWEL SIGN E..KANNADA VOWEL SIGN AI
+0CC9,UNASSIGNED,<RESERVED>
+0CCA-0CCD,PVALID,KANNADA VOWEL SIGN O..KANNADA SIGN VIRAMA
+0CCE-0CD4,UNASSIGNED,<RESERVED>..<RESERVED>
+0CD5-0CD6,PVALID,KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+0CD7-0CDD,UNASSIGNED,<RESERVED>..<RESERVED>
+0CDE,PVALID,KANNADA LETTER FA
+0CDF,UNASSIGNED,<RESERVED>
+0CE0-0CE3,PVALID,KANNADA LETTER VOCALIC RR..KANNADA VOWEL SIGN VOCALIC LL
+0CE4-0CE5,UNASSIGNED,<RESERVED>..<RESERVED>
+0CE6-0CEF,PVALID,KANNADA DIGIT ZERO..KANNADA DIGIT NINE
+0CF0,UNASSIGNED,<RESERVED>
+0CF1-0CF2,DISALLOWED,KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
+0CF3-0D01,UNASSIGNED,<RESERVED>..<RESERVED>
+0D02-0D03,PVALID,MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+0D04,UNASSIGNED,<RESERVED>
+0D05-0D0C,PVALID,MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
+0D0D,UNASSIGNED,<RESERVED>
+0D0E-0D10,PVALID,MALAYALAM LETTER E..MALAYALAM LETTER AI
+0D11,UNASSIGNED,<RESERVED>
+0D12-0D28,PVALID,MALAYALAM LETTER O..MALAYALAM LETTER NA
+0D29,UNASSIGNED,<RESERVED>
+0D2A-0D39,PVALID,MALAYALAM LETTER PA..MALAYALAM LETTER HA
+0D3A-0D3C,UNASSIGNED,<RESERVED>..<RESERVED>
+0D3D-0D44,PVALID,MALAYALAM SIGN AVAGRAHA..MALAYALAM VOWEL SIGN VOCALIC RR
+0D45,UNASSIGNED,<RESERVED>
+0D46-0D48,PVALID,MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
+0D49,UNASSIGNED,<RESERVED>
+0D4A-0D4D,PVALID,MALAYALAM VOWEL SIGN O..MALAYALAM SIGN VIRAMA
+0D4E-0D56,UNASSIGNED,<RESERVED>..<RESERVED>
+0D57,PVALID,MALAYALAM AU LENGTH MARK
+0D58-0D5F,UNASSIGNED,<RESERVED>..<RESERVED>
+0D60-0D63,PVALID,MALAYALAM LETTER VOCALIC RR..MALAYALAM VOWEL SIGN VOCALIC LL
+0D64-0D65,UNASSIGNED,<RESERVED>..<RESERVED>
+0D66-0D6F,PVALID,MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
+0D70-0D75,DISALLOWED,MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
+0D76-0D78,UNASSIGNED,<RESERVED>..<RESERVED>
+0D79,DISALLOWED,MALAYALAM DATE MARK
+0D7A-0D7F,PVALID,MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
+0D80-0D81,UNASSIGNED,<RESERVED>..<RESERVED>
+0D82-0D83,PVALID,SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
+0D84,UNASSIGNED,<RESERVED>
+0D85-0D96,PVALID,SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
+0D97-0D99,UNASSIGNED,<RESERVED>..<RESERVED>
+0D9A-0DB1,PVALID,SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
+0DB2,UNASSIGNED,<RESERVED>
+0DB3-0DBB,PVALID,SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
+0DBC,UNASSIGNED,<RESERVED>
+0DBD,PVALID,SINHALA LETTER DANTAJA LAYANNA
+0DBE-0DBF,UNASSIGNED,<RESERVED>..<RESERVED>
+0DC0-0DC6,PVALID,SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
+0DC7-0DC9,UNASSIGNED,<RESERVED>..<RESERVED>
+0DCA,PVALID,SINHALA SIGN AL-LAKUNA
+0DCB-0DCE,UNASSIGNED,<RESERVED>..<RESERVED>
+0DCF-0DD4,PVALID,SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+0DD5,UNASSIGNED,<RESERVED>
+0DD6,PVALID,SINHALA VOWEL SIGN DIGA PAA-PILLA
+0DD7,UNASSIGNED,<RESERVED>
+0DD8-0DDF,PVALID,SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
+0DE0-0DF1,UNASSIGNED,<RESERVED>..<RESERVED>
+0DF2-0DF3,PVALID,SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
+0DF4,DISALLOWED,SINHALA PUNCTUATION KUNDDALIYA
+0DF5-0E00,UNASSIGNED,<RESERVED>..<RESERVED>
+0E01-0E32,PVALID,THAI CHARACTER KO KAI..THAI CHARACTER SARA AA
+0E33,DISALLOWED,THAI CHARACTER SARA AM
+0E34-0E3A,PVALID,THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+0E3B-0E3E,UNASSIGNED,<RESERVED>..<RESERVED>
+0E3F,DISALLOWED,THAI CURRENCY SYMBOL BAHT
+0E40-0E4E,PVALID,THAI CHARACTER SARA E..THAI CHARACTER YAMAKKAN
+0E4F,DISALLOWED,THAI CHARACTER FONGMAN
+0E50-0E59,PVALID,THAI DIGIT ZERO..THAI DIGIT NINE
+0E5A-0E5B,DISALLOWED,THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
+0E5C-0E80,UNASSIGNED,<RESERVED>..<RESERVED>
+0E81-0E82,PVALID,LAO LETTER KO..LAO LETTER KHO SUNG
+0E83,UNASSIGNED,<RESERVED>
+0E84,PVALID,LAO LETTER KHO TAM
+0E85-0E86,UNASSIGNED,<RESERVED>..<RESERVED>
+0E87-0E88,PVALID,LAO LETTER NGO..LAO LETTER CO
+0E89,UNASSIGNED,<RESERVED>
+0E8A,PVALID,LAO LETTER SO TAM
+0E8B-0E8C,UNASSIGNED,<RESERVED>..<RESERVED>
+0E8D,PVALID,LAO LETTER NYO
+0E8E-0E93,UNASSIGNED,<RESERVED>..<RESERVED>
+0E94-0E97,PVALID,LAO LETTER DO..LAO LETTER THO TAM
+0E98,UNASSIGNED,<RESERVED>
+0E99-0E9F,PVALID,LAO LETTER NO..LAO LETTER FO SUNG
+0EA0,UNASSIGNED,<RESERVED>
+0EA1-0EA3,PVALID,LAO LETTER MO..LAO LETTER LO LING
+0EA4,UNASSIGNED,<RESERVED>
+0EA5,PVALID,LAO LETTER LO LOOT
+0EA6,UNASSIGNED,<RESERVED>
+0EA7,PVALID,LAO LETTER WO
+0EA8-0EA9,UNASSIGNED,<RESERVED>..<RESERVED>
+0EAA-0EAB,PVALID,LAO LETTER SO SUNG..LAO LETTER HO SUNG
+0EAC,UNASSIGNED,<RESERVED>
+0EAD-0EB2,PVALID,LAO LETTER O..LAO VOWEL SIGN AA
+0EB3,DISALLOWED,LAO VOWEL SIGN AM
+0EB4-0EB9,PVALID,LAO VOWEL SIGN I..LAO VOWEL SIGN UU
+0EBA,UNASSIGNED,<RESERVED>
+0EBB-0EBD,PVALID,LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN NYO
+0EBE-0EBF,UNASSIGNED,<RESERVED>..<RESERVED>
+0EC0-0EC4,PVALID,LAO VOWEL SIGN E..LAO VOWEL SIGN AI
+0EC5,UNASSIGNED,<RESERVED>
+0EC6,PVALID,LAO KO LA
+0EC7,UNASSIGNED,<RESERVED>
+0EC8-0ECD,PVALID,LAO TONE MAI EK..LAO NIGGAHITA
+0ECE-0ECF,UNASSIGNED,<RESERVED>..<RESERVED>
+0ED0-0ED9,PVALID,LAO DIGIT ZERO..LAO DIGIT NINE
+0EDA-0EDB,UNASSIGNED,<RESERVED>..<RESERVED>
+0EDC-0EDD,DISALLOWED,LAO HO NO..LAO HO MO
+0EDE-0EFF,UNASSIGNED,<RESERVED>..<RESERVED>
+0F00,PVALID,TIBETAN SYLLABLE OM
+0F01-0F0A,DISALLOWED,TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK BKA- SHOG YIG MGO
+0F0B,PVALID,TIBETAN MARK INTERSYLLABIC TSHEG
+0F0C-0F17,DISALLOWED,TIBETAN MARK DELIMITER TSHEG BSTAR..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS
+0F18-0F19,PVALID,TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+0F1A-0F1F,DISALLOWED,TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG
+0F20-0F29,PVALID,TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
+0F2A-0F34,DISALLOWED,TIBETAN DIGIT HALF ONE..TIBETAN MARK BSDUS RTAGS
+0F35,PVALID,TIBETAN MARK NGAS BZUNG NYI ZLA
+0F36,DISALLOWED,TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN
+0F37,PVALID,TIBETAN MARK NGAS BZUNG SGOR RTAGS
+0F38,DISALLOWED,TIBETAN MARK CHE MGO
+0F39,PVALID,TIBETAN MARK TSA -PHRU
+0F3A-0F3D,DISALLOWED,TIBETAN MARK GUG RTAGS GYON..TIBETAN MARK ANG KHANG GYAS
+0F3E-0F42,PVALID,TIBETAN SIGN YAR TSHES..TIBETAN LETTER GA
+0F43,DISALLOWED,TIBETAN LETTER GHA
+0F44-0F47,PVALID,TIBETAN LETTER NGA..TIBETAN LETTER JA
+0F48,UNASSIGNED,<RESERVED>
+0F49-0F4C,PVALID,TIBETAN LETTER NYA..TIBETAN LETTER DDA
+0F4D,DISALLOWED,TIBETAN LETTER DDHA
+0F4E-0F51,PVALID,TIBETAN LETTER NNA..TIBETAN LETTER DA
+0F52,DISALLOWED,TIBETAN LETTER DHA
+0F53-0F56,PVALID,TIBETAN LETTER NA..TIBETAN LETTER BA
+0F57,DISALLOWED,TIBETAN LETTER BHA
+0F58-0F5B,PVALID,TIBETAN LETTER MA..TIBETAN LETTER DZA
+0F5C,DISALLOWED,TIBETAN LETTER DZHA
+0F5D-0F68,PVALID,TIBETAN LETTER WA..TIBETAN LETTER A
+0F69,DISALLOWED,TIBETAN LETTER KSSA
+0F6A-0F6C,PVALID,TIBETAN LETTER FIXED-FORM RA..TIBETAN LETTER RRA
+0F6D-0F70,UNASSIGNED,<RESERVED>..<RESERVED>
+0F71-0F72,PVALID,TIBETAN VOWEL SIGN AA..TIBETAN VOWEL SIGN I
+0F73,DISALLOWED,TIBETAN VOWEL SIGN II
+0F74,PVALID,TIBETAN VOWEL SIGN U
+0F75-0F79,DISALLOWED,TIBETAN VOWEL SIGN UU..TIBETAN VOWEL SIGN VOCALIC LL
+0F7A-0F80,PVALID,TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN REVERSED I
+0F81,DISALLOWED,TIBETAN VOWEL SIGN REVERSED II
+0F82-0F84,PVALID,TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA
+0F85,DISALLOWED,TIBETAN MARK PALUTA
+0F86-0F8B,PVALID,TIBETAN SIGN LCI RTAGS..TIBETAN SIGN GRU MED RGYINGS
+0F8C-0F8F,UNASSIGNED,<RESERVED>..<RESERVED>
+0F90-0F92,PVALID,TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER GA
+0F93,DISALLOWED,TIBETAN SUBJOINED LETTER GHA
+0F94-0F97,PVALID,TIBETAN SUBJOINED LETTER NGA..TIBETAN SUBJOINED LETTER JA
+0F98,UNASSIGNED,<RESERVED>
+0F99-0F9C,PVALID,TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER DDA
+0F9D,DISALLOWED,TIBETAN SUBJOINED LETTER DDHA
+0F9E-0FA1,PVALID,TIBETAN SUBJOINED LETTER NNA..TIBETAN SUBJOINED LETTER DA
+0FA2,DISALLOWED,TIBETAN SUBJOINED LETTER DHA
+0FA3-0FA6,PVALID,TIBETAN SUBJOINED LETTER NA..TIBETAN SUBJOINED LETTER BA
+0FA7,DISALLOWED,TIBETAN SUBJOINED LETTER BHA
+0FA8-0FAB,PVALID,TIBETAN SUBJOINED LETTER MA..TIBETAN SUBJOINED LETTER DZA
+0FAC,DISALLOWED,TIBETAN SUBJOINED LETTER DZHA
+0FAD-0FB8,PVALID,TIBETAN SUBJOINED LETTER WA..TIBETAN SUBJOINED LETTER A
+0FB9,DISALLOWED,TIBETAN SUBJOINED LETTER KSSA
+0FBA-0FBC,PVALID,TIBETAN SUBJOINED LETTER FIXED-FORM WA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+0FBD,UNASSIGNED,<RESERVED>
+0FBE-0FC5,DISALLOWED,TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
+0FC6,PVALID,TIBETAN SYMBOL PADMA GDAN
+0FC7-0FCC,DISALLOWED,TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
+0FCD,UNASSIGNED,<RESERVED>
+0FCE-0FD8,DISALLOWED,TIBETAN SIGN RDEL NAG RDEL DKAR..LEFT-FACING SVASTI SIGN WITH DOTS
+0FD9-0FFF,UNASSIGNED,<RESERVED>..<RESERVED>
+1000-1049,PVALID,MYANMAR LETTER KA..MYANMAR DIGIT NINE
+104A-104F,DISALLOWED,MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
+1050-109D,PVALID,MYANMAR LETTER SHA..MYANMAR VOWEL SIGN AITON AI
+109E-10C5,DISALLOWED,MYANMAR SYMBOL SHAN ONE..GEORGIAN CAPITAL LETTER HOE
+10C6-10CF,UNASSIGNED,<RESERVED>..<RESERVED>
+10D0-10FA,PVALID,GEORGIAN LETTER AN..GEORGIAN LETTER AIN
+10FB-10FC,DISALLOWED,GEORGIAN PARAGRAPH SEPARATOR..MODIFIER LETTER GEORGIAN NAR
+10FD-10FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1100-11FF,DISALLOWED,HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
+1200-1248,PVALID,ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
+1249,UNASSIGNED,<RESERVED>
+124A-124D,PVALID,ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
+124E-124F,UNASSIGNED,<RESERVED>..<RESERVED>
+1250-1256,PVALID,ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
+1257,UNASSIGNED,<RESERVED>
+1258,PVALID,ETHIOPIC SYLLABLE QHWA
+1259,UNASSIGNED,<RESERVED>
+125A-125D,PVALID,ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
+125E-125F,UNASSIGNED,<RESERVED>..<RESERVED>
+1260-1288,PVALID,ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
+1289,UNASSIGNED,<RESERVED>
+128A-128D,PVALID,ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
+128E-128F,UNASSIGNED,<RESERVED>..<RESERVED>
+1290-12B0,PVALID,ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
+12B1,UNASSIGNED,<RESERVED>
+12B2-12B5,PVALID,ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
+12B6-12B7,UNASSIGNED,<RESERVED>..<RESERVED>
+12B8-12BE,PVALID,ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
+12BF,UNASSIGNED,<RESERVED>
+12C0,PVALID,ETHIOPIC SYLLABLE KXWA
+12C1,UNASSIGNED,<RESERVED>
+12C2-12C5,PVALID,ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
+12C6-12C7,UNASSIGNED,<RESERVED>..<RESERVED>
+12C8-12D6,PVALID,ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
+12D7,UNASSIGNED,<RESERVED>
+12D8-1310,PVALID,ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
+1311,UNASSIGNED,<RESERVED>
+1312-1315,PVALID,ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
+1316-1317,UNASSIGNED,<RESERVED>..<RESERVED>
+1318-135A,PVALID,ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
+135B-135E,UNASSIGNED,<RESERVED>..<RESERVED>
+135F,PVALID,ETHIOPIC COMBINING GEMINATION MARK
+1360-137C,DISALLOWED,ETHIOPIC SECTION MARK..ETHIOPIC NUMBER TEN THOUSAND
+137D-137F,UNASSIGNED,<RESERVED>..<RESERVED>
+1380-138F,PVALID,ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
+1390-1399,DISALLOWED,ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
+139A-139F,UNASSIGNED,<RESERVED>..<RESERVED>
+13A0-13F4,PVALID,CHEROKEE LETTER A..CHEROKEE LETTER YV
+13F5-13FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1400,DISALLOWED,CANADIAN SYLLABICS HYPHEN
+1401-166C,PVALID,CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
+166D-166E,DISALLOWED,CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
+166F-167F,PVALID,CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
+1680,DISALLOWED,OGHAM SPACE MARK
+1681-169A,PVALID,OGHAM LETTER BEITH..OGHAM LETTER PEITH
+169B-169C,DISALLOWED,OGHAM FEATHER MARK..OGHAM REVERSED FEATHER MARK
+169D-169F,UNASSIGNED,<RESERVED>..<RESERVED>
+16A0-16EA,PVALID,RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
+16EB-16F0,DISALLOWED,RUNIC SINGLE PUNCTUATION..RUNIC BELGTHOR SYMBOL
+16F1-16FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1700-170C,PVALID,TAGALOG LETTER A..TAGALOG LETTER YA
+170D,UNASSIGNED,<RESERVED>
+170E-1714,PVALID,TAGALOG LETTER LA..TAGALOG SIGN VIRAMA
+1715-171F,UNASSIGNED,<RESERVED>..<RESERVED>
+1720-1734,PVALID,HANUNOO LETTER A..HANUNOO SIGN PAMUDPOD
+1735-1736,DISALLOWED,PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+1737-173F,UNASSIGNED,<RESERVED>..<RESERVED>
+1740-1753,PVALID,BUHID LETTER A..BUHID VOWEL SIGN U
+1754-175F,UNASSIGNED,<RESERVED>..<RESERVED>
+1760-176C,PVALID,TAGBANWA LETTER A..TAGBANWA LETTER YA
+176D,UNASSIGNED,<RESERVED>
+176E-1770,PVALID,TAGBANWA LETTER LA..TAGBANWA LETTER SA
+1771,UNASSIGNED,<RESERVED>
+1772-1773,PVALID,TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+1774-177F,UNASSIGNED,<RESERVED>..<RESERVED>
+1780-17B3,PVALID,KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU
+17B4-17B5,DISALLOWED,KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+17B6-17D3,PVALID,KHMER VOWEL SIGN AA..KHMER SIGN BATHAMASAT
+17D4-17D6,DISALLOWED,KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
+17D7,PVALID,KHMER SIGN LEK TOO
+17D8-17DB,DISALLOWED,KHMER SIGN BEYYAL..KHMER CURRENCY SYMBOL RIEL
+17DC-17DD,PVALID,KHMER SIGN AVAKRAHASANYA..KHMER SIGN ATTHACAN
+17DE-17DF,UNASSIGNED,<RESERVED>..<RESERVED>
+17E0-17E9,PVALID,KHMER DIGIT ZERO..KHMER DIGIT NINE
+17EA-17EF,UNASSIGNED,<RESERVED>..<RESERVED>
+17F0-17F9,DISALLOWED,KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
+17FA-17FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1800-180E,DISALLOWED,MONGOLIAN BIRGA..MONGOLIAN VOWEL SEPARATOR
+180F,UNASSIGNED,<RESERVED>
+1810-1819,PVALID,MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
+181A-181F,UNASSIGNED,<RESERVED>..<RESERVED>
+1820-1877,PVALID,MONGOLIAN LETTER A..MONGOLIAN LETTER MANCHU ZHA
+1878-187F,UNASSIGNED,<RESERVED>..<RESERVED>
+1880-18AA,PVALID,MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI LHA
+18AB-18AF,UNASSIGNED,<RESERVED>..<RESERVED>
+18B0-18F5,PVALID,CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
+18F6-18FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1900-191C,PVALID,LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
+191D-191F,UNASSIGNED,<RESERVED>..<RESERVED>
+1920-192B,PVALID,LIMBU VOWEL SIGN A..LIMBU SUBJOINED LETTER WA
+192C-192F,UNASSIGNED,<RESERVED>..<RESERVED>
+1930-193B,PVALID,LIMBU SMALL LETTER KA..LIMBU SIGN SA-I
+193C-193F,UNASSIGNED,<RESERVED>..<RESERVED>
+1940,DISALLOWED,LIMBU SIGN LOO
+1941-1943,UNASSIGNED,<RESERVED>..<RESERVED>
+1944-1945,DISALLOWED,LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
+1946-196D,PVALID,LIMBU DIGIT ZERO..TAI LE LETTER AI
+196E-196F,UNASSIGNED,<RESERVED>..<RESERVED>
+1970-1974,PVALID,TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
+1975-197F,UNASSIGNED,<RESERVED>..<RESERVED>
+1980-19AB,PVALID,NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
+19AC-19AF,UNASSIGNED,<RESERVED>..<RESERVED>
+19B0-19C9,PVALID,NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2
+19CA-19CF,UNASSIGNED,<RESERVED>..<RESERVED>
+19D0-19DA,PVALID,NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
+19DB-19DD,UNASSIGNED,<RESERVED>..<RESERVED>
+19DE-19FF,DISALLOWED,NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC
+1A00-1A1B,PVALID,BUGINESE LETTER KA..BUGINESE VOWEL SIGN AE
+1A1C-1A1D,UNASSIGNED,<RESERVED>..<RESERVED>
+1A1E-1A1F,DISALLOWED,BUGINESE PALLAWA..BUGINESE END OF SECTION
+1A20-1A5E,PVALID,TAI THAM LETTER HIGH KA..TAI THAM CONSONANT SIGN SA
+1A5F,UNASSIGNED,<RESERVED>
+1A60-1A7C,PVALID,TAI THAM SIGN SAKOT..TAI THAM SIGN KHUEN-LUE KARAN
+1A7D-1A7E,UNASSIGNED,<RESERVED>..<RESERVED>
+1A7F-1A89,PVALID,TAI THAM COMBINING CRYPTOGRAMMIC DOT..TAI THAM HORA DIGIT NINE
+1A8A-1A8F,UNASSIGNED,<RESERVED>..<RESERVED>
+1A90-1A99,PVALID,TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
+1A9A-1A9F,UNASSIGNED,<RESERVED>..<RESERVED>
+1AA0-1AA6,DISALLOWED,TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
+1AA7,PVALID,TAI THAM SIGN MAI YAMOK
+1AA8-1AAD,DISALLOWED,TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
+1AAE-1AFF,UNASSIGNED,<RESERVED>..<RESERVED>
+1B00-1B4B,PVALID,BALINESE SIGN ULU RICEM..BALINESE LETTER ASYURA SASAK
+1B4C-1B4F,UNASSIGNED,<RESERVED>..<RESERVED>
+1B50-1B59,PVALID,BALINESE DIGIT ZERO..BALINESE DIGIT NINE
+1B5A-1B6A,DISALLOWED,BALINESE PANTI..BALINESE MUSICAL SYMBOL DANG GEDE
+1B6B-1B73,PVALID,BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+1B74-1B7C,DISALLOWED,BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
+1B7D-1B7F,UNASSIGNED,<RESERVED>..<RESERVED>
+1B80-1BAA,PVALID,SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PAMAAEH
+1BAB-1BAD,UNASSIGNED,<RESERVED>..<RESERVED>
+1BAE-1BB9,PVALID,SUNDANESE LETTER KHA..SUNDANESE DIGIT NINE
+1BBA-1BFF,UNASSIGNED,<RESERVED>..<RESERVED>
+1C00-1C37,PVALID,LEPCHA LETTER KA..LEPCHA SIGN NUKTA
+1C38-1C3A,UNASSIGNED,<RESERVED>..<RESERVED>
+1C3B-1C3F,DISALLOWED,LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
+1C40-1C49,PVALID,LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
+1C4A-1C4C,UNASSIGNED,<RESERVED>..<RESERVED>
+1C4D-1C7D,PVALID,LEPCHA LETTER TTA..OL CHIKI AHAD
+1C7E-1C7F,DISALLOWED,OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
+1C80-1CCF,UNASSIGNED,<RESERVED>..<RESERVED>
+1CD0-1CD2,PVALID,VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+1CD3,DISALLOWED,VEDIC SIGN NIHSHVASA
+1CD4-1CF2,PVALID,VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC SIGN ARDHAVISARGA
+1CF3-1CFF,UNASSIGNED,<RESERVED>..<RESERVED>
+1D00-1D2B,PVALID,LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
+1D2C-1D2E,DISALLOWED,MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B
+1D2F,PVALID,MODIFIER LETTER CAPITAL BARRED B
+1D30-1D3A,DISALLOWED,MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N
+1D3B,PVALID,MODIFIER LETTER CAPITAL REVERSED N
+1D3C-1D4D,DISALLOWED,MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G
+1D4E,PVALID,MODIFIER LETTER SMALL TURNED I
+1D4F-1D6A,DISALLOWED,MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI
+1D6B-1D77,PVALID,LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
+1D78,DISALLOWED,MODIFIER LETTER CYRILLIC EN
+1D79-1D9A,PVALID,LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
+1D9B-1DBF,DISALLOWED,MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
+1DC0-1DE6,PVALID,COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
+1DE7-1DFC,UNASSIGNED,<RESERVED>..<RESERVED>
+1DFD-1DFF,PVALID,COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+1E00,DISALLOWED,LATIN CAPITAL LETTER A WITH RING BELOW
+1E01,PVALID,LATIN SMALL LETTER A WITH RING BELOW
+1E02,DISALLOWED,LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E03,PVALID,LATIN SMALL LETTER B WITH DOT ABOVE
+1E04,DISALLOWED,LATIN CAPITAL LETTER B WITH DOT BELOW
+1E05,PVALID,LATIN SMALL LETTER B WITH DOT BELOW
+1E06,DISALLOWED,LATIN CAPITAL LETTER B WITH LINE BELOW
+1E07,PVALID,LATIN SMALL LETTER B WITH LINE BELOW
+1E08,DISALLOWED,LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E09,PVALID,LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
+1E0A,DISALLOWED,LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0B,PVALID,LATIN SMALL LETTER D WITH DOT ABOVE
+1E0C,DISALLOWED,LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0D,PVALID,LATIN SMALL LETTER D WITH DOT BELOW
+1E0E,DISALLOWED,LATIN CAPITAL LETTER D WITH LINE BELOW
+1E0F,PVALID,LATIN SMALL LETTER D WITH LINE BELOW
+1E10,DISALLOWED,LATIN CAPITAL LETTER D WITH CEDILLA
+1E11,PVALID,LATIN SMALL LETTER D WITH CEDILLA
+1E12,DISALLOWED,LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E13,PVALID,LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW
+1E14,DISALLOWED,LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E15,PVALID,LATIN SMALL LETTER E WITH MACRON AND GRAVE
+1E16,DISALLOWED,LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E17,PVALID,LATIN SMALL LETTER E WITH MACRON AND ACUTE
+1E18,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E19,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW
+1E1A,DISALLOWED,LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1B,PVALID,LATIN SMALL LETTER E WITH TILDE BELOW
+1E1C,DISALLOWED,LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1D,PVALID,LATIN SMALL LETTER E WITH CEDILLA AND BREVE
+1E1E,DISALLOWED,LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E1F,PVALID,LATIN SMALL LETTER F WITH DOT ABOVE
+1E20,DISALLOWED,LATIN CAPITAL LETTER G WITH MACRON
+1E21,PVALID,LATIN SMALL LETTER G WITH MACRON
+1E22,DISALLOWED,LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E23,PVALID,LATIN SMALL LETTER H WITH DOT ABOVE
+1E24,DISALLOWED,LATIN CAPITAL LETTER H WITH DOT BELOW
+1E25,PVALID,LATIN SMALL LETTER H WITH DOT BELOW
+1E26,DISALLOWED,LATIN CAPITAL LETTER H WITH DIAERESIS
+1E27,PVALID,LATIN SMALL LETTER H WITH DIAERESIS
+1E28,DISALLOWED,LATIN CAPITAL LETTER H WITH CEDILLA
+1E29,PVALID,LATIN SMALL LETTER H WITH CEDILLA
+1E2A,DISALLOWED,LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2B,PVALID,LATIN SMALL LETTER H WITH BREVE BELOW
+1E2C,DISALLOWED,LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2D,PVALID,LATIN SMALL LETTER I WITH TILDE BELOW
+1E2E,DISALLOWED,LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E2F,PVALID,LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
+1E30,DISALLOWED,LATIN CAPITAL LETTER K WITH ACUTE
+1E31,PVALID,LATIN SMALL LETTER K WITH ACUTE
+1E32,DISALLOWED,LATIN CAPITAL LETTER K WITH DOT BELOW
+1E33,PVALID,LATIN SMALL LETTER K WITH DOT BELOW
+1E34,DISALLOWED,LATIN CAPITAL LETTER K WITH LINE BELOW
+1E35,PVALID,LATIN SMALL LETTER K WITH LINE BELOW
+1E36,DISALLOWED,LATIN CAPITAL LETTER L WITH DOT BELOW
+1E37,PVALID,LATIN SMALL LETTER L WITH DOT BELOW
+1E38,DISALLOWED,LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E39,PVALID,LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
+1E3A,DISALLOWED,LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3B,PVALID,LATIN SMALL LETTER L WITH LINE BELOW
+1E3C,DISALLOWED,LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3D,PVALID,LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW
+1E3E,DISALLOWED,LATIN CAPITAL LETTER M WITH ACUTE
+1E3F,PVALID,LATIN SMALL LETTER M WITH ACUTE
+1E40,DISALLOWED,LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E41,PVALID,LATIN SMALL LETTER M WITH DOT ABOVE
+1E42,DISALLOWED,LATIN CAPITAL LETTER M WITH DOT BELOW
+1E43,PVALID,LATIN SMALL LETTER M WITH DOT BELOW
+1E44,DISALLOWED,LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E45,PVALID,LATIN SMALL LETTER N WITH DOT ABOVE
+1E46,DISALLOWED,LATIN CAPITAL LETTER N WITH DOT BELOW
+1E47,PVALID,LATIN SMALL LETTER N WITH DOT BELOW
+1E48,DISALLOWED,LATIN CAPITAL LETTER N WITH LINE BELOW
+1E49,PVALID,LATIN SMALL LETTER N WITH LINE BELOW
+1E4A,DISALLOWED,LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4B,PVALID,LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW
+1E4C,DISALLOWED,LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4D,PVALID,LATIN SMALL LETTER O WITH TILDE AND ACUTE
+1E4E,DISALLOWED,LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E4F,PVALID,LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
+1E50,DISALLOWED,LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E51,PVALID,LATIN SMALL LETTER O WITH MACRON AND GRAVE
+1E52,DISALLOWED,LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E53,PVALID,LATIN SMALL LETTER O WITH MACRON AND ACUTE
+1E54,DISALLOWED,LATIN CAPITAL LETTER P WITH ACUTE
+1E55,PVALID,LATIN SMALL LETTER P WITH ACUTE
+1E56,DISALLOWED,LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E57,PVALID,LATIN SMALL LETTER P WITH DOT ABOVE
+1E58,DISALLOWED,LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E59,PVALID,LATIN SMALL LETTER R WITH DOT ABOVE
+1E5A,DISALLOWED,LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5B,PVALID,LATIN SMALL LETTER R WITH DOT BELOW
+1E5C,DISALLOWED,LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5D,PVALID,LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
+1E5E,DISALLOWED,LATIN CAPITAL LETTER R WITH LINE BELOW
+1E5F,PVALID,LATIN SMALL LETTER R WITH LINE BELOW
+1E60,DISALLOWED,LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E61,PVALID,LATIN SMALL LETTER S WITH DOT ABOVE
+1E62,DISALLOWED,LATIN CAPITAL LETTER S WITH DOT BELOW
+1E63,PVALID,LATIN SMALL LETTER S WITH DOT BELOW
+1E64,DISALLOWED,LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E65,PVALID,LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
+1E66,DISALLOWED,LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E67,PVALID,LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
+1E68,DISALLOWED,LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E69,PVALID,LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A,DISALLOWED,LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6B,PVALID,LATIN SMALL LETTER T WITH DOT ABOVE
+1E6C,DISALLOWED,LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6D,PVALID,LATIN SMALL LETTER T WITH DOT BELOW
+1E6E,DISALLOWED,LATIN CAPITAL LETTER T WITH LINE BELOW
+1E6F,PVALID,LATIN SMALL LETTER T WITH LINE BELOW
+1E70,DISALLOWED,LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E71,PVALID,LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW
+1E72,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E73,PVALID,LATIN SMALL LETTER U WITH DIAERESIS BELOW
+1E74,DISALLOWED,LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E75,PVALID,LATIN SMALL LETTER U WITH TILDE BELOW
+1E76,DISALLOWED,LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E77,PVALID,LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW
+1E78,DISALLOWED,LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E79,PVALID,LATIN SMALL LETTER U WITH TILDE AND ACUTE
+1E7A,DISALLOWED,LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7B,PVALID,LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
+1E7C,DISALLOWED,LATIN CAPITAL LETTER V WITH TILDE
+1E7D,PVALID,LATIN SMALL LETTER V WITH TILDE
+1E7E,DISALLOWED,LATIN CAPITAL LETTER V WITH DOT BELOW
+1E7F,PVALID,LATIN SMALL LETTER V WITH DOT BELOW
+1E80,DISALLOWED,LATIN CAPITAL LETTER W WITH GRAVE
+1E81,PVALID,LATIN SMALL LETTER W WITH GRAVE
+1E82,DISALLOWED,LATIN CAPITAL LETTER W WITH ACUTE
+1E83,PVALID,LATIN SMALL LETTER W WITH ACUTE
+1E84,DISALLOWED,LATIN CAPITAL LETTER W WITH DIAERESIS
+1E85,PVALID,LATIN SMALL LETTER W WITH DIAERESIS
+1E86,DISALLOWED,LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E87,PVALID,LATIN SMALL LETTER W WITH DOT ABOVE
+1E88,DISALLOWED,LATIN CAPITAL LETTER W WITH DOT BELOW
+1E89,PVALID,LATIN SMALL LETTER W WITH DOT BELOW
+1E8A,DISALLOWED,LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8B,PVALID,LATIN SMALL LETTER X WITH DOT ABOVE
+1E8C,DISALLOWED,LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8D,PVALID,LATIN SMALL LETTER X WITH DIAERESIS
+1E8E,DISALLOWED,LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E8F,PVALID,LATIN SMALL LETTER Y WITH DOT ABOVE
+1E90,DISALLOWED,LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E91,PVALID,LATIN SMALL LETTER Z WITH CIRCUMFLEX
+1E92,DISALLOWED,LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E93,PVALID,LATIN SMALL LETTER Z WITH DOT BELOW
+1E94,DISALLOWED,LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E95-1E99,PVALID,LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A-1E9B,DISALLOWED,LATIN SMALL LETTER A WITH RIGHT HALF RING..LATIN SMALL LETTER LONG S WITH DOT ABOVE
+1E9C-1E9D,PVALID,LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE..LATIN SMALL LETTER LONG S WITH HIGH STROKE
+1E9E,DISALLOWED,LATIN CAPITAL LETTER SHARP S
+1E9F,PVALID,LATIN SMALL LETTER DELTA
+1EA0,DISALLOWED,LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA1,PVALID,LATIN SMALL LETTER A WITH DOT BELOW
+1EA2,DISALLOWED,LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA3,PVALID,LATIN SMALL LETTER A WITH HOOK ABOVE
+1EA4,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA5,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA7,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EA9,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAB,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAD,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EAF,PVALID,LATIN SMALL LETTER A WITH BREVE AND ACUTE
+1EB0,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB1,PVALID,LATIN SMALL LETTER A WITH BREVE AND GRAVE
+1EB2,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB3,PVALID,LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB5,PVALID,LATIN SMALL LETTER A WITH BREVE AND TILDE
+1EB6,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB7,PVALID,LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
+1EB8,DISALLOWED,LATIN CAPITAL LETTER E WITH DOT BELOW
+1EB9,PVALID,LATIN SMALL LETTER E WITH DOT BELOW
+1EBA,DISALLOWED,LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBB,PVALID,LATIN SMALL LETTER E WITH HOOK ABOVE
+1EBC,DISALLOWED,LATIN CAPITAL LETTER E WITH TILDE
+1EBD,PVALID,LATIN SMALL LETTER E WITH TILDE
+1EBE,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EBF,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC1,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC3,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC5,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC7,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8,DISALLOWED,LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1EC9,PVALID,LATIN SMALL LETTER I WITH HOOK ABOVE
+1ECA,DISALLOWED,LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECB,PVALID,LATIN SMALL LETTER I WITH DOT BELOW
+1ECC,DISALLOWED,LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECD,PVALID,LATIN SMALL LETTER O WITH DOT BELOW
+1ECE,DISALLOWED,LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ECF,PVALID,LATIN SMALL LETTER O WITH HOOK ABOVE
+1ED0,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED1,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED3,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED5,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED7,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1ED9,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDB,PVALID,LATIN SMALL LETTER O WITH HORN AND ACUTE
+1EDC,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDD,PVALID,LATIN SMALL LETTER O WITH HORN AND GRAVE
+1EDE,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EDF,PVALID,LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
+1EE0,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE1,PVALID,LATIN SMALL LETTER O WITH HORN AND TILDE
+1EE2,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE3,PVALID,LATIN SMALL LETTER O WITH HORN AND DOT BELOW
+1EE4,DISALLOWED,LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE5,PVALID,LATIN SMALL LETTER U WITH DOT BELOW
+1EE6,DISALLOWED,LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE7,PVALID,LATIN SMALL LETTER U WITH HOOK ABOVE
+1EE8,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EE9,PVALID,LATIN SMALL LETTER U WITH HORN AND ACUTE
+1EEA,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEB,PVALID,LATIN SMALL LETTER U WITH HORN AND GRAVE
+1EEC,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EED,PVALID,LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
+1EEE,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EEF,PVALID,LATIN SMALL LETTER U WITH HORN AND TILDE
+1EF0,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF1,PVALID,LATIN SMALL LETTER U WITH HORN AND DOT BELOW
+1EF2,DISALLOWED,LATIN CAPITAL LETTER Y WITH GRAVE
+1EF3,PVALID,LATIN SMALL LETTER Y WITH GRAVE
+1EF4,DISALLOWED,LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF5,PVALID,LATIN SMALL LETTER Y WITH DOT BELOW
+1EF6,DISALLOWED,LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF7,PVALID,LATIN SMALL LETTER Y WITH HOOK ABOVE
+1EF8,DISALLOWED,LATIN CAPITAL LETTER Y WITH TILDE
+1EF9,PVALID,LATIN SMALL LETTER Y WITH TILDE
+1EFA,DISALLOWED,LATIN CAPITAL LETTER MIDDLE-WELSH LL
+1EFB,PVALID,LATIN SMALL LETTER MIDDLE-WELSH LL
+1EFC,DISALLOWED,LATIN CAPITAL LETTER MIDDLE-WELSH V
+1EFD,PVALID,LATIN SMALL LETTER MIDDLE-WELSH V
+1EFE,DISALLOWED,LATIN CAPITAL LETTER Y WITH LOOP
+1EFF-1F07,PVALID,LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F08-1F0F,DISALLOWED,GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F10-1F15,PVALID,GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
+1F16-1F17,UNASSIGNED,<RESERVED>..<RESERVED>
+1F18-1F1D,DISALLOWED,GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F1E-1F1F,UNASSIGNED,<RESERVED>..<RESERVED>
+1F20-1F27,PVALID,GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI
+1F28-1F2F,DISALLOWED,GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F30-1F37,PVALID,GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F38-1F3F,DISALLOWED,GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F40-1F45,PVALID,GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
+1F46-1F47,UNASSIGNED,<RESERVED>..<RESERVED>
+1F48-1F4D,DISALLOWED,GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F4E-1F4F,UNASSIGNED,<RESERVED>..<RESERVED>
+1F50-1F57,PVALID,GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F58,UNASSIGNED,<RESERVED>
+1F59,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5A,UNASSIGNED,<RESERVED>
+1F5B,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5C,UNASSIGNED,<RESERVED>
+1F5D,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5E,UNASSIGNED,<RESERVED>
+1F5F,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F60-1F67,PVALID,GREEK SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F68-1F6F,DISALLOWED,GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F70,PVALID,GREEK SMALL LETTER ALPHA WITH VARIA
+1F71,DISALLOWED,GREEK SMALL LETTER ALPHA WITH OXIA
+1F72,PVALID,GREEK SMALL LETTER EPSILON WITH VARIA
+1F73,DISALLOWED,GREEK SMALL LETTER EPSILON WITH OXIA
+1F74,PVALID,GREEK SMALL LETTER ETA WITH VARIA
+1F75,DISALLOWED,GREEK SMALL LETTER ETA WITH OXIA
+1F76,PVALID,GREEK SMALL LETTER IOTA WITH VARIA
+1F77,DISALLOWED,GREEK SMALL LETTER IOTA WITH OXIA
+1F78,PVALID,GREEK SMALL LETTER OMICRON WITH VARIA
+1F79,DISALLOWED,GREEK SMALL LETTER OMICRON WITH OXIA
+1F7A,PVALID,GREEK SMALL LETTER UPSILON WITH VARIA
+1F7B,DISALLOWED,GREEK SMALL LETTER UPSILON WITH OXIA
+1F7C,PVALID,GREEK SMALL LETTER OMEGA WITH VARIA
+1F7D,DISALLOWED,GREEK SMALL LETTER OMEGA WITH OXIA
+1F7E-1F7F,UNASSIGNED,<RESERVED>..<RESERVED>
+1F80-1FAF,DISALLOWED,GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FB0-1FB1,PVALID,GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK SMALL LETTER ALPHA WITH MACRON
+1FB2-1FB4,DISALLOWED,GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB5,UNASSIGNED,<RESERVED>
+1FB6,PVALID,GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FB7-1FC4,DISALLOWED,GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC5,UNASSIGNED,<RESERVED>
+1FC6,PVALID,GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FC7-1FCF,DISALLOWED,GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK PSILI AND PERISPOMENI
+1FD0-1FD2,PVALID,GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3,DISALLOWED,GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD4-1FD5,UNASSIGNED,<RESERVED>..<RESERVED>
+1FD6-1FD7,PVALID,GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FD8-1FDB,DISALLOWED,GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER IOTA WITH OXIA
+1FDC,UNASSIGNED,<RESERVED>
+1FDD-1FDF,DISALLOWED,GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
+1FE0-1FE2,PVALID,GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3,DISALLOWED,GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4-1FE7,PVALID,GREEK SMALL LETTER RHO WITH PSILI..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FE8-1FEF,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK VARIA
+1FF0-1FF1,UNASSIGNED,<RESERVED>..<RESERVED>
+1FF2-1FF4,DISALLOWED,GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF5,UNASSIGNED,<RESERVED>
+1FF6,PVALID,GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+1FF7-1FFE,DISALLOWED,GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK DASIA
+1FFF,UNASSIGNED,<RESERVED>
+2000-200B,DISALLOWED,EN QUAD..ZERO WIDTH SPACE
+200C-200D,CONTEXTJ,ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+200E-2064,DISALLOWED,LEFT-TO-RIGHT MARK..INVISIBLE PLUS
+2065-2069,UNASSIGNED,<RESERVED>..<RESERVED>
+206A-2071,DISALLOWED,INHIBIT SYMMETRIC SWAPPING..SUPERSCRIPT LATIN SMALL LETTER I
+2072-2073,UNASSIGNED,<RESERVED>..<RESERVED>
+2074-208E,DISALLOWED,SUPERSCRIPT FOUR..SUBSCRIPT RIGHT PARENTHESIS
+208F,UNASSIGNED,<RESERVED>
+2090-2094,DISALLOWED,LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
+2095-209F,UNASSIGNED,<RESERVED>..<RESERVED>
+20A0-20B8,DISALLOWED,EURO-CURRENCY SIGN..TENGE SIGN
+20B9-20CF,UNASSIGNED,<RESERVED>..<RESERVED>
+20D0-20F0,DISALLOWED,COMBINING LEFT HARPOON ABOVE..COMBINING ASTERISK ABOVE
+20F1-20FF,UNASSIGNED,<RESERVED>..<RESERVED>
+2100-214D,DISALLOWED,ACCOUNT OF..AKTIESELSKAB
+214E,PVALID,TURNED SMALL F
+214F-2183,DISALLOWED,SYMBOL FOR SAMARITAN SOURCE..ROMAN NUMERAL REVERSED ONE HUNDRED
+2184,PVALID,LATIN SMALL LETTER REVERSED C
+2185-2189,DISALLOWED,ROMAN NUMERAL SIX LATE FORM..VULGAR FRACTION ZERO THIRDS
+218A-218F,UNASSIGNED,<RESERVED>..<RESERVED>
+2190-23E8,DISALLOWED,LEFTWARDS ARROW..DECIMAL EXPONENT SYMBOL
+23E9-23FF,UNASSIGNED,<RESERVED>..<RESERVED>
+2400-2426,DISALLOWED,SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
+2427-243F,UNASSIGNED,<RESERVED>..<RESERVED>
+2440-244A,DISALLOWED,OCR HOOK..OCR DOUBLE BACKSLASH
+244B-245F,UNASSIGNED,<RESERVED>..<RESERVED>
+2460-26CD,DISALLOWED,CIRCLED DIGIT ONE..DISABLED CAR
+26CE,UNASSIGNED,<RESERVED>
+26CF-26E1,DISALLOWED,PICK..RESTRICTED LEFT ENTRY-2
+26E2,UNASSIGNED,<RESERVED>
+26E3,DISALLOWED,HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
+26E4-26E7,UNASSIGNED,<RESERVED>..<RESERVED>
+26E8-26FF,DISALLOWED,BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
+2700,UNASSIGNED,<RESERVED>
+2701-2704,DISALLOWED,UPPER BLADE SCISSORS..WHITE SCISSORS
+2705,UNASSIGNED,<RESERVED>
+2706-2709,DISALLOWED,TELEPHONE LOCATION SIGN..ENVELOPE
+270A-270B,UNASSIGNED,<RESERVED>..<RESERVED>
+270C-2727,DISALLOWED,VICTORY HAND..WHITE FOUR POINTED STAR
+2728,UNASSIGNED,<RESERVED>
+2729-274B,DISALLOWED,STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
+274C,UNASSIGNED,<RESERVED>
+274D,DISALLOWED,SHADOWED WHITE CIRCLE
+274E,UNASSIGNED,<RESERVED>
+274F-2752,DISALLOWED,LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
+2753-2755,UNASSIGNED,<RESERVED>..<RESERVED>
+2756-275E,DISALLOWED,BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
+275F-2760,UNASSIGNED,<RESERVED>..<RESERVED>
+2761-2794,DISALLOWED,CURVED STEM PARAGRAPH SIGN ORNAMENT..HEAVY WIDE-HEADED RIGHTWARDS ARROW
+2795-2797,UNASSIGNED,<RESERVED>..<RESERVED>
+2798-27AF,DISALLOWED,HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
+27B0,UNASSIGNED,<RESERVED>
+27B1-27BE,DISALLOWED,NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
+27BF,UNASSIGNED,<RESERVED>
+27C0-27CA,DISALLOWED,THREE DIMENSIONAL ANGLE..VERTICAL BAR WITH HORIZONTAL STROKE
+27CB,UNASSIGNED,<RESERVED>
+27CC,DISALLOWED,LONG DIVISION
+27CD-27CF,UNASSIGNED,<RESERVED>..<RESERVED>
+27D0-2B4C,DISALLOWED,WHITE DIAMOND WITH CENTRED DOT..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
+2B4D-2B4F,UNASSIGNED,<RESERVED>..<RESERVED>
+2B50-2B59,DISALLOWED,WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
+2B5A-2BFF,UNASSIGNED,<RESERVED>..<RESERVED>
+2C00-2C2E,DISALLOWED,GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C2F,UNASSIGNED,<RESERVED>
+2C30-2C5E,PVALID,GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
+2C5F,UNASSIGNED,<RESERVED>
+2C60,DISALLOWED,LATIN CAPITAL LETTER L WITH DOUBLE BAR
+2C61,PVALID,LATIN SMALL LETTER L WITH DOUBLE BAR
+2C62-2C64,DISALLOWED,LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL
+2C65-2C66,PVALID,LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE
+2C67,DISALLOWED,LATIN CAPITAL LETTER H WITH DESCENDER
+2C68,PVALID,LATIN SMALL LETTER H WITH DESCENDER
+2C69,DISALLOWED,LATIN CAPITAL LETTER K WITH DESCENDER
+2C6A,PVALID,LATIN SMALL LETTER K WITH DESCENDER
+2C6B,DISALLOWED,LATIN CAPITAL LETTER Z WITH DESCENDER
+2C6C,PVALID,LATIN SMALL LETTER Z WITH DESCENDER
+2C6D-2C70,DISALLOWED,LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA
+2C71,PVALID,LATIN SMALL LETTER V WITH RIGHT HOOK
+2C72,DISALLOWED,LATIN CAPITAL LETTER W WITH HOOK
+2C73-2C74,PVALID,LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL
+2C75,DISALLOWED,LATIN CAPITAL LETTER HALF H
+2C76-2C7B,PVALID,LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E
+2C7C-2C80,DISALLOWED,LATIN SUBSCRIPT SMALL LETTER J..COPTIC CAPITAL LETTER ALFA
+2C81,PVALID,COPTIC SMALL LETTER ALFA
+2C82,DISALLOWED,COPTIC CAPITAL LETTER VIDA
+2C83,PVALID,COPTIC SMALL LETTER VIDA
+2C84,DISALLOWED,COPTIC CAPITAL LETTER GAMMA
+2C85,PVALID,COPTIC SMALL LETTER GAMMA
+2C86,DISALLOWED,COPTIC CAPITAL LETTER DALDA
+2C87,PVALID,COPTIC SMALL LETTER DALDA
+2C88,DISALLOWED,COPTIC CAPITAL LETTER EIE
+2C89,PVALID,COPTIC SMALL LETTER EIE
+2C8A,DISALLOWED,COPTIC CAPITAL LETTER SOU
+2C8B,PVALID,COPTIC SMALL LETTER SOU
+2C8C,DISALLOWED,COPTIC CAPITAL LETTER ZATA
+2C8D,PVALID,COPTIC SMALL LETTER ZATA
+2C8E,DISALLOWED,COPTIC CAPITAL LETTER HATE
+2C8F,PVALID,COPTIC SMALL LETTER HATE
+2C90,DISALLOWED,COPTIC CAPITAL LETTER THETHE
+2C91,PVALID,COPTIC SMALL LETTER THETHE
+2C92,DISALLOWED,COPTIC CAPITAL LETTER IAUDA
+2C93,PVALID,COPTIC SMALL LETTER IAUDA
+2C94,DISALLOWED,COPTIC CAPITAL LETTER KAPA
+2C95,PVALID,COPTIC SMALL LETTER KAPA
+2C96,DISALLOWED,COPTIC CAPITAL LETTER LAULA
+2C97,PVALID,COPTIC SMALL LETTER LAULA
+2C98,DISALLOWED,COPTIC CAPITAL LETTER MI
+2C99,PVALID,COPTIC SMALL LETTER MI
+2C9A,DISALLOWED,COPTIC CAPITAL LETTER NI
+2C9B,PVALID,COPTIC SMALL LETTER NI
+2C9C,DISALLOWED,COPTIC CAPITAL LETTER KSI
+2C9D,PVALID,COPTIC SMALL LETTER KSI
+2C9E,DISALLOWED,COPTIC CAPITAL LETTER O
+2C9F,PVALID,COPTIC SMALL LETTER O
+2CA0,DISALLOWED,COPTIC CAPITAL LETTER PI
+2CA1,PVALID,COPTIC SMALL LETTER PI
+2CA2,DISALLOWED,COPTIC CAPITAL LETTER RO
+2CA3,PVALID,COPTIC SMALL LETTER RO
+2CA4,DISALLOWED,COPTIC CAPITAL LETTER SIMA
+2CA5,PVALID,COPTIC SMALL LETTER SIMA
+2CA6,DISALLOWED,COPTIC CAPITAL LETTER TAU
+2CA7,PVALID,COPTIC SMALL LETTER TAU
+2CA8,DISALLOWED,COPTIC CAPITAL LETTER UA
+2CA9,PVALID,COPTIC SMALL LETTER UA
+2CAA,DISALLOWED,COPTIC CAPITAL LETTER FI
+2CAB,PVALID,COPTIC SMALL LETTER FI
+2CAC,DISALLOWED,COPTIC CAPITAL LETTER KHI
+2CAD,PVALID,COPTIC SMALL LETTER KHI
+2CAE,DISALLOWED,COPTIC CAPITAL LETTER PSI
+2CAF,PVALID,COPTIC SMALL LETTER PSI
+2CB0,DISALLOWED,COPTIC CAPITAL LETTER OOU
+2CB1,PVALID,COPTIC SMALL LETTER OOU
+2CB2,DISALLOWED,COPTIC CAPITAL LETTER DIALECT-P ALEF
+2CB3,PVALID,COPTIC SMALL LETTER DIALECT-P ALEF
+2CB4,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC AIN
+2CB5,PVALID,COPTIC SMALL LETTER OLD COPTIC AIN
+2CB6,DISALLOWED,COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
+2CB7,PVALID,COPTIC SMALL LETTER CRYPTOGRAMMIC EIE
+2CB8,DISALLOWED,COPTIC CAPITAL LETTER DIALECT-P KAPA
+2CB9,PVALID,COPTIC SMALL LETTER DIALECT-P KAPA
+2CBA,DISALLOWED,COPTIC CAPITAL LETTER DIALECT-P NI
+2CBB,PVALID,COPTIC SMALL LETTER DIALECT-P NI
+2CBC,DISALLOWED,COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
+2CBD,PVALID,COPTIC SMALL LETTER CRYPTOGRAMMIC NI
+2CBE,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC OOU
+2CBF,PVALID,COPTIC SMALL LETTER OLD COPTIC OOU
+2CC0,DISALLOWED,COPTIC CAPITAL LETTER SAMPI
+2CC1,PVALID,COPTIC SMALL LETTER SAMPI
+2CC2,DISALLOWED,COPTIC CAPITAL LETTER CROSSED SHEI
+2CC3,PVALID,COPTIC SMALL LETTER CROSSED SHEI
+2CC4,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC SHEI
+2CC5,PVALID,COPTIC SMALL LETTER OLD COPTIC SHEI
+2CC6,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC ESH
+2CC7,PVALID,COPTIC SMALL LETTER OLD COPTIC ESH
+2CC8,DISALLOWED,COPTIC CAPITAL LETTER AKHMIMIC KHEI
+2CC9,PVALID,COPTIC SMALL LETTER AKHMIMIC KHEI
+2CCA,DISALLOWED,COPTIC CAPITAL LETTER DIALECT-P HORI
+2CCB,PVALID,COPTIC SMALL LETTER DIALECT-P HORI
+2CCC,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC HORI
+2CCD,PVALID,COPTIC SMALL LETTER OLD COPTIC HORI
+2CCE,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC HA
+2CCF,PVALID,COPTIC SMALL LETTER OLD COPTIC HA
+2CD0,DISALLOWED,COPTIC CAPITAL LETTER L-SHAPED HA
+2CD1,PVALID,COPTIC SMALL LETTER L-SHAPED HA
+2CD2,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC HEI
+2CD3,PVALID,COPTIC SMALL LETTER OLD COPTIC HEI
+2CD4,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC HAT
+2CD5,PVALID,COPTIC SMALL LETTER OLD COPTIC HAT
+2CD6,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC GANGIA
+2CD7,PVALID,COPTIC SMALL LETTER OLD COPTIC GANGIA
+2CD8,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC DJA
+2CD9,PVALID,COPTIC SMALL LETTER OLD COPTIC DJA
+2CDA,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC SHIMA
+2CDB,PVALID,COPTIC SMALL LETTER OLD COPTIC SHIMA
+2CDC,DISALLOWED,COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
+2CDD,PVALID,COPTIC SMALL LETTER OLD NUBIAN SHIMA
+2CDE,DISALLOWED,COPTIC CAPITAL LETTER OLD NUBIAN NGI
+2CDF,PVALID,COPTIC SMALL LETTER OLD NUBIAN NGI
+2CE0,DISALLOWED,COPTIC CAPITAL LETTER OLD NUBIAN NYI
+2CE1,PVALID,COPTIC SMALL LETTER OLD NUBIAN NYI
+2CE2,DISALLOWED,COPTIC CAPITAL LETTER OLD NUBIAN WAU
+2CE3-2CE4,PVALID,COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI
+2CE5-2CEB,DISALLOWED,COPTIC SYMBOL MI RO..COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
+2CEC,PVALID,COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI
+2CED,DISALLOWED,COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+2CEE-2CF1,PVALID,COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA..COPTIC COMBINING SPIRITUS LENIS
+2CF2-2CF8,UNASSIGNED,<RESERVED>..<RESERVED>
+2CF9-2CFF,DISALLOWED,COPTIC OLD NUBIAN FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
+2D00-2D25,PVALID,GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
+2D26-2D2F,UNASSIGNED,<RESERVED>..<RESERVED>
+2D30-2D65,PVALID,TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
+2D66-2D6E,UNASSIGNED,<RESERVED>..<RESERVED>
+2D6F,DISALLOWED,TIFINAGH MODIFIER LETTER LABIALIZATION MARK
+2D70-2D7F,UNASSIGNED,<RESERVED>..<RESERVED>
+2D80-2D96,PVALID,ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
+2D97-2D9F,UNASSIGNED,<RESERVED>..<RESERVED>
+2DA0-2DA6,PVALID,ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
+2DA7,UNASSIGNED,<RESERVED>
+2DA8-2DAE,PVALID,ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
+2DAF,UNASSIGNED,<RESERVED>
+2DB0-2DB6,PVALID,ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
+2DB7,UNASSIGNED,<RESERVED>
+2DB8-2DBE,PVALID,ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
+2DBF,UNASSIGNED,<RESERVED>
+2DC0-2DC6,PVALID,ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
+2DC7,UNASSIGNED,<RESERVED>
+2DC8-2DCE,PVALID,ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
+2DCF,UNASSIGNED,<RESERVED>
+2DD0-2DD6,PVALID,ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
+2DD7,UNASSIGNED,<RESERVED>
+2DD8-2DDE,PVALID,ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
+2DDF,UNASSIGNED,<RESERVED>
+2DE0-2DFF,PVALID,COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
+2E00-2E2E,DISALLOWED,RIGHT ANGLE SUBSTITUTION MARKER..REVERSED QUESTION MARK
+2E2F,PVALID,VERTICAL TILDE
+2E30-2E31,DISALLOWED,RING POINT..WORD SEPARATOR MIDDLE DOT
+2E32-2E7F,UNASSIGNED,<RESERVED>..<RESERVED>
+2E80-2E99,DISALLOWED,CJK RADICAL REPEAT..CJK RADICAL RAP
+2E9A,UNASSIGNED,<RESERVED>
+2E9B-2EF3,DISALLOWED,CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
+2EF4-2EFF,UNASSIGNED,<RESERVED>..<RESERVED>
+2F00-2FD5,DISALLOWED,KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
+2FD6-2FEF,UNASSIGNED,<RESERVED>..<RESERVED>
+2FF0-2FFB,DISALLOWED,IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
+2FFC-2FFF,UNASSIGNED,<RESERVED>..<RESERVED>
+3000-3004,DISALLOWED,IDEOGRAPHIC SPACE..JAPANESE INDUSTRIAL STANDARD SYMBOL
+3005-3007,PVALID,IDEOGRAPHIC ITERATION MARK..IDEOGRAPHIC NUMBER ZERO
+3008-3029,DISALLOWED,LEFT ANGLE BRACKET..HANGZHOU NUMERAL NINE
+302A-302D,PVALID,IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
+302E-303B,DISALLOWED,HANGUL SINGLE DOT TONE MARK..VERTICAL IDEOGRAPHIC ITERATION MARK
+303C,PVALID,MASU MARK
+303D-303F,DISALLOWED,PART ALTERNATION MARK..IDEOGRAPHIC HALF FILL SPACE
+3040,UNASSIGNED,<RESERVED>
+3041-3096,PVALID,HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
+3097-3098,UNASSIGNED,<RESERVED>..<RESERVED>
+3099-309A,PVALID,COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+309B-309C,DISALLOWED,KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+309D-309E,PVALID,HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
+309F-30A0,DISALLOWED,HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOUBLE HYPHEN
+30A1-30FA,PVALID,KATAKANA LETTER SMALL A..KATAKANA LETTER VO
+30FB,CONTEXTO,KATAKANA MIDDLE DOT
+30FC-30FE,PVALID,KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
+30FF,DISALLOWED,KATAKANA DIGRAPH KOTO
+3100-3104,UNASSIGNED,<RESERVED>..<RESERVED>
+3105-312D,PVALID,BOPOMOFO LETTER B..BOPOMOFO LETTER IH
+312E-3130,UNASSIGNED,<RESERVED>..<RESERVED>
+3131-318E,DISALLOWED,HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
+318F,UNASSIGNED,<RESERVED>
+3190-319F,DISALLOWED,IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION MAN MARK
+31A0-31B7,PVALID,BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
+31B8-31BF,UNASSIGNED,<RESERVED>..<RESERVED>
+31C0-31E3,DISALLOWED,CJK STROKE T..CJK STROKE Q
+31E4-31EF,UNASSIGNED,<RESERVED>..<RESERVED>
+31F0-31FF,PVALID,KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
+3200-321E,DISALLOWED,PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
+321F,UNASSIGNED,<RESERVED>
+3220-32FE,DISALLOWED,PARENTHESIZED IDEOGRAPH ONE..CIRCLED KATAKANA WO
+32FF,UNASSIGNED,<RESERVED>
+3300-33FF,DISALLOWED,SQUARE APAATO..SQUARE GAL
+3400-4DB5,PVALID,"<CJK IDEOGRAPH EXTENSION A, FIRST>..<CJK IDEOGRAPH EXTENSION A, LAST>"
+4DB6-4DBF,UNASSIGNED,<RESERVED>..<RESERVED>
+4DC0-4DFF,DISALLOWED,HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
+4E00-9FCB,PVALID,"<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>"
+9FCC-9FFF,UNASSIGNED,<RESERVED>..<RESERVED>
+A000-A48C,PVALID,YI SYLLABLE IT..YI SYLLABLE YYR
+A48D-A48F,UNASSIGNED,<RESERVED>..<RESERVED>
+A490-A4C6,DISALLOWED,YI RADICAL QOT..YI RADICAL KE
+A4C7-A4CF,UNASSIGNED,<RESERVED>..<RESERVED>
+A4D0-A4FD,PVALID,LISU LETTER BA..LISU LETTER TONE MYA JEU
+A4FE-A4FF,DISALLOWED,LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
+A500-A60C,PVALID,VAI SYLLABLE EE..VAI SYLLABLE LENGTHENER
+A60D-A60F,DISALLOWED,VAI COMMA..VAI QUESTION MARK
+A610-A62B,PVALID,VAI SYLLABLE NDOLE FA..VAI SYLLABLE NDOLE DO
+A62C-A63F,UNASSIGNED,<RESERVED>..<RESERVED>
+A640,DISALLOWED,CYRILLIC CAPITAL LETTER ZEMLYA
+A641,PVALID,CYRILLIC SMALL LETTER ZEMLYA
+A642,DISALLOWED,CYRILLIC CAPITAL LETTER DZELO
+A643,PVALID,CYRILLIC SMALL LETTER DZELO
+A644,DISALLOWED,CYRILLIC CAPITAL LETTER REVERSED DZE
+A645,PVALID,CYRILLIC SMALL LETTER REVERSED DZE
+A646,DISALLOWED,CYRILLIC CAPITAL LETTER IOTA
+A647,PVALID,CYRILLIC SMALL LETTER IOTA
+A648,DISALLOWED,CYRILLIC CAPITAL LETTER DJERV
+A649,PVALID,CYRILLIC SMALL LETTER DJERV
+A64A,DISALLOWED,CYRILLIC CAPITAL LETTER MONOGRAPH UK
+A64B,PVALID,CYRILLIC SMALL LETTER MONOGRAPH UK
+A64C,DISALLOWED,CYRILLIC CAPITAL LETTER BROAD OMEGA
+A64D,PVALID,CYRILLIC SMALL LETTER BROAD OMEGA
+A64E,DISALLOWED,CYRILLIC CAPITAL LETTER NEUTRAL YER
+A64F,PVALID,CYRILLIC SMALL LETTER NEUTRAL YER
+A650,DISALLOWED,CYRILLIC CAPITAL LETTER YERU WITH BACK YER
+A651,PVALID,CYRILLIC SMALL LETTER YERU WITH BACK YER
+A652,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED YAT
+A653,PVALID,CYRILLIC SMALL LETTER IOTIFIED YAT
+A654,DISALLOWED,CYRILLIC CAPITAL LETTER REVERSED YU
+A655,PVALID,CYRILLIC SMALL LETTER REVERSED YU
+A656,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED A
+A657,PVALID,CYRILLIC SMALL LETTER IOTIFIED A
+A658,DISALLOWED,CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
+A659,PVALID,CYRILLIC SMALL LETTER CLOSED LITTLE YUS
+A65A,DISALLOWED,CYRILLIC CAPITAL LETTER BLENDED YUS
+A65B,PVALID,CYRILLIC SMALL LETTER BLENDED YUS
+A65C,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
+A65D,PVALID,CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS
+A65E,DISALLOWED,CYRILLIC CAPITAL LETTER YN
+A65F,PVALID,CYRILLIC SMALL LETTER YN
+A660-A661,UNASSIGNED,<RESERVED>..<RESERVED>
+A662,DISALLOWED,CYRILLIC CAPITAL LETTER SOFT DE
+A663,PVALID,CYRILLIC SMALL LETTER SOFT DE
+A664,DISALLOWED,CYRILLIC CAPITAL LETTER SOFT EL
+A665,PVALID,CYRILLIC SMALL LETTER SOFT EL
+A666,DISALLOWED,CYRILLIC CAPITAL LETTER SOFT EM
+A667,PVALID,CYRILLIC SMALL LETTER SOFT EM
+A668,DISALLOWED,CYRILLIC CAPITAL LETTER MONOCULAR O
+A669,PVALID,CYRILLIC SMALL LETTER MONOCULAR O
+A66A,DISALLOWED,CYRILLIC CAPITAL LETTER BINOCULAR O
+A66B,PVALID,CYRILLIC SMALL LETTER BINOCULAR O
+A66C,DISALLOWED,CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
+A66D-A66F,PVALID,CYRILLIC SMALL LETTER DOUBLE MONOCULAR O..COMBINING CYRILLIC VZMET
+A670-A673,DISALLOWED,COMBINING CYRILLIC TEN MILLIONS SIGN..SLAVONIC ASTERISK
+A674-A67B,UNASSIGNED,<RESERVED>..<RESERVED>
+A67C-A67D,PVALID,COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
+A67E,DISALLOWED,CYRILLIC KAVYKA
+A67F,PVALID,CYRILLIC PAYEROK
+A680,DISALLOWED,CYRILLIC CAPITAL LETTER DWE
+A681,PVALID,CYRILLIC SMALL LETTER DWE
+A682,DISALLOWED,CYRILLIC CAPITAL LETTER DZWE
+A683,PVALID,CYRILLIC SMALL LETTER DZWE
+A684,DISALLOWED,CYRILLIC CAPITAL LETTER ZHWE
+A685,PVALID,CYRILLIC SMALL LETTER ZHWE
+A686,DISALLOWED,CYRILLIC CAPITAL LETTER CCHE
+A687,PVALID,CYRILLIC SMALL LETTER CCHE
+A688,DISALLOWED,CYRILLIC CAPITAL LETTER DZZE
+A689,PVALID,CYRILLIC SMALL LETTER DZZE
+A68A,DISALLOWED,CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
+A68B,PVALID,CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK
+A68C,DISALLOWED,CYRILLIC CAPITAL LETTER TWE
+A68D,PVALID,CYRILLIC SMALL LETTER TWE
+A68E,DISALLOWED,CYRILLIC CAPITAL LETTER TSWE
+A68F,PVALID,CYRILLIC SMALL LETTER TSWE
+A690,DISALLOWED,CYRILLIC CAPITAL LETTER TSSE
+A691,PVALID,CYRILLIC SMALL LETTER TSSE
+A692,DISALLOWED,CYRILLIC CAPITAL LETTER TCHE
+A693,PVALID,CYRILLIC SMALL LETTER TCHE
+A694,DISALLOWED,CYRILLIC CAPITAL LETTER HWE
+A695,PVALID,CYRILLIC SMALL LETTER HWE
+A696,DISALLOWED,CYRILLIC CAPITAL LETTER SHWE
+A697,PVALID,CYRILLIC SMALL LETTER SHWE
+A698-A69F,UNASSIGNED,<RESERVED>..<RESERVED>
+A6A0-A6E5,PVALID,BAMUM LETTER A..BAMUM LETTER KI
+A6E6-A6EF,DISALLOWED,BAMUM LETTER MO..BAMUM LETTER KOGHOM
+A6F0-A6F1,PVALID,BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+A6F2-A6F7,DISALLOWED,BAMUM NJAEMLI..BAMUM QUESTION MARK
+A6F8-A6FF,UNASSIGNED,<RESERVED>..<RESERVED>
+A700-A716,DISALLOWED,MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
+A717-A71F,PVALID,MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
+A720-A722,DISALLOWED,MODIFIER LETTER STRESS AND HIGH TONE..LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
+A723,PVALID,LATIN SMALL LETTER EGYPTOLOGICAL ALEF
+A724,DISALLOWED,LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
+A725,PVALID,LATIN SMALL LETTER EGYPTOLOGICAL AIN
+A726,DISALLOWED,LATIN CAPITAL LETTER HENG
+A727,PVALID,LATIN SMALL LETTER HENG
+A728,DISALLOWED,LATIN CAPITAL LETTER TZ
+A729,PVALID,LATIN SMALL LETTER TZ
+A72A,DISALLOWED,LATIN CAPITAL LETTER TRESILLO
+A72B,PVALID,LATIN SMALL LETTER TRESILLO
+A72C,DISALLOWED,LATIN CAPITAL LETTER CUATRILLO
+A72D,PVALID,LATIN SMALL LETTER CUATRILLO
+A72E,DISALLOWED,LATIN CAPITAL LETTER CUATRILLO WITH COMMA
+A72F-A731,PVALID,LATIN SMALL LETTER CUATRILLO WITH COMMA..LATIN LETTER SMALL CAPITAL S
+A732,DISALLOWED,LATIN CAPITAL LETTER AA
+A733,PVALID,LATIN SMALL LETTER AA
+A734,DISALLOWED,LATIN CAPITAL LETTER AO
+A735,PVALID,LATIN SMALL LETTER AO
+A736,DISALLOWED,LATIN CAPITAL LETTER AU
+A737,PVALID,LATIN SMALL LETTER AU
+A738,DISALLOWED,LATIN CAPITAL LETTER AV
+A739,PVALID,LATIN SMALL LETTER AV
+A73A,DISALLOWED,LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
+A73B,PVALID,LATIN SMALL LETTER AV WITH HORIZONTAL BAR
+A73C,DISALLOWED,LATIN CAPITAL LETTER AY
+A73D,PVALID,LATIN SMALL LETTER AY
+A73E,DISALLOWED,LATIN CAPITAL LETTER REVERSED C WITH DOT
+A73F,PVALID,LATIN SMALL LETTER REVERSED C WITH DOT
+A740,DISALLOWED,LATIN CAPITAL LETTER K WITH STROKE
+A741,PVALID,LATIN SMALL LETTER K WITH STROKE
+A742,DISALLOWED,LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
+A743,PVALID,LATIN SMALL LETTER K WITH DIAGONAL STROKE
+A744,DISALLOWED,LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
+A745,PVALID,LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE
+A746,DISALLOWED,LATIN CAPITAL LETTER BROKEN L
+A747,PVALID,LATIN SMALL LETTER BROKEN L
+A748,DISALLOWED,LATIN CAPITAL LETTER L WITH HIGH STROKE
+A749,PVALID,LATIN SMALL LETTER L WITH HIGH STROKE
+A74A,DISALLOWED,LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
+A74B,PVALID,LATIN SMALL LETTER O WITH LONG STROKE OVERLAY
+A74C,DISALLOWED,LATIN CAPITAL LETTER O WITH LOOP
+A74D,PVALID,LATIN SMALL LETTER O WITH LOOP
+A74E,DISALLOWED,LATIN CAPITAL LETTER OO
+A74F,PVALID,LATIN SMALL LETTER OO
+A750,DISALLOWED,LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
+A751,PVALID,LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER
+A752,DISALLOWED,LATIN CAPITAL LETTER P WITH FLOURISH
+A753,PVALID,LATIN SMALL LETTER P WITH FLOURISH
+A754,DISALLOWED,LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
+A755,PVALID,LATIN SMALL LETTER P WITH SQUIRREL TAIL
+A756,DISALLOWED,LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
+A757,PVALID,LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER
+A758,DISALLOWED,LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
+A759,PVALID,LATIN SMALL LETTER Q WITH DIAGONAL STROKE
+A75A,DISALLOWED,LATIN CAPITAL LETTER R ROTUNDA
+A75B,PVALID,LATIN SMALL LETTER R ROTUNDA
+A75C,DISALLOWED,LATIN CAPITAL LETTER RUM ROTUNDA
+A75D,PVALID,LATIN SMALL LETTER RUM ROTUNDA
+A75E,DISALLOWED,LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
+A75F,PVALID,LATIN SMALL LETTER V WITH DIAGONAL STROKE
+A760,DISALLOWED,LATIN CAPITAL LETTER VY
+A761,PVALID,LATIN SMALL LETTER VY
+A762,DISALLOWED,LATIN CAPITAL LETTER VISIGOTHIC Z
+A763,PVALID,LATIN SMALL LETTER VISIGOTHIC Z
+A764,DISALLOWED,LATIN CAPITAL LETTER THORN WITH STROKE
+A765,PVALID,LATIN SMALL LETTER THORN WITH STROKE
+A766,DISALLOWED,LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
+A767,PVALID,LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER
+A768,DISALLOWED,LATIN CAPITAL LETTER VEND
+A769,PVALID,LATIN SMALL LETTER VEND
+A76A,DISALLOWED,LATIN CAPITAL LETTER ET
+A76B,PVALID,LATIN SMALL LETTER ET
+A76C,DISALLOWED,LATIN CAPITAL LETTER IS
+A76D,PVALID,LATIN SMALL LETTER IS
+A76E,DISALLOWED,LATIN CAPITAL LETTER CON
+A76F,PVALID,LATIN SMALL LETTER CON
+A770,DISALLOWED,MODIFIER LETTER US
+A771-A778,PVALID,LATIN SMALL LETTER DUM..LATIN SMALL LETTER UM
+A779,DISALLOWED,LATIN CAPITAL LETTER INSULAR D
+A77A,PVALID,LATIN SMALL LETTER INSULAR D
+A77B,DISALLOWED,LATIN CAPITAL LETTER INSULAR F
+A77C,PVALID,LATIN SMALL LETTER INSULAR F
+A77D-A77E,DISALLOWED,LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G
+A77F,PVALID,LATIN SMALL LETTER TURNED INSULAR G
+A780,DISALLOWED,LATIN CAPITAL LETTER TURNED L
+A781,PVALID,LATIN SMALL LETTER TURNED L
+A782,DISALLOWED,LATIN CAPITAL LETTER INSULAR R
+A783,PVALID,LATIN SMALL LETTER INSULAR R
+A784,DISALLOWED,LATIN CAPITAL LETTER INSULAR S
+A785,PVALID,LATIN SMALL LETTER INSULAR S
+A786,DISALLOWED,LATIN CAPITAL LETTER INSULAR T
+A787-A788,PVALID,LATIN SMALL LETTER INSULAR T..MODIFIER LETTER LOW CIRCUMFLEX ACCENT
+A789-A78B,DISALLOWED,MODIFIER LETTER COLON..LATIN CAPITAL LETTER SALTILLO
+A78C,PVALID,LATIN SMALL LETTER SALTILLO
+A78D-A7FA,UNASSIGNED,<RESERVED>..<RESERVED>
+A7FB-A827,PVALID,LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI VOWEL SIGN OO
+A828-A82B,DISALLOWED,SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
+A82C-A82F,UNASSIGNED,<RESERVED>..<RESERVED>
+A830-A839,DISALLOWED,NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC QUANTITY MARK
+A83A-A83F,UNASSIGNED,<RESERVED>..<RESERVED>
+A840-A873,PVALID,PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
+A874-A877,DISALLOWED,PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
+A878-A87F,UNASSIGNED,<RESERVED>..<RESERVED>
+A880-A8C4,PVALID,SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VIRAMA
+A8C5-A8CD,UNASSIGNED,<RESERVED>..<RESERVED>
+A8CE-A8CF,DISALLOWED,SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
+A8D0-A8D9,PVALID,SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
+A8DA-A8DF,UNASSIGNED,<RESERVED>..<RESERVED>
+A8E0-A8F7,PVALID,COMBINING DEVANAGARI DIGIT ZERO..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
+A8F8-A8FA,DISALLOWED,DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
+A8FB,PVALID,DEVANAGARI HEADSTROKE
+A8FC-A8FF,UNASSIGNED,<RESERVED>..<RESERVED>
+A900-A92D,PVALID,KAYAH LI DIGIT ZERO..KAYAH LI TONE CALYA PLOPHU
+A92E-A92F,DISALLOWED,KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
+A930-A953,PVALID,REJANG LETTER KA..REJANG VIRAMA
+A954-A95E,UNASSIGNED,<RESERVED>..<RESERVED>
+A95F-A97C,DISALLOWED,REJANG SECTION MARK..HANGUL CHOSEONG SSANGYEORINHIEUH
+A97D-A97F,UNASSIGNED,<RESERVED>..<RESERVED>
+A980-A9C0,PVALID,JAVANESE SIGN PANYANGGA..JAVANESE PANGKON
+A9C1-A9CD,DISALLOWED,JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
+A9CE,UNASSIGNED,<RESERVED>
+A9CF-A9D9,PVALID,JAVANESE PANGRANGKEP..JAVANESE DIGIT NINE
+A9DA-A9DD,UNASSIGNED,<RESERVED>..<RESERVED>
+A9DE-A9DF,DISALLOWED,JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
+A9E0-A9FF,UNASSIGNED,<RESERVED>..<RESERVED>
+AA00-AA36,PVALID,CHAM LETTER A..CHAM CONSONANT SIGN WA
+AA37-AA3F,UNASSIGNED,<RESERVED>..<RESERVED>
+AA40-AA4D,PVALID,CHAM LETTER FINAL K..CHAM CONSONANT SIGN FINAL H
+AA4E-AA4F,UNASSIGNED,<RESERVED>..<RESERVED>
+AA50-AA59,PVALID,CHAM DIGIT ZERO..CHAM DIGIT NINE
+AA5A-AA5B,UNASSIGNED,<RESERVED>..<RESERVED>
+AA5C-AA5F,DISALLOWED,CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
+AA60-AA76,PVALID,MYANMAR LETTER KHAMTI GA..MYANMAR LOGOGRAM KHAMTI HM
+AA77-AA79,DISALLOWED,MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
+AA7A-AA7B,PVALID,MYANMAR LETTER AITON RA..MYANMAR SIGN PAO KAREN TONE
+AA7C-AA7F,UNASSIGNED,<RESERVED>..<RESERVED>
+AA80-AAC2,PVALID,TAI VIET LETTER LOW KO..TAI VIET TONE MAI SONG
+AAC3-AADA,UNASSIGNED,<RESERVED>..<RESERVED>
+AADB-AADD,PVALID,TAI VIET SYMBOL KON..TAI VIET SYMBOL SAM
+AADE-AADF,DISALLOWED,TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
+AAE0-ABBF,UNASSIGNED,<RESERVED>..<RESERVED>
+ABC0-ABEA,PVALID,MEETEI MAYEK LETTER KOK..MEETEI MAYEK VOWEL SIGN NUNG
+ABEB,DISALLOWED,MEETEI MAYEK CHEIKHEI
+ABEC-ABED,PVALID,MEETEI MAYEK LUM IYEK..MEETEI MAYEK APUN IYEK
+ABEE-ABEF,UNASSIGNED,<RESERVED>..<RESERVED>
+ABF0-ABF9,PVALID,MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
+ABFA-ABFF,UNASSIGNED,<RESERVED>..<RESERVED>
+AC00-D7A3,PVALID,"<HANGUL SYLLABLE, FIRST>..<HANGUL SYLLABLE, LAST>"
+D7A4-D7AF,UNASSIGNED,<RESERVED>..<RESERVED>
+D7B0-D7C6,DISALLOWED,HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
+D7C7-D7CA,UNASSIGNED,<RESERVED>..<RESERVED>
+D7CB-D7FB,DISALLOWED,HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
+D7FC-D7FF,UNASSIGNED,<RESERVED>..<RESERVED>
+D800-FA0D,DISALLOWED,"<NON PRIVATE USE HIGH SURROGATE, FIRST>..CJK COMPATIBILITY IDEOGRAPH-FA0D"
+FA0E-FA0F,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
+FA10,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA10
+FA11,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA11
+FA12,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA12
+FA13-FA14,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
+FA15-FA1E,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
+FA1F,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA1F
+FA20,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA20
+FA21,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA21
+FA22,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA22
+FA23-FA24,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24
+FA25-FA26,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
+FA27-FA29,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29
+FA2A-FA2D,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
+FA2E-FA2F,UNASSIGNED,<RESERVED>..<RESERVED>
+FA30-FA6D,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
+FA6E-FA6F,UNASSIGNED,<RESERVED>..<RESERVED>
+FA70-FAD9,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+FADA-FAFF,UNASSIGNED,<RESERVED>..<RESERVED>
+FB00-FB06,DISALLOWED,LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
+FB07-FB12,UNASSIGNED,<RESERVED>..<RESERVED>
+FB13-FB17,DISALLOWED,ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
+FB18-FB1C,UNASSIGNED,<RESERVED>..<RESERVED>
+FB1D,DISALLOWED,HEBREW LETTER YOD WITH HIRIQ
+FB1E,PVALID,HEBREW POINT JUDEO-SPANISH VARIKA
+FB1F-FB36,DISALLOWED,HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER ZAYIN WITH DAGESH
+FB37,UNASSIGNED,<RESERVED>
+FB38-FB3C,DISALLOWED,HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
+FB3D,UNASSIGNED,<RESERVED>
+FB3E,DISALLOWED,HEBREW LETTER MEM WITH DAGESH
+FB3F,UNASSIGNED,<RESERVED>
+FB40-FB41,DISALLOWED,HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
+FB42,UNASSIGNED,<RESERVED>
+FB43-FB44,DISALLOWED,HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
+FB45,UNASSIGNED,<RESERVED>
+FB46-FBB1,DISALLOWED,HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
+FBB2-FBD2,UNASSIGNED,<RESERVED>..<RESERVED>
+FBD3-FD3F,DISALLOWED,ARABIC LETTER NG ISOLATED FORM..ORNATE RIGHT PARENTHESIS
+FD40-FD4F,UNASSIGNED,<RESERVED>..<RESERVED>
+FD50-FD8F,DISALLOWED,ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
+FD90-FD91,UNASSIGNED,<RESERVED>..<RESERVED>
+FD92-FDC7,DISALLOWED,ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
+FDC8-FDCF,UNASSIGNED,<RESERVED>..<RESERVED>
+FDD0-FDFD,DISALLOWED,<NOT A CHARACTER>..ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
+FDFE-FDFF,UNASSIGNED,<RESERVED>..<RESERVED>
+FE00-FE19,DISALLOWED,VARIATION SELECTOR-1..PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
+FE1A-FE1F,UNASSIGNED,<RESERVED>..<RESERVED>
+FE20-FE26,PVALID,COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
+FE27-FE2F,UNASSIGNED,<RESERVED>..<RESERVED>
+FE30-FE52,DISALLOWED,PRESENTATION FORM FOR VERTICAL TWO DOT LEADER..SMALL FULL STOP
+FE53,UNASSIGNED,<RESERVED>
+FE54-FE66,DISALLOWED,SMALL SEMICOLON..SMALL EQUALS SIGN
+FE67,UNASSIGNED,<RESERVED>
+FE68-FE6B,DISALLOWED,SMALL REVERSE SOLIDUS..SMALL COMMERCIAL AT
+FE6C-FE6F,UNASSIGNED,<RESERVED>..<RESERVED>
+FE70-FE72,DISALLOWED,ARABIC FATHATAN ISOLATED FORM..ARABIC DAMMATAN ISOLATED FORM
+FE73,PVALID,ARABIC TAIL FRAGMENT
+FE74,DISALLOWED,ARABIC KASRATAN ISOLATED FORM
+FE75,UNASSIGNED,<RESERVED>
+FE76-FEFC,DISALLOWED,ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+FEFD-FEFE,UNASSIGNED,<RESERVED>..<RESERVED>
+FEFF,DISALLOWED,ZERO WIDTH NO-BREAK SPACE
+FF00,UNASSIGNED,<RESERVED>
+FF01-FFBE,DISALLOWED,FULLWIDTH EXCLAMATION MARK..HALFWIDTH HANGUL LETTER HIEUH
+FFBF-FFC1,UNASSIGNED,<RESERVED>..<RESERVED>
+FFC2-FFC7,DISALLOWED,HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
+FFC8-FFC9,UNASSIGNED,<RESERVED>..<RESERVED>
+FFCA-FFCF,DISALLOWED,HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
+FFD0-FFD1,UNASSIGNED,<RESERVED>..<RESERVED>
+FFD2-FFD7,DISALLOWED,HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
+FFD8-FFD9,UNASSIGNED,<RESERVED>..<RESERVED>
+FFDA-FFDC,DISALLOWED,HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
+FFDD-FFDF,UNASSIGNED,<RESERVED>..<RESERVED>
+FFE0-FFE6,DISALLOWED,FULLWIDTH CENT SIGN..FULLWIDTH WON SIGN
+FFE7,UNASSIGNED,<RESERVED>
+FFE8-FFEE,DISALLOWED,HALFWIDTH FORMS LIGHT VERTICAL..HALFWIDTH WHITE CIRCLE
+FFEF-FFF8,UNASSIGNED,<RESERVED>..<RESERVED>
+FFF9-FFFF,DISALLOWED,INTERLINEAR ANNOTATION ANCHOR..<NOT A CHARACTER>
+10000-1000B,PVALID,LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
+1000C,UNASSIGNED,<RESERVED>
+1000D-10026,PVALID,LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
+10027,UNASSIGNED,<RESERVED>
+10028-1003A,PVALID,LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
+1003B,UNASSIGNED,<RESERVED>
+1003C-1003D,PVALID,LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
+1003E,UNASSIGNED,<RESERVED>
+1003F-1004D,PVALID,LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
+1004E-1004F,UNASSIGNED,<RESERVED>..<RESERVED>
+10050-1005D,PVALID,LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
+1005E-1007F,UNASSIGNED,<RESERVED>..<RESERVED>
+10080-100FA,PVALID,LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
+100FB-100FF,UNASSIGNED,<RESERVED>..<RESERVED>
+10100-10102,DISALLOWED,AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
+10103-10106,UNASSIGNED,<RESERVED>..<RESERVED>
+10107-10133,DISALLOWED,AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
+10134-10136,UNASSIGNED,<RESERVED>..<RESERVED>
+10137-1018A,DISALLOWED,AEGEAN WEIGHT BASE UNIT..GREEK ZERO SIGN
+1018B-1018F,UNASSIGNED,<RESERVED>..<RESERVED>
+10190-1019B,DISALLOWED,ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
+1019C-101CF,UNASSIGNED,<RESERVED>..<RESERVED>
+101D0-101FC,DISALLOWED,PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
+101FD,PVALID,PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
+101FE-1027F,UNASSIGNED,<RESERVED>..<RESERVED>
+10280-1029C,PVALID,LYCIAN LETTER A..LYCIAN LETTER X
+1029D-1029F,UNASSIGNED,<RESERVED>..<RESERVED>
+102A0-102D0,PVALID,CARIAN LETTER A..CARIAN LETTER UUU3
+102D1-102FF,UNASSIGNED,<RESERVED>..<RESERVED>
+10300-1031E,PVALID,OLD ITALIC LETTER A..OLD ITALIC LETTER UU
+1031F,UNASSIGNED,<RESERVED>
+10320-10323,DISALLOWED,OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
+10324-1032F,UNASSIGNED,<RESERVED>..<RESERVED>
+10330-10340,PVALID,GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
+10341,DISALLOWED,GOTHIC LETTER NINETY
+10342-10349,PVALID,GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
+1034A,DISALLOWED,GOTHIC LETTER NINE HUNDRED
+1034B-1037F,UNASSIGNED,<RESERVED>..<RESERVED>
+10380-1039D,PVALID,UGARITIC LETTER ALPA..UGARITIC LETTER SSU
+1039E,UNASSIGNED,<RESERVED>
+1039F,DISALLOWED,UGARITIC WORD DIVIDER
+103A0-103C3,PVALID,OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
+103C4-103C7,UNASSIGNED,<RESERVED>..<RESERVED>
+103C8-103CF,PVALID,OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
+103D0-103D5,DISALLOWED,OLD PERSIAN WORD DIVIDER..OLD PERSIAN NUMBER HUNDRED
+103D6-103FF,UNASSIGNED,<RESERVED>..<RESERVED>
+10400-10427,DISALLOWED,DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
+10428-1049D,PVALID,DESERET SMALL LETTER LONG I..OSMANYA LETTER OO
+1049E-1049F,UNASSIGNED,<RESERVED>..<RESERVED>
+104A0-104A9,PVALID,OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
+104AA-107FF,UNASSIGNED,<RESERVED>..<RESERVED>
+10800-10805,PVALID,CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
+10806-10807,UNASSIGNED,<RESERVED>..<RESERVED>
+10808,PVALID,CYPRIOT SYLLABLE JO
+10809,UNASSIGNED,<RESERVED>
+1080A-10835,PVALID,CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
+10836,UNASSIGNED,<RESERVED>
+10837-10838,PVALID,CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
+10839-1083B,UNASSIGNED,<RESERVED>..<RESERVED>
+1083C,PVALID,CYPRIOT SYLLABLE ZA
+1083D-1083E,UNASSIGNED,<RESERVED>..<RESERVED>
+1083F-10855,PVALID,CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW
+10856,UNASSIGNED,<RESERVED>
+10857-1085F,DISALLOWED,IMPERIAL ARAMAIC SECTION SIGN..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
+10860-108FF,UNASSIGNED,<RESERVED>..<RESERVED>
+10900-10915,PVALID,PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
+10916-1091B,DISALLOWED,PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
+1091C-1091E,UNASSIGNED,<RESERVED>..<RESERVED>
+1091F,DISALLOWED,PHOENICIAN WORD SEPARATOR
+10920-10939,PVALID,LYDIAN LETTER A..LYDIAN LETTER C
+1093A-1093E,UNASSIGNED,<RESERVED>..<RESERVED>
+1093F,DISALLOWED,LYDIAN TRIANGULAR MARK
+10940-109FF,UNASSIGNED,<RESERVED>..<RESERVED>
+10A00-10A03,PVALID,KHAROSHTHI LETTER A..KHAROSHTHI VOWEL SIGN VOCALIC R
+10A04,UNASSIGNED,<RESERVED>
+10A05-10A06,PVALID,KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
+10A07-10A0B,UNASSIGNED,<RESERVED>..<RESERVED>
+10A0C-10A13,PVALID,KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI LETTER GHA
+10A14,UNASSIGNED,<RESERVED>
+10A15-10A17,PVALID,KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
+10A18,UNASSIGNED,<RESERVED>
+10A19-10A33,PVALID,KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
+10A34-10A37,UNASSIGNED,<RESERVED>..<RESERVED>
+10A38-10A3A,PVALID,KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
+10A3B-10A3E,UNASSIGNED,<RESERVED>..<RESERVED>
+10A3F,PVALID,KHAROSHTHI VIRAMA
+10A40-10A47,DISALLOWED,KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
+10A48-10A4F,UNASSIGNED,<RESERVED>..<RESERVED>
+10A50-10A58,DISALLOWED,KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
+10A59-10A5F,UNASSIGNED,<RESERVED>..<RESERVED>
+10A60-10A7C,PVALID,OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
+10A7D-10A7F,DISALLOWED,OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMERIC INDICATOR
+10A80-10AFF,UNASSIGNED,<RESERVED>..<RESERVED>
+10B00-10B35,PVALID,AVESTAN LETTER A..AVESTAN LETTER HE
+10B36-10B38,UNASSIGNED,<RESERVED>..<RESERVED>
+10B39-10B3F,DISALLOWED,AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
+10B40-10B55,PVALID,INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
+10B56-10B57,UNASSIGNED,<RESERVED>..<RESERVED>
+10B58-10B5F,DISALLOWED,INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
+10B60-10B72,PVALID,INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
+10B73-10B77,UNASSIGNED,<RESERVED>..<RESERVED>
+10B78-10B7F,DISALLOWED,INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
+10B80-10BFF,UNASSIGNED,<RESERVED>..<RESERVED>
+10C00-10C48,PVALID,OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
+10C49-10E5F,UNASSIGNED,<RESERVED>..<RESERVED>
+10E60-10E7E,DISALLOWED,RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
+10E7F-1107F,UNASSIGNED,<RESERVED>..<RESERVED>
+11080-110BA,PVALID,KAITHI SIGN CANDRABINDU..KAITHI SIGN NUKTA
+110BB-110C1,DISALLOWED,KAITHI ABBREVIATION SIGN..KAITHI DOUBLE DANDA
+110C2-11FFF,UNASSIGNED,<RESERVED>..<RESERVED>
+12000-1236E,PVALID,CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
+1236F-123FF,UNASSIGNED,<RESERVED>..<RESERVED>
+12400-12462,DISALLOWED,CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
+12463-1246F,UNASSIGNED,<RESERVED>..<RESERVED>
+12470-12473,DISALLOWED,CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
+12474-12FFF,UNASSIGNED,<RESERVED>..<RESERVED>
+13000-1342E,PVALID,EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
+1342F-1CFFF,UNASSIGNED,<RESERVED>..<RESERVED>
+1D000-1D0F5,DISALLOWED,BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
+1D0F6-1D0FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1D100-1D126,DISALLOWED,MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
+1D127-1D128,UNASSIGNED,<RESERVED>..<RESERVED>
+1D129-1D1DD,DISALLOWED,MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL PES SUBPUNCTIS
+1D1DE-1D1FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1D200-1D245,DISALLOWED,GREEK VOCAL NOTATION SYMBOL-1..GREEK MUSICAL LEIMMA
+1D246-1D2FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1D300-1D356,DISALLOWED,MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
+1D357-1D35F,UNASSIGNED,<RESERVED>..<RESERVED>
+1D360-1D371,DISALLOWED,COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
+1D372-1D3FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1D400-1D454,DISALLOWED,MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
+1D455,UNASSIGNED,<RESERVED>
+1D456-1D49C,DISALLOWED,MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
+1D49D,UNASSIGNED,<RESERVED>
+1D49E-1D49F,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
+1D4A0-1D4A1,UNASSIGNED,<RESERVED>..<RESERVED>
+1D4A2,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL G
+1D4A3-1D4A4,UNASSIGNED,<RESERVED>..<RESERVED>
+1D4A5-1D4A6,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
+1D4A7-1D4A8,UNASSIGNED,<RESERVED>..<RESERVED>
+1D4A9-1D4AC,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
+1D4AD,UNASSIGNED,<RESERVED>
+1D4AE-1D4B9,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
+1D4BA,UNASSIGNED,<RESERVED>
+1D4BB,DISALLOWED,MATHEMATICAL SCRIPT SMALL F
+1D4BC,UNASSIGNED,<RESERVED>
+1D4BD-1D4C3,DISALLOWED,MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
+1D4C4,UNASSIGNED,<RESERVED>
+1D4C5-1D505,DISALLOWED,MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
+1D506,UNASSIGNED,<RESERVED>
+1D507-1D50A,DISALLOWED,MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
+1D50B-1D50C,UNASSIGNED,<RESERVED>..<RESERVED>
+1D50D-1D514,DISALLOWED,MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
+1D515,UNASSIGNED,<RESERVED>
+1D516-1D51C,DISALLOWED,MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
+1D51D,UNASSIGNED,<RESERVED>
+1D51E-1D539,DISALLOWED,MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
+1D53A,UNASSIGNED,<RESERVED>
+1D53B-1D53E,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
+1D53F,UNASSIGNED,<RESERVED>
+1D540-1D544,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
+1D545,UNASSIGNED,<RESERVED>
+1D546,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK CAPITAL O
+1D547-1D549,UNASSIGNED,<RESERVED>..<RESERVED>
+1D54A-1D550,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
+1D551,UNASSIGNED,<RESERVED>
+1D552-1D6A5,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
+1D6A6-1D6A7,UNASSIGNED,<RESERVED>..<RESERVED>
+1D6A8-1D7CB,DISALLOWED,MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD SMALL DIGAMMA
+1D7CC-1D7CD,UNASSIGNED,<RESERVED>..<RESERVED>
+1D7CE-1D7FF,DISALLOWED,MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
+1D800-1EFFF,UNASSIGNED,<RESERVED>..<RESERVED>
+1F000-1F02B,DISALLOWED,MAHJONG TILE EAST WIND..MAHJONG TILE BACK
+1F02C-1F02F,UNASSIGNED,<RESERVED>..<RESERVED>
+1F030-1F093,DISALLOWED,DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
+1F094-1F0FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1F100-1F10A,DISALLOWED,DIGIT ZERO FULL STOP..DIGIT NINE COMMA
+1F10B-1F10F,UNASSIGNED,<RESERVED>..<RESERVED>
+1F110-1F12E,DISALLOWED,PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
+1F12F-1F130,UNASSIGNED,<RESERVED>..<RESERVED>
+1F131,DISALLOWED,SQUARED LATIN CAPITAL LETTER B
+1F132-1F13C,UNASSIGNED,<RESERVED>..<RESERVED>
+1F13D,DISALLOWED,SQUARED LATIN CAPITAL LETTER N
+1F13E,UNASSIGNED,<RESERVED>
+1F13F,DISALLOWED,SQUARED LATIN CAPITAL LETTER P
+1F140-1F141,UNASSIGNED,<RESERVED>..<RESERVED>
+1F142,DISALLOWED,SQUARED LATIN CAPITAL LETTER S
+1F143-1F145,UNASSIGNED,<RESERVED>..<RESERVED>
+1F146,DISALLOWED,SQUARED LATIN CAPITAL LETTER W
+1F147-1F149,UNASSIGNED,<RESERVED>..<RESERVED>
+1F14A-1F14E,DISALLOWED,SQUARED HV..SQUARED PPV
+1F14F-1F156,UNASSIGNED,<RESERVED>..<RESERVED>
+1F157,DISALLOWED,NEGATIVE CIRCLED LATIN CAPITAL LETTER H
+1F158-1F15E,UNASSIGNED,<RESERVED>..<RESERVED>
+1F15F,DISALLOWED,NEGATIVE CIRCLED LATIN CAPITAL LETTER P
+1F160-1F178,UNASSIGNED,<RESERVED>..<RESERVED>
+1F179,DISALLOWED,NEGATIVE SQUARED LATIN CAPITAL LETTER J
+1F17A,UNASSIGNED,<RESERVED>
+1F17B-1F17C,DISALLOWED,NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
+1F17D-1F17E,UNASSIGNED,<RESERVED>..<RESERVED>
+1F17F,DISALLOWED,NEGATIVE SQUARED LATIN CAPITAL LETTER P
+1F180-1F189,UNASSIGNED,<RESERVED>..<RESERVED>
+1F18A-1F18D,DISALLOWED,CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
+1F18E-1F18F,UNASSIGNED,<RESERVED>..<RESERVED>
+1F190,DISALLOWED,SQUARE DJ
+1F191-1F1FF,UNASSIGNED,<RESERVED>..<RESERVED>
+1F200,DISALLOWED,SQUARE HIRAGANA HOKA
+1F201-1F20F,UNASSIGNED,<RESERVED>..<RESERVED>
+1F210-1F231,DISALLOWED,SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
+1F232-1F23F,UNASSIGNED,<RESERVED>..<RESERVED>
+1F240-1F248,DISALLOWED,TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
+1F249-1FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+1FFFE-1FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+20000-2A6D6,PVALID,"<CJK IDEOGRAPH EXTENSION B, FIRST>..<CJK IDEOGRAPH EXTENSION B, LAST>"
+2A6D7-2A6FF,UNASSIGNED,<RESERVED>..<RESERVED>
+2A700-2B734,PVALID,"<CJK IDEOGRAPH EXTENSION C, FIRST>..<CJK IDEOGRAPH EXTENSION C, LAST>"
+2B735-2F7FF,UNASSIGNED,<RESERVED>..<RESERVED>
+2F800-2FA1D,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
+2FA1E-2FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+2FFFE-2FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+30000-3FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+3FFFE-3FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+40000-4FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+4FFFE-4FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+50000-5FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+5FFFE-5FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+60000-6FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+6FFFE-6FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+70000-7FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+7FFFE-7FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+80000-8FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+8FFFE-8FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+90000-9FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+9FFFE-9FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+A0000-AFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+AFFFE-AFFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+B0000-BFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+BFFFE-BFFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+C0000-CFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+CFFFE-CFFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+D0000-DFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+DFFFE-DFFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
+E0000,UNASSIGNED,<RESERVED>
+E0001,DISALLOWED,LANGUAGE TAG
+E0002-E001F,UNASSIGNED,<RESERVED>..<RESERVED>
+E0020-E007F,DISALLOWED,TAG SPACE..CANCEL TAG
+E0080-E00FF,UNASSIGNED,<RESERVED>..<RESERVED>
+E0100-E01EF,DISALLOWED,VARIATION SELECTOR-17..VARIATION SELECTOR-256
+E01F0-EFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
+EFFFE-10FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
diff --git a/tools/import-messages.pl b/tools/import-messages.pl
new file mode 100644
index 000000000..4c13a859e
--- /dev/null
+++ b/tools/import-messages.pl
@@ -0,0 +1,326 @@
+#!/usr/bin/perl
+#
+# Copyright © 2013 Vivek Dasmohapatra <vivek@collabora.co.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# * The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+=head1
+
+Take a single-language messages file and merge it back into the
+NetSurf master messages (l10n) file.
+
+=cut
+
+use strict;
+
+use Getopt::Long ();
+use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY O_TRUNC );
+
+# Getopt::Long configuration flags, passed to Getopt::Long::Configure()
+use constant GETOPT_OPTS => qw( auto_abbrev no_getopt_compat bundling );
+# command line option specification, passed to Getopt::Long::GetOptions();
+# every option except help takes a string value
+use constant GETOPT_SPEC =>
+ qw( output|o=s
+ input|i=s
+ lang|l=s
+ plat|platform|p=s
+ format|fmt|f=s
+ import|I=s
+ help|h|? );
+
+# default option values:
+# (file-scoped lexical: also read by the msgfmt::* packages further down)
+my %opt = qw( plat any format messages );
+
+# forward declarations so the prototyped subs can be called before their
+# definitions appear in the file
+sub input_stream ($;$);
+sub output_stream ();
+sub usage ();
+sub parser ();
+
+# Merge the imported translations into the master messages file.
+#
+# The whole master file is read from the input stream first (so that the
+# output may be the same file), the import file is parsed into a
+# key => translation map, and the master is then copied to the output:
+#  - a master line for the requested language (and platform) whose key
+#    has an imported translation is rewritten with the new text;
+#  - an imported translation that was not used to rewrite a line is
+#    inserted directly after the last master line carrying the same key.
+#
+# Calls usage() (which exits) when the options are invalid, help was
+# requested, or the language code is not two lower-case letters.
+sub main ()
+{
+    my $input;
+    my $output;
+    my $import;
+    my $parser;
+    my $opt_ok;
+    my @input;
+    my %message;    # key => translation still waiting to be written
+    my $last_key;   # key of the most recent keyed master line
+    my $last_plat;  # platform of the most recent keyed master line
+
+    # option parsing:
+    Getopt::Long::Configure( GETOPT_OPTS );
+    $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC );
+
+    # allow input, import & output to be specified as non-option arguments:
+    if( @ARGV ) { $opt{input } ||= shift( @ARGV ) }
+    if( @ARGV ) { $opt{import} ||= shift( @ARGV ) }
+    if( @ARGV ) { $opt{output} ||= shift( @ARGV ) }
+
+    # open the appropriate streams and get the formatter and headers:
+    if( $opt_ok )
+    {
+        $input = input_stream( $opt{input} );
+        $import = input_stream( $opt{import}, 'import-file' );
+        $parser = parser();
+        $opt{plat} ||= 'any';
+    }
+
+    # double check the options are sane (and we weren't asked for the help)
+    if( !$opt_ok || $opt{help} || $opt{lang} !~ /^[a-z]{2}$/ )
+    {
+        usage();
+    }
+
+    # slurp the master before the output is opened: it may be the same file
+    @input = <$input>;
+    $output = output_stream();
+
+    $parser->( \%message, $import );
+
+    foreach ( @input )
+    {
+        use bytes;
+
+        my( $lang, $plat, $key );
+
+        if( /^([a-z]{2})\.([^.]+)\.([^:]+):/ )
+        {
+            ( $lang, $plat, $key ) = ( $1, $2, $3 );
+        }
+
+        if( $key || $message{ $last_key } )
+        {
+            # the key changed but we have a message for it still pending:
+            if( $last_key && $message{ $last_key } && ($key ne $last_key) )
+            {
+                my $str = $message{ $last_key };
+
+                print( $output qq|$opt{lang}.$last_plat.$last_key:$str\n| );
+                delete( $message{ $last_key } );
+
+                # if the line following our new translation is not blank,
+                # generate a synthetic group-separator:
+                if( !/^\s*$/ ) { print( $output "\n") }
+            }
+
+            $last_key = $key;
+            $last_plat = $plat;
+
+            if( $lang eq $opt{lang} )
+            {
+                my $val = $message{ $key };
+                if( $val &&
+                    ( $opt{plat} eq 'any' || # all platforms ok
+                      $opt{plat} eq $plat ) ) # specified platform matched
+                {
+                    # replace the existing translation; the named captures
+                    # are used rather than $1..$3 so that no intervening
+                    # match can change what is written
+                    print( $output qq|$lang.$plat.$key:$val\n| );
+                    delete( $message{ $key } );
+                    next;
+                }
+            }
+        }
+
+        print( $output $_ );
+    }
+
+    # the master ended on the group of the last key: a translation still
+    # pending for it would otherwise never be written out
+    if( $last_key && $message{ $last_key } )
+    {
+        # make sure the new entry starts on a line of its own
+        if( @input && $input[ -1 ] !~ /\n\z/ ) { print( $output "\n" ) }
+
+        print( $output qq|$opt{lang}.$last_plat.$last_key:$message{ $last_key }\n| );
+        delete( $message{ $last_key } );
+    }
+}
+
+main();
+
+# Print the command line synopsis on standard error and exit with status 1.
+# The list of formats is taken from the packages nested under msgfmt::.
+sub usage ()
+{
+    # symbol table entries for nested packages carry a trailing '::'
+    my @fmt = keys( %{ $::{'msgfmt::'} } );
+    s/::$// foreach @fmt;
+
+    print( STDERR <<TXT );
+usage:
+ $0 -l lang-code \
+ [-p platform] [-f format] \
+ [-o output-file] [-i input-file] [-I import-file]
+
+ $0 -l lang-code … [input-file [import-file [output-file]]]
+
+ lang-code : en fr ko … (no default)
+ platform : any gtk ami (default 'any')
+ format : @fmt (default 'messages')
+ input-file : defaults to standard input
+ output-file: defaults to standard output
+ import-file: no default
+
+ The input-file may be the same as the output-file, in which case
+ it will be altered in place.
+TXT
+    exit(1);
+}
+
+# Return a read handle for the named file, or standard input when no file
+# name is given.
+#
+# The optional second argument is a label for the file: when it is set
+# and no file name was supplied, the problem is reported and usage() is
+# called instead of falling back to standard input.
+# Dies if the named file cannot be opened.
+sub input_stream ($;$)
+{
+    my( $path, $label ) = @_;
+
+    if( $path )
+    {
+        sysopen( my $handle, $path, O_RDONLY )
+            or die( "$0: Failed to open input file $path: $!\n" );
+
+        return $handle;
+    }
+
+    if( $label )
+    {
+        print( STDERR "No file specified for $label\n" );
+        usage();
+    }
+
+    return \*STDIN;
+}
+
+# Return a write handle for the file named by the output option (created
+# if missing, truncated otherwise), or standard output when none was given.
+# Dies if the file cannot be opened.
+sub output_stream ()
+{
+    my $target = $opt{output};
+
+    return \*STDOUT unless $target;
+
+    sysopen( my $handle, $target, O_WRONLY|O_CREAT|O_TRUNC )
+        or die( "$0: Failed to open output file $target: $!\n" );
+
+    return $handle;
+}
+
+# Look up the parse() routine of the msgfmt::<format> package selected by
+# the format option and return it as a code reference.
+# Dies if no such routine exists.
+sub parser ()
+{
+    my $format = $opt{format};
+    my $handler = UNIVERSAL::can( "msgfmt::$format", 'parse' );
+
+    die( "No handler found for format '$format'\n" ) unless $handler;
+
+    return $handler;
+}
+
+# format implementations:
+{
+    package msgfmt::java;
+
+    # Drop the backslash from every escape sequence except those whose
+    # escaped character is one of a, b, f, n, r, t or v.
+    # Edits its argument in place and returns it.
+    sub unescape { $_[0] =~ s/\\([^abfnrtv])/$1/g; $_[0] }
+
+    # Read "key = value" lines from $stream into the hash referenced by
+    # $cache, unescaping each value.
+    #
+    # Lines whose first non-blank character is '#' are comments and are
+    # skipped. The key ends at the FIRST '=' on the line, so values may
+    # themselves contain '=' (for example URLs with query strings).
+    sub parse
+    {
+        my $cache = shift();
+        my $stream = shift();
+
+        while ( <$stream> )
+        {
+            next if /^\s*#/;
+
+            if( /^\s*([^\s=]+)\s*=\s?(.*)/ )
+            {
+                my( $key, $val ) = ( $1, $2 );
+                $cache->{ $key } = unescape( $val );
+            }
+        }
+    }
+}
+
+{
+    package msgfmt::messages; # native netsurf format
+
+    # Read "lang.platform.key:value" lines from $stream and store the
+    # value under its key in the hash referenced by $cache.
+    #
+    # Only lines for the requested language are kept. When a specific
+    # platform was requested, only lines for that platform or for the
+    # 'all' platform are kept.
+    sub parse
+    {
+        my( $cache, $stream ) = @_;
+
+        while ( <$stream> )
+        {
+            /^([a-z]{2})\.([^.]+)\.([^:]+):(.*)/ || next;
+
+            my( $lang, $plat, $key, $val ) = ( $1, $2, $3, $4 );
+
+            next if $lang ne $opt{lang};
+
+            my $plat_ok = $opt{plat} eq 'any' ||
+                          $opt{plat} eq $plat ||
+                          $plat eq 'all';
+            next unless $plat_ok;
+
+            $cache->{ $key } = $val;
+        }
+    }
+}
+
+{
+ package msgfmt::transifex;
+ # no parse() is defined in this package: it is inherited from
+ # msgfmt::java. 'use base' takes effect at compile time, so the
+ # inheritance is already in place when main() runs earlier in the file.
+ use base 'msgfmt::java';
+
+ # the differences between transifex and java properties only matter in
+ # the outward direction: During import they can be treated the same way
+}
+
+# Android string-resource XML: collects the text of every
+# <resources><string name="..."> element, keyed by its name attribute.
+{
+ package msgfmt::android;
+
+ ANDROID_XML:
+ {
+ # handler package given to XML::Parser via Pkg => ... in parse() below
+ package msgfmt::android::xml;
+
+ my @stack; # names of the currently open elements, outermost first
+ my $data; # text collected for the string element being read
+ my $key; # name of the string element being read, '' when outside one
+ our $cache; # destination hash reference, set by msgfmt::android::parse
+
+ # NOTE(review): these handlers read the text from $_ and the attributes
+ # from %_, which is presumably how XML::Parser's 'Stream' style passes
+ # them - confirm against the XML::Parser documentation.
+ sub StartDocument ($) { @stack = (); $key = '' }
+ sub Text ($) { if( $key ) { $data .= $_ } }
+ sub PI ($$$) { }
+ sub EndDocument ($) { }
+
+ # store the collected text under the current key, if there is one
+ sub EndTag ($$)
+ {
+ pop( @stack );
+
+ if( !$key ) { return; }
+
+ $cache->{ $key } = $data;
+ $data = $key = '';
+ }
+
+ # start collecting when a <string> directly inside <resources> opens
+ sub StartTag ($$)
+ {
+ push( @stack, $_[1] );
+
+ if( "@stack" eq "resources string" )
+ {
+ $data = '';
+ $key = $_{ name };
+ }
+ }
+ }
+
+ # Parse the XML on $stream (second argument) into the hash referenced
+ # by the first argument. XML::Parser is only loaded when this format
+ # is actually used.
+ sub parse
+ {
+ require XML::Parser;
+
+ if( !$XML::Parser::VERSION )
+ {
+ die("XML::Parser required for android format support\n");
+ }
+
+ $msgfmt::android::xml::cache = shift();
+ my $stream = shift();
+ my $parser = XML::Parser->new( Style => 'Stream',
+ Pkg => 'msgfmt::android::xml' );
+ $parser->parse( $stream );
+ }
+}
diff --git a/tools/jenkins-build.sh b/tools/jenkins-build.sh
new file mode 100755
index 000000000..d31b233b4
--- /dev/null
+++ b/tools/jenkins-build.sh
@@ -0,0 +1,486 @@
+#!/bin/bash
+#
+# Copyright © 2013 Vincent Sanders <vince@netsurf-browser.org>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# * The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# NetSurf continuous integration build script for jenkins
+#
+# This script is executed by jenkins to build netsurf itself
+#
+# Usage: jenkins-build.sh
+#
+
+# TARGET is set to the frontend target to build
+# HOST is set to the identifier of the toolchain doing the building
+# CC is the compiler (gcc or clang)
+# BUILD_NUMBER is the CI build number
+
+#####
+
+# set defaults - this is not retrivable from the jenkins environment
+OLD_ARTIFACT_COUNT=25
+
+################# Parameter and environment setup #####################
+
+#identifier for this specific build
+IDENTIFIER="$CC-${BUILD_NUMBER}"
+
+# Identifier for build which will be cleaned
+OLD_IDENTIFIER="$CC-$((BUILD_NUMBER - ${OLD_ARTIFACT_COUNT}))"
+
+# default atari architecture - bletch
+ATARIARCH=68020-60
+
+# make tool
+MAKE=make
+
+# NetSurf version number haiku needs it for package name
+NETSURF_VERSION="3.11"
+
+# Ensure the combination of target and toolchain works and set build
+# specific parameters too
+case ${TARGET} in
+ "riscos")
+ case ${HOST} in
+ "arm-unknown-riscos")
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST})\""
+ exit 1
+ ;;
+
+ esac
+
+ PKG_SRC=netsurf
+ PKG_SFX=.zip
+ ;;
+
+ "haiku")
+ case ${HOST} in
+ "i586-pc-haiku")
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST})\""
+ exit 1
+ ;;
+
+ esac
+
+ PKG_SRC="netsurf_x86-${NETSURF_VERSION}-1-x86_gcc2"
+ PKG_SFX=.hpkg
+ ;;
+
+
+ "windows")
+ case ${HOST} in
+ "i686-w64-mingw32")
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST})\""
+ exit 1
+ ;;
+
+ esac
+
+ PKG_SRC=netsurf-installer
+ PKG_SFX=.exe
+ ;;
+
+
+ "cocoa")
+ case ${HOST} in
+ "x86_64-apple-darwin14.5.0")
+ PATH=/opt/local/bin:/opt/local/sbin:${PATH}
+ ;;
+
+ "i686-apple-darwin10")
+ ;;
+
+ "powerpc-apple-darwin9")
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST})\""
+ exit 1
+ ;;
+
+ esac
+
+ IDENTIFIER="${HOST}-${IDENTIFIER}"
+ OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}"
+ PKG_SRC=NetSurf
+ PKG_SFX=.dmg
+ ;;
+
+
+ "amiga")
+ case ${HOST} in
+ "ppc-amigaos")
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST})\""
+ exit 1
+ ;;
+
+ esac
+
+ PKG_SRC=NetSurf_Amiga/netsurf
+ PKG_SFX=.lha
+ ;;
+
+
+ "amigaos3")
+ case ${HOST} in
+ "m68k-unknown-amigaos")
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST})\""
+ exit 1
+ ;;
+
+ esac
+
+ PKG_SRC=NetSurf_Amiga/netsurf
+ PKG_SFX=.lha
+ ;;
+
+
+ "atari")
+ case ${HOST} in
+ "m68k-atari-mint")
+ PKG_SRC=ns020
+ PKG_SFX=.zip
+ ;;
+
+ "m5475-atari-mint")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/m5475-atari-mint/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/m5475-atari-mint/cross/bin
+ ATARIARCH=v4e
+ PKG_SRC=nsv4e
+ PKG_SFX=.zip
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST})\""
+ exit 1
+ ;;
+
+ esac
+
+ IDENTIFIER="${HOST}-${IDENTIFIER}"
+ OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}"
+ ;;
+
+
+ "gtk2")
+ case ${HOST} in
+ "x86_64-linux-gnu")
+ ;;
+
+ "arm-linux-gnueabihf")
+ ;;
+
+ "aarch64-linux-gnu")
+ ;;
+
+ amd64-unknown-openbsd*)
+ MAKE=gmake
+ ;;
+
+ x86_64-unknown-freebsd*)
+ MAKE=gmake
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST}\""
+ exit 1
+ ;;
+
+ esac
+
+ IDENTIFIER="${HOST}-${IDENTIFIER}"
+ OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}"
+ PKG_SRC=nsgtk2
+ PKG_SFX=
+ ;;
+
+
+ "gtk3")
+ case ${HOST} in
+ "x86_64-linux-gnu")
+ ;;
+
+ "arm-linux-gnueabihf")
+ ;;
+
+ "aarch64-linux-gnu")
+ ;;
+
+ amd64-unknown-openbsd*)
+ MAKE=gmake
+ ;;
+
+ x86_64-unknown-freebsd*)
+ MAKE=gmake
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST}\""
+ exit 1
+ ;;
+
+ esac
+
+ IDENTIFIER="${HOST}-${IDENTIFIER}"
+ OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}"
+ PKG_SRC=nsgtk3
+ PKG_SFX=
+ ;;
+
+
+ "framebuffer")
+ case ${HOST} in
+ "x86_64-linux-gnu")
+ ;;
+
+ arm-linux-gnueabihf)
+ ;;
+
+ "aarch64-linux-gnu")
+ ;;
+
+ "i686-apple-darwin10")
+ ;;
+
+ "powerpc-apple-darwin9")
+ ;;
+
+ amd64-unknown-openbsd*)
+ MAKE=gmake
+ ;;
+
+ x86_64-unknown-freebsd*)
+ MAKE=gmake
+ ;;
+
+ "arm-unknown-riscos")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ "m68k-atari-mint")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ "m5475-atari-mint")
+ ATARIARCH=v4e
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ "i686-w64-mingw32")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ "ppc-amigaos")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ "m68k-unknown-amigaos")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" cannot be built on \"${HOST})\""
+ exit 1
+ ;;
+
+ esac
+
+ IDENTIFIER="${HOST}-${IDENTIFIER}"
+ OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}"
+ PKG_SRC=nsfb
+ PKG_SFX=
+ ;;
+
+
+ "monkey")
+ # monkey target can be built anywhere
+ case ${HOST} in
+ amd64-unknown-openbsd*)
+ MAKE=gmake
+ ;;
+
+ x86_64-unknown-freebsd*)
+ MAKE=gmake
+ ;;
+
+ "arm-unknown-riscos")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ # headers and compiler combination throw these warnings
+ export CFLAGS="-Wno-redundant-decls -Wno-parentheses"
+ export LDFLAGS=-lcares
+ ;;
+
+ "m68k-atari-mint")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ "m5475-atari-mint")
+ ATARIARCH=v4e
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ "i686-w64-mingw32")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ "ppc-amigaos")
+ export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env
+ export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin
+ ;;
+
+ *)
+ echo "Target \"${TARGET}\" generic build on \"${HOST})\""
+ ;;
+
+ esac
+
+ IDENTIFIER="${HOST}-${IDENTIFIER}"
+ OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}"
+ PKG_SRC=nsmonkey
+ PKG_SFX=
+ ;;
+
+ *)
+ # TARGET must be in the environment and set correctly
+ echo "Unkown TARGET \"${TARGET}\""
+ exit 1
+ ;;
+
+esac
+
+# setup environment: per-toolchain artifact prefix and the search paths
+# derived from it
+PREFIX="${JENKINS_HOME}/artifacts-${HOST}"
+PKG_CONFIG_PATH="${PREFIX}/lib/pkgconfig"
+LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${PREFIX}/lib"
+PATH="${PATH}:${PREFIX}/bin"
+export PREFIX PKG_CONFIG_PATH LD_LIBRARY_PATH PATH
+
+# configure ccache for clang
+if [[ "${CC}" == "clang" ]]; then
+    export CCACHE_CPP2=yes
+    export CC="clang -Qunused-arguments"
+fi
+
+########### Use distcc if present ######
+
+DISTCC=distcc
+PARALLEL=1
+# distcc counts as present when it can report its version
+if ${DISTCC} --version >/dev/null 2>&1; then
+    HAVE_DISTCC="true"
+    PARALLEL=$(${DISTCC} -j)
+    export PATH=/usr/lib/distcc:${PATH}
+    export DISTCC_DIR=${JENKINS_HOME}
+else
+    HAVE_DISTCC="false"
+fi
+
+
+########### Prepare a Makefile.config ##################
+
+rm -f Makefile.config
+cat > Makefile.config <<EOF
+override NETSURF_LOG_LEVEL := DEBUG
+EOF
+
+########### Additional environment info ########
+
+uname -a
+
+
+########### Build from source ##################
+
+# Clean first
+${MAKE} clean
+
+# Do the Build
+${MAKE} -j ${PARALLEL} -k CI_BUILD=${BUILD_NUMBER} ATARIARCH=${ATARIARCH} Q=
+
+
+############ Package artifact construction ################
+
+# build the package file
+${MAKE} -k CI_BUILD=${BUILD_NUMBER} ATARIARCH=${ATARIARCH} PACKAGER="NetSurf Developers <support@netsurf-browser.org>" Q= package
+
+if [ ! -f "${PKG_SRC}${PKG_SFX}" ]; then
+    # unable to find package file: say so rather than failing silently
+    echo "Package file \"${PKG_SRC}${PKG_SFX}\" was not created" >&2
+    exit 1
+fi
+
+# create package checksum files
+
+# find md5sum binary; fall back to md5, then to echo so that a checksum
+# file is still written when neither tool is available
+MD5SUM=md5sum
+command -v "${MD5SUM}" >/dev/null 2>&1 || MD5SUM=md5
+command -v "${MD5SUM}" >/dev/null 2>&1 || MD5SUM=echo
+
+# find sha256 binary name, with the same fallbacks
+SHAR256SUM=sha256sum
+command -v "${SHAR256SUM}" >/dev/null 2>&1 || SHAR256SUM=sha256
+command -v "${SHAR256SUM}" >/dev/null 2>&1 || SHAR256SUM=echo
+
+"${MD5SUM}" "${PKG_SRC}${PKG_SFX}" > "${PKG_SRC}.md5"
+"${SHAR256SUM}" "${PKG_SRC}${PKG_SFX}" > "${PKG_SRC}.sha256"
+
+
+############ Package artifact deployment ################
+
+# destination for package artifacts
+DESTDIR=/srv/ci.netsurf-browser.org/html/builds/${TARGET}/
+
+NEW_ARTIFACT_TARGET="NetSurf-${IDENTIFIER}"
+OLD_ARTIFACT_TARGETS=""
+
+for SUFFIX in "${PKG_SFX}" .md5 .sha256;do
+    # copy the file to the output - always use scp as it works local or remote
+    scp "${PKG_SRC}${SUFFIX}" "netsurf@ci.netsurf-browser.org:${DESTDIR}/${NEW_ARTIFACT_TARGET}${SUFFIX}"
+
+    # remove the local file artifact
+    rm -f "${PKG_SRC}${SUFFIX}"
+
+    # remember the matching artifact of the expired build for removal below
+    OLD_ARTIFACT_TARGETS="${OLD_ARTIFACT_TARGETS} ${DESTDIR}/NetSurf-${OLD_IDENTIFIER}${SUFFIX}"
+done
+
+
+############ Expired package artifact removal and latest linking ##############
+
+# The whole remote command is one local string; the quotes around the
+# artifact name are escaped so they reach the remote shell instead of
+# ending the local string early.
+ssh netsurf@ci.netsurf-browser.org "rm -f ${OLD_ARTIFACT_TARGETS} ${DESTDIR}/LATEST && echo \"${NEW_ARTIFACT_TARGET}${PKG_SFX}\" > ${DESTDIR}/LATEST"
diff --git a/tools/memanalyze.pl b/tools/memanalyze.pl
new file mode 100755
index 000000000..57e107d11
--- /dev/null
+++ b/tools/memanalyze.pl
@@ -0,0 +1,380 @@
+#!/usr/bin/env perl
+#
+# Example input (addresses carry the 0x prefix the patterns below expect):
+#
+# MEM mprintf.c:1094 malloc(32) = 0xe5718
+# MEM mprintf.c:1103 realloc(0xe5718, 64) = 0xe6118
+# MEM sendf.c:232 free(0xf6520)
+
+my $mallocs=0;
+my $callocs=0;
+my $reallocs=0;
+my $strdups=0;
+my $showlimit;
+
+# Consume the leading option flags from @ARGV. The first argument that is
+# not a recognised flag (or the end of the list) stops the scan and is
+# left in place.
+for(;;) {
+    my $flag = $ARGV[0];
+
+    if($flag eq "-v") {
+        $verbose=1;
+    }
+    elsif($flag eq "-t") {
+        $trace=1;
+    }
+    elsif($flag eq "-l") {
+        # only show what alloc that caused a memlimit failure
+        $showlimit=1;
+    }
+    else {
+        last;
+    }
+
+    # drop the flag that was just handled
+    shift @ARGV;
+}
+
+# Highest total passed to newtotal() so far.
+my $maxmem;
+
+# Remember $newtot as the new maximum when it exceeds the previous one.
+sub newtotal {
+    my $candidate = shift;
+
+    # count a max here
+    $maxmem = $candidate if($candidate > $maxmem);
+}
+
+# The first argument left after the options is the dump file to analyse.
+my $file = $ARGV[0];
+
+if(! -f $file) {
+    print "Usage: memanalyze.pl [options] <dump file>\n",
+    "Options:\n",
+    " -l memlimit failure displayed\n",
+    " -v Verbose\n",
+    " -t Trace\n";
+    exit;
+}
+
+# Three-argument open keeps the file name from being parsed for a mode
+# prefix, and a failure is reported instead of silently reading nothing.
+open(FILE, "<", $file) ||
+    die "memanalyze.pl: unable to open $file: $!\n";
+
+if($showlimit) {
+    # -l: print only the first LIMIT line that ends in "memlimit"
+    while(<FILE>) {
+        if(/^LIMIT.*memlimit$/) {
+            print $_;
+            last;
+        }
+    }
+    close(FILE);
+    exit;
+}
+
+
+
+# Main pass over the dump. Each line starts with a prefix naming the kind
+# of event it records (LIMIT, MEM, FD, FILE, GETNAME or ADDR), followed by
+# "source:line" and the call that was logged. The bookkeeping hashes are
+# keyed by address or descriptor and are consulted by the reports that
+# follow the loop.
+while(<FILE>) {
+    chomp $_;
+    $line = $_;
+
+    if($line =~ /^LIMIT ([^ ]*):(\d*) (.*)/) {
+        # new memory limit test prefix
+        my $i = $3;
+        my ($source, $linenum) = ($1, $2);
+        if($trace && ($i =~ /([^ ]*) reached memlimit/)) {
+            print "LIMIT: $1 returned error at $source:$linenum\n";
+        }
+    }
+    elsif($line =~ /^MEM ([^ ]*):(\d*) (.*)/) {
+        # generic match for the filename+linenumber
+        $source = $1;
+        $linenum = $2;
+        $function = $3;
+
+        if($function =~ /free\(0x([0-9a-f]*)/) {
+            $addr = $1;
+            if(!exists $sizeataddr{$addr}) {
+                print "FREE ERROR: No memory allocated: $line\n";
+            }
+            elsif(-1 == $sizeataddr{$addr}) {
+                print "FREE ERROR: Memory freed twice: $line\n";
+                print "FREE ERROR: Previously freed at: ".$getmem{$addr}."\n";
+            }
+            else {
+                $totalmem -= $sizeataddr{$addr};
+                if($trace) {
+                    print "FREE: malloc at ".$getmem{$addr}." is freed again at $source:$linenum\n";
+                    printf("FREE: %d bytes freed, left allocated: $totalmem bytes\n", $sizeataddr{$addr});
+                }
+
+                newtotal($totalmem);
+                $frees++;
+
+                $sizeataddr{$addr}=-1; # set -1 to mark as freed
+                # from now on this records where the block was freed
+                $getmem{$addr}="$source:$linenum";
+
+            }
+        }
+        elsif($function =~ /malloc\((\d*)\) = 0x([0-9a-f]*)/) {
+            $size = $1;
+            $addr = $2;
+
+            if($sizeataddr{$addr}>0) {
+                # this means weeeeeirdo
+                print "Mixed debug compile, rebuild curl now\n";
+            }
+
+            $sizeataddr{$addr}=$size;
+            $totalmem += $size;
+
+            if($trace) {
+                print "MALLOC: malloc($size) at $source:$linenum",
+                " makes totally $totalmem bytes\n";
+            }
+
+            newtotal($totalmem);
+            $mallocs++;
+
+            $getmem{$addr}="$source:$linenum";
+        }
+        elsif($function =~ /calloc\((\d*),(\d*)\) = 0x([0-9a-f]*)/) {
+            $size = $1*$2;
+            $addr = $3;
+
+            $arg1 = $1;
+            $arg2 = $2;
+
+            if($sizeataddr{$addr}>0) {
+                # this means weeeeeirdo
+                print "Mixed debug compile, rebuild curl now\n";
+            }
+
+            $sizeataddr{$addr}=$size;
+            $totalmem += $size;
+
+            if($trace) {
+                print "CALLOC: calloc($arg1,$arg2) at $source:$linenum",
+                " makes totally $totalmem bytes\n";
+            }
+
+            newtotal($totalmem);
+            $callocs++;
+
+            $getmem{$addr}="$source:$linenum";
+        }
+        elsif($function =~ /realloc\(0x([0-9a-f]*), (\d*)\) = 0x([0-9a-f]*)/) {
+            $oldaddr = $1;
+            $newsize = $2;
+            $newaddr = $3;
+
+            # the old block stops counting, the new one starts
+            $totalmem -= $sizeataddr{$oldaddr};
+            if($trace) {
+                printf("REALLOC: %d less bytes and ", $sizeataddr{$oldaddr});
+            }
+            $sizeataddr{$oldaddr}=0;
+
+            $totalmem += $newsize;
+            $sizeataddr{$newaddr}=$newsize;
+
+            if($trace) {
+                printf("%d more bytes ($source:$linenum)\n", $newsize);
+            }
+
+            newtotal($totalmem);
+            $reallocs++;
+
+            $getmem{$oldaddr}="";
+            $getmem{$newaddr}="$source:$linenum";
+        }
+        elsif($function =~ /strdup\(0x([0-9a-f]*)\) \((\d*)\) = 0x([0-9a-f]*)/) {
+            # strdup(0xa5b50) (8) = 0xdf7c0
+
+            $dup = $1;
+            $size = $2;
+            $addr = $3;
+            $getmem{$addr}="$source:$linenum";
+            $sizeataddr{$addr}=$size;
+
+            $totalmem += $size;
+
+            if($trace) {
+                printf("STRDUP: $size bytes at %s, makes totally: %d bytes\n",
+                       $getmem{$addr}, $totalmem);
+            }
+
+            newtotal($totalmem);
+            $strdups++;
+        }
+        elsif($function =~ /strndup\(0x([0-9a-f]*), (\d*)\) \((\d*)\) = 0x([0-9a-f]*)/) {
+            # strndup(0xa5b50, 20) (8) = 0xdf7c0
+
+            $dup = $1;
+            $limit = $2;
+            $size = $3;
+            $addr = $4;
+            $getmem{$addr}="$source:$linenum";
+            $sizeataddr{$addr}=$size;
+
+            $totalmem += $size;
+
+            if($trace) {
+                printf("STRDUP: $size bytes at %s, makes totally: %d bytes\n",
+                       $getmem{$addr}, $totalmem);
+            }
+
+            newtotal($totalmem);
+            $strdups++;
+        }
+        else {
+            print "Not recognized input line: $function\n";
+        }
+    }
+    # FD url.c:1282 socket() = 5
+    elsif($_ =~ /^FD ([^ ]*):(\d*) (.*)/) {
+        # generic match for the filename+linenumber
+        $source = $1;
+        $linenum = $2;
+        $function = $3;
+
+        if($function =~ /socket\(\) = (\d*)/) {
+            $filedes{$1}=1;
+            $getfile{$1}="$source:$linenum";
+            $openfile++;
+        }
+        elsif($function =~ /accept\(\) = (\d*)/) {
+            $filedes{$1}=1;
+            $getfile{$1}="$source:$linenum";
+            $openfile++;
+        }
+        elsif($function =~ /sclose\((\d*)\)/) {
+            if($filedes{$1} != 1) {
+                print "Close without open: $line\n";
+            }
+            else {
+                $filedes{$1}=0; # closed now
+                $openfile--;
+            }
+        }
+    }
+    # FILE url.c:1282 fopen("blabla") = 0x5ddd
+    elsif($_ =~ /^FILE ([^ ]*):(\d*) (.*)/) {
+        # generic match for the filename+linenumber
+        $source = $1;
+        $linenum = $2;
+        $function = $3;
+
+        if($function =~ /fopen\(\"([^\"]*)\",\"([^\"]*)\"\) = (\(nil\)|0x([0-9a-f]*))/) {
+            if($3 eq "(nil)") {
+                ; # failed fopen(), nothing to track
+            }
+            else {
+                $fopen{$4}=1;
+                $fopenfile{$4}="$source:$linenum";
+                $fopens++;
+            }
+        }
+        # fclose(0x1026c8)
+        elsif($function =~ /fclose\(0x([0-9a-f]*)\)/) {
+            if(!$fopen{$1}) {
+                print "fclose() without fopen(): $line\n";
+            }
+            else {
+                $fopen{$1}=0;
+                $fopens--;
+            }
+        }
+    }
+    # GETNAME url.c:1901 getnameinfo()
+    elsif($_ =~ /^GETNAME ([^ ]*):(\d*) (.*)/) {
+        # not much to do
+    }
+
+    # ADDR url.c:1282 getaddrinfo() = 0x5ddd
+    elsif($_ =~ /^ADDR ([^ ]*):(\d*) (.*)/) {
+        # generic match for the filename+linenumber
+        $source = $1;
+        $linenum = $2;
+        $function = $3;
+
+        if($function =~ /getaddrinfo\(\) = (\(nil\)|0x([0-9a-f]*))/) {
+            # $1 is the whole result, $2 only the hex digits; the
+            # "(nil)" test has to look at the whole result
+            my ($result, $add) = ($1, $2);
+            if($result eq "(nil)") {
+                ; # failed lookup, nothing to track
+            }
+            else {
+                $addrinfo{$add}=1;
+                $addrinfofile{$add}="$source:$linenum";
+                $addrinfos++;
+            }
+        }
+        # freeaddrinfo(0x1026c8)
+        elsif($function =~ /freeaddrinfo\(0x([0-9a-f]*)\)/) {
+            if(!$addrinfo{$1}) {
+                print "freeaddrinfo() without getaddrinfo(): $line\n";
+            }
+            else {
+                $addrinfo{$1}=0;
+                $addrinfos--;
+            }
+        }
+
+
+    }
+    else {
+        print "Not recognized prefix line: $line\n";
+    }
+}
+close(FILE);
+
+# Report memory that is still accounted for once the dump is consumed.
+if($totalmem) {
+    print "Leak detected: memory still allocated: $totalmem bytes\n";
+
+    # list each address that still has a positive size recorded and
+    # accumulate count and bytes per allocating source location
+    foreach (keys %sizeataddr) {
+        $addr = $_;
+        $size = $sizeataddr{$addr};
+        next if($size <= 0);
+
+        print "At $addr, there's $size bytes.\t";
+        print " allocated by ".$getmem{$addr}."\n";
+        $allocs{$getmem{$addr}}++;
+        $amount{$getmem{$addr}} += $size;
+    }
+
+    print "Summary by location of allocation:\n";
+    print "Allocs\tBytes\tLocation\n";
+    # largest amount of leaked bytes first
+    my @locations = sort { $amount{$b} <=> $amount{$a} } keys %allocs;
+    foreach (@locations) {
+        print "$allocs{$_}\t$amount{$_}\t$_\n";
+    }
+}
+
+# Descriptors recorded by socket()/accept() that were never sclose()d.
+if($openfile) {
+    print "Open file descriptor created at ".$getfile{$_}."\n"
+        for grep { $filedes{$_} == 1 } keys %filedes;
+}
+
+# FILE handles from fopen() that were never fclose()d.
+if($fopens) {
+    print "Open FILE handles left at:\n";
+    print "fopen() called at ".$fopenfile{$_}."\n"
+        for grep { $fopen{$_} == 1 } keys %fopen;
+}
+
+# getaddrinfo() results that were never passed to freeaddrinfo().
+if($addrinfos) {
+    print "IPv6-style name resolve data left at:\n";
+    print "getaddrinfo() called at ".$addrinfofile{$_}."\n"
+        for grep { $addrinfo{$_} == 1 } keys %addrinfofile;
+}
+
+# -v: print the call counters and the peak allocation.
+if($verbose) {
+    # $frees and $maxmem are only assigned once a matching event has
+    # been seen; show 0 instead of an empty value when none was
+    my $freed = $frees || 0;
+    my $peak = $maxmem || 0;
+
+    print "Mallocs: $mallocs\n",
+    "Reallocs: $reallocs\n",
+    "Callocs: $callocs\n",
+    "Strdups: $strdups\n",
+    "Frees: $freed\n",
+    "Allocations: ".($mallocs + $callocs + $reallocs + $strdups)."\n";
+
+    print "Maximum allocated: $peak\n";
+}
diff --git a/tools/split-messages.c b/tools/split-messages.c
new file mode 100644
index 000000000..0bce7397f
--- /dev/null
+++ b/tools/split-messages.c
@@ -0,0 +1,549 @@
+/**
+ * \file
+ * simple tool to split a fat messages file without the capabilities of
+ * the full tool but without the dependency on perl.
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <zlib.h>
+
+#include "utils/errors.h"
+
+/**
+ * output formats the tool can be asked to generate
+ */
+enum out_fmt {
+ OUTPUTFMT_NONE = 0, /**< no format selected */
+ OUTPUTFMT_MESSAGES, /**< NetSurf messages format */
+};
+
+/**
+ * parameters that control behaviour of tool
+ */
+struct param {
+ /**
+  * compress output
+  */
+ int compress;
+ /**
+  * select language
+  */
+ char *selected;
+ /**
+  * fallback language for items unavailable in selected language
+  */
+ char *fallback;
+ /**
+  * non zero when malformed input lines are to be reported
+  */
+ int warnings;
+ /**
+  * platform to select entries for, NULL selects any platform
+  */
+ char *platform;
+ /**
+  * format of the generated output
+  */
+ enum out_fmt format;
+ /**
+  * name of the file to read
+  */
+ char *infilename;
+ /**
+  * name of the file to write
+  */
+ char *outfilename;
+};
+
+/**
+ * entry in the singly linked list of translations
+ */
+struct trnsltn_entry {
+ struct trnsltn_entry *next; /**< next entry in the list or NULL */
+ char *lang; /**< language of this translation */
+ char *key; /**< message key (token) */
+ char *value; /**< translated text */
+};
+
+/**
+ * output the command line synopsis to stderr
+ *
+ * \param argc argument count (not used)
+ * \param argv argument vector, the first element is shown as program name
+ * \return NSERROR_OK
+ */
+static nserror usage(int argc, char **argv)
+{
+    const char *progname = argv[0];
+
+    (void)argc;
+
+    fprintf(stderr,
+        "Usage: %s -l lang [-z] [-d lang] [-W warning] [-o <file>] [-i <file>] [-p platform] [-f format] [<file> [<file>]]\n"
+        "Options:\n"
+        " -z Gzip output\n"
+        " -l lang Language to select for\n"
+        " -d lang Fallback language [default: en]\n"
+        " -W warning Warnings generated none, all [default: none]\n"
+        " -p platform Platform to select for any, gtk, ami [default: any]\n"
+        " -f format Output format [default: messages]\n"
+        " -i filename Input file\n"
+        " -o filename Output file\n",
+        progname);
+
+    return NSERROR_OK;
+}
+
+/**
+ * store a heap copy of a string, releasing any previously stored copy
+ *
+ * \param dst The string pointer to update
+ * \param src The string to copy
+ * \return NSERROR_OK on success or NSERROR_NOMEM if the copy failed, in
+ *         which case \a dst is left unchanged.
+ */
+static nserror cmdline_set_string(char **dst, const char *src)
+{
+    char *copy;
+
+    copy = strdup(src);
+    if (copy == NULL) {
+        return NSERROR_NOMEM;
+    }
+
+    free(*dst);
+    *dst = copy;
+    return NSERROR_OK;
+}
+
+
+/**
+ * process command line arguments
+ *
+ * The parameter block is cleared and then filled from the options and
+ * from up to two trailing filename arguments (input then output) which
+ * take precedence over -i and -o. All strings stored in the parameter
+ * block are heap copies.
+ *
+ * \param argc argument count as passed to main
+ * \param argv argument vector as passed to main
+ * \param param The parameter block to fill in
+ * \return NSERROR_OK on success,
+ *         NSERROR_NOT_IMPLEMENTED if the output format is not supported,
+ *         NSERROR_BAD_PARAMETER on an unknown option or if the language,
+ *         input file or output file is missing,
+ *         NSERROR_NOMEM if an argument could not be copied.
+ */
+static nserror process_cmdline(int argc, char **argv, struct param *param)
+{
+    int opt;
+    nserror res = NSERROR_OK;
+
+    memset(param, 0, sizeof(*param));
+
+    while ((res == NSERROR_OK) &&
+           ((opt = getopt(argc, argv, "zl:d:W:o:i:p:f:")) != -1)) {
+        switch (opt) {
+        case 'z':
+            param->compress = 1;
+            break;
+
+        case 'l':
+            res = cmdline_set_string(&param->selected, optarg);
+            break;
+
+        case 'd':
+            res = cmdline_set_string(&param->fallback, optarg);
+            break;
+
+        case 'W':
+            /* "none" is the documented way to keep warnings off */
+            param->warnings = (strcmp(optarg, "none") != 0);
+            break;
+
+        case 'o':
+            res = cmdline_set_string(&param->outfilename, optarg);
+            break;
+
+        case 'i':
+            res = cmdline_set_string(&param->infilename, optarg);
+            break;
+
+        case 'p':
+            res = cmdline_set_string(&param->platform, optarg);
+            break;
+
+        case 'f':
+            if (strcmp(optarg, "messages") == 0) {
+                param->format = OUTPUTFMT_MESSAGES;
+            } else {
+                fprintf(stderr,
+                    "output format %s not supported\n",
+                    optarg);
+                usage(argc, argv);
+                return NSERROR_NOT_IMPLEMENTED;
+            }
+            break;
+
+        default:
+            usage(argc, argv);
+            return NSERROR_BAD_PARAMETER;
+        }
+    }
+
+    /* trailing filename arguments replace any -i / -o value */
+    if ((res == NSERROR_OK) && (optind < argc)) {
+        res = cmdline_set_string(&param->infilename, argv[optind]);
+        optind++;
+    }
+
+    if ((res == NSERROR_OK) && (optind < argc)) {
+        res = cmdline_set_string(&param->outfilename, argv[optind]);
+        optind++;
+    }
+
+    if (res != NSERROR_OK) {
+        return res;
+    }
+
+    /* parameter checks */
+    if (param->selected == NULL) {
+        fprintf(stderr, "A language to select must be specified\n");
+        usage(argc, argv);
+        return NSERROR_BAD_PARAMETER;
+    }
+
+    if (param->infilename == NULL) {
+        fprintf(stderr, "Input file required\n");
+        usage(argc, argv);
+        return NSERROR_BAD_PARAMETER;
+    }
+
+    if (param->outfilename == NULL) {
+        fprintf(stderr, "Output file required\n");
+        usage(argc, argv);
+        return NSERROR_BAD_PARAMETER;
+    }
+
+    /* platform "any" is represented by no platform at all */
+    if ((param->platform != NULL) &&
+        (strcmp(param->platform, "any") == 0)) {
+        free(param->platform);
+        param->platform = NULL;
+    }
+
+    /* defaults */
+    if (param->fallback == NULL) {
+        res = cmdline_set_string(&param->fallback, "en");
+        if (res != NSERROR_OK) {
+            return res;
+        }
+    }
+
+    if (param->format == OUTPUTFMT_NONE) {
+        param->format = OUTPUTFMT_MESSAGES;
+    }
+
+    return NSERROR_OK;
+}
+
+
+/**
+ * extract key/value from a line of input
+ *
+ * The line is modified in place: a trailing newline is removed and the
+ * first colon after the key is replaced by a terminator. The returned
+ * key and value point into \a line.
+ *
+ * \param line The line to split
+ * \param linelen The length of the line including any trailing newline
+ * \param key_out Updated to the start of the key on success
+ * \param value_out Updated to the start of the value, which may be
+ *                  empty, on success
+ * \return NSERROR_OK and key_out and value_out updated
+ *         NSERROR_NOT_FOUND if not a key/value input line (empty line or
+ *         comment)
+ *         NSERROR_INVALID if the line is an invalid format (missing colon)
+ */
+static nserror
+get_key_value(char *line, ssize_t linelen, char **key_out, char **value_out)
+{
+    char *key;
+    char *value;
+
+    /*
+     * remove the line delimiter first so it can neither end up in the
+     * value nor in a line that is reported as malformed.
+     */
+    if ((linelen > 0) && (line[linelen - 1] == '\n')) {
+        line[linelen - 1] = 0;
+    }
+
+    /* skip leading whitespace for start of key */
+    key = line + strspn(line, " \t\n");
+
+    /* empty line, only whitespace or a comment */
+    if ((*key == 0) || (*key == '#')) {
+        return NSERROR_NOT_FOUND;
+    }
+
+    /*
+     * the separator is looked for explicitly so an empty value is not
+     * mistaken for a missing colon.
+     */
+    value = strchr(key, ':');
+    if (value == NULL) {
+        return NSERROR_INVALID;
+    }
+
+    /* terminate the key and step over the separator */
+    *value = 0;
+    value++;
+
+    *key_out = key;
+    *value_out = value;
+    return NSERROR_OK;
+}
+
+
+/**
+ * extract language, platform and token elements from a string
+ *
+ * The string has the form "lang.platform.token" and is split in place
+ * by overwriting the first two dots with terminators.
+ *
+ * \param str The string to split
+ * \param lang_out Updated to the language element on success
+ * \param plat_out Updated to the platform element on success
+ * \param tok_out Updated to the token element on success
+ * \return NSERROR_OK with the outputs updated or NSERROR_INVALID if a
+ *         separator is missing or nothing follows it.
+ */
+static nserror
+get_lang_plat_tok(char *str, char **lang_out, char **plat_out, char **tok_out)
+{
+    char *platform;
+    char *token;
+    char *dot;
+
+    /* language ends at the first dot */
+    dot = strchr(str, '.');
+    if (dot == NULL) {
+        return NSERROR_INVALID;
+    }
+    *dot = 0;
+    platform = dot + 1;
+    if (*platform == 0) {
+        return NSERROR_INVALID;
+    }
+
+    /* platform ends at the next dot */
+    dot = strchr(platform, '.');
+    if (dot == NULL) {
+        return NSERROR_INVALID;
+    }
+    *dot = 0;
+    token = dot + 1;
+    if (*token == 0) {
+        return NSERROR_INVALID;
+    }
+
+    *lang_out = str;
+    *plat_out = platform;
+    *tok_out = token;
+
+    return NSERROR_OK;
+}
+
+
+/**
+ * reverse order of entries in a translation list
+ *
+ * \param tlist The list head, updated to the previously last entry
+ * \return NSERROR_OK
+ */
+static nserror
+translation_list_reverse(struct trnsltn_entry **tlist)
+{
+    struct trnsltn_entry *reversed = NULL;
+    struct trnsltn_entry *node = *tlist;
+
+    /* move every entry in turn to the front of the reversed list */
+    while (node != NULL) {
+        struct trnsltn_entry *rest = node->next;
+
+        node->next = reversed;
+        reversed = node;
+        node = rest;
+    }
+
+    *tlist = reversed;
+    return NSERROR_OK;
+}
+
+
+/**
+ * find a translation entry from a key
+ *
+ * \todo This implementation is incomplete! It only considers the very
+ * first entry on the list. This introduces the odd ordering
+ * requirement for keys in the fatmessages file. This is done to avoid
+ * an O(n^2) list search for every line of input.
+ *
+ * \param tlist translation list head
+ * \param key The key of the translation to search for
+ * \param trans_out The successful result
+ * \return NSERROR_OK and trans_out updated on success else NSERROR_NOT_FOUND;
+ */
+static nserror
+translation_from_key(struct trnsltn_entry *tlist,
+             char *key,
+             struct trnsltn_entry **trans_out)
+{
+    /* only the list head is a candidate */
+    if ((tlist != NULL) && (strcmp(tlist->key, key) == 0)) {
+        *trans_out = tlist;
+        return NSERROR_OK;
+    }
+
+    return NSERROR_NOT_FOUND;
+}
+
+
+/**
+ * create and link an entry into translation list
+ *
+ * The new entry holds its own copies of the strings and becomes the
+ * head of the list.
+ *
+ * \param tlist The list head, updated to the new entry on success
+ * \param lang The language of the translation
+ * \param key The key of the translation
+ * \param value The translated text
+ * \return NSERROR_OK on success or NSERROR_NOMEM if memory is exhausted,
+ *         in which case the list is unchanged.
+ */
+static nserror
+translation_add(struct trnsltn_entry **tlist,
+        const char *lang,
+        const char *key,
+        const char *value)
+{
+    struct trnsltn_entry *tnew;
+
+    tnew = malloc(sizeof(*tnew));
+    if (tnew == NULL) {
+        return NSERROR_NOMEM;
+    }
+
+    tnew->lang = strdup(lang);
+    tnew->key = strdup(key);
+    tnew->value = strdup(value);
+
+    /* a partially filled entry must never reach the list */
+    if ((tnew->lang == NULL) ||
+        (tnew->key == NULL) ||
+        (tnew->value == NULL)) {
+        free(tnew->lang);
+        free(tnew->key);
+        free(tnew->value);
+        free(tnew);
+        return NSERROR_NOMEM;
+    }
+
+    tnew->next = *tlist;
+    *tlist = tnew;
+    return NSERROR_OK;
+}
+
+
+/**
+ * replace language, key and value on a translation entry
+ *
+ * The replacement strings are copied before the old ones are released
+ * so the entry stays intact if a copy fails.
+ *
+ * \param tran The entry to update
+ * \param lang The new language
+ * \param key The new key
+ * \param value The new translated text
+ * \return NSERROR_OK on success or NSERROR_NOMEM if memory is exhausted,
+ *         in which case the entry is unchanged.
+ */
+static nserror
+translation_replace(struct trnsltn_entry *tran,
+            const char *lang,
+            const char *key,
+            const char *value)
+{
+    char *newlang;
+    char *newkey;
+    char *newvalue;
+
+    newlang = strdup(lang);
+    newkey = strdup(key);
+    newvalue = strdup(value);
+
+    if ((newlang == NULL) || (newkey == NULL) || (newvalue == NULL)) {
+        free(newlang);
+        free(newkey);
+        free(newvalue);
+        return NSERROR_NOMEM;
+    }
+
+    free(tran->lang);
+    tran->lang = newlang;
+    free(tran->key);
+    tran->key = newkey;
+    free(tran->value);
+    tran->value = newvalue;
+
+    return NSERROR_OK;
+}
+
+
+/**
+ * process a line of the input file
+ *
+ * The line is split in place into language, platform, token and value.
+ * Lines for other platforms are ignored. For each token the list keeps
+ * the entry in the selected language if there is one, otherwise the
+ * entry in the fallback language, otherwise the first entry seen.
+ *
+ * \param param The tool parameters
+ * \param tlist The translation list head
+ * \param line The input line, which is modified
+ * \param linelen The length of the input line
+ * \return NSERROR_OK if the line was used or deliberately ignored,
+ *         NSERROR_NOT_FOUND if the line holds no key/value pair,
+ *         NSERROR_INVALID if the line is malformed or another error
+ *         code from updating the list.
+ */
+static nserror
+messageline(struct param *param,
+        struct trnsltn_entry **tlist,
+        char *line, ssize_t linelen)
+{
+    nserror res;
+    char *key;
+    char *value;
+    char *lang;
+    char *plat;
+    char *tok;
+    struct trnsltn_entry *tran = NULL;
+
+    res = get_key_value(line, linelen, &key, &value);
+    if (res != NSERROR_OK) {
+        /* skip line as no valid key value pair found */
+        return res;
+    }
+
+    res = get_lang_plat_tok(key, &lang, &plat, &tok);
+    if (res != NSERROR_OK) {
+        /* malformed key */
+        return res;
+    }
+
+    if ((param->platform != NULL) &&
+        (strcmp(plat, "all") != 0) &&
+        (strcmp(plat, param->platform) != 0)) {
+        /* this translation is not for the selected platform */
+        return NSERROR_OK;
+    }
+
+    res = translation_from_key(*tlist, tok, &tran);
+    if (res == NSERROR_OK) {
+        if (strcmp(tran->lang, param->selected) != 0) {
+            /* current entry is not the selected language */
+            if (strcmp(lang, param->selected) == 0) {
+                /*
+                 * new entry is in selected language and
+                 * current entry is not
+                 */
+                res = translation_replace(tran, lang, tok, value);
+            } else if ((strcmp(lang, param->fallback) == 0) &&
+                   (strcmp(tran->lang, param->fallback) != 0)) {
+                /*
+                 * new entry is in fallback language and
+                 * current entry is not.
+                 */
+                res = translation_replace(tran, lang, tok, value);
+            }
+        } else {
+            if (strcmp(tran->lang, lang) == 0) {
+                /* second entry with matching language */
+                res = translation_replace(tran, lang, tok, value);
+            }
+        }
+    } else if (res == NSERROR_NOT_FOUND) {
+        /* first entry seen for this token */
+        res = translation_add(tlist, lang, tok, value);
+    }
+
+    return res;
+}
+
+
+/**
+ * read fatmessages file and create a translation entry list
+ *
+ * Entries are pushed on the front of the list while reading and the
+ * list is reversed at the end so it is in input order. Malformed lines
+ * are skipped and reported on stderr when warnings are enabled.
+ *
+ * \param param The tool parameters
+ * \param tlist The translation list head to update
+ * \return NSERROR_OK on success, NSERROR_NOT_FOUND if the input file
+ *         cannot be opened or NSERROR_NOMEM if memory is exhausted.
+ */
+static nserror
+fatmessages_read(struct param *param, struct trnsltn_entry **tlist)
+{
+    nserror res = NSERROR_OK;
+    FILE *infile;
+    char *line = NULL;
+    size_t linealloc = 0;
+    ssize_t linelen;
+    int linenum = 0;
+
+    infile = fopen(param->infilename, "r");
+    if (infile == NULL) {
+        perror("Unable to open input file");
+        return NSERROR_NOT_FOUND;
+    }
+
+    while ((linelen = getline(&line, &linealloc, infile)) != -1) {
+        linenum++;
+
+        res = messageline(param, tlist, line, linelen);
+        if (res == NSERROR_NOMEM) {
+            /* continuing would produce an incomplete output */
+            break;
+        }
+        if ((res == NSERROR_INVALID) && (param->warnings > 0)) {
+            fprintf(stderr, "line %d Malformed: \"%s\"\n",
+                linenum, line);
+        }
+    }
+
+    /* getline allocates the buffer, it is ours to release */
+    free(line);
+    fclose(infile);
+
+    if (res == NSERROR_NOMEM) {
+        return res;
+    }
+
+    res = translation_list_reverse(tlist);
+
+    return res;
+}
+
+
+/**
+ * write output in NetSurf messages format
+ *
+ * The output is a comment header followed by one "key:value" line per
+ * entry. It is written through zlib in both cases: mode "wbT" is used
+ * when compression is off and "wb9" when it is on.
+ *
+ * \param param The tool parameters
+ * \param tlist The translation list to write
+ * \return NSERROR_OK on success or NSERROR_PERMISSION if the output file
+ *         could not be opened or completely written.
+ */
+static nserror
+message_write(struct param *param, struct trnsltn_entry *tlist)
+{
+    gzFile outf;
+    const char *mode;
+    int failed = 0;
+
+    if (param->compress == 0) {
+        mode = "wbT";
+    } else {
+        mode = "wb9";
+    }
+
+    outf = gzopen(param->outfilename, mode);
+    if (outf == NULL) {
+        perror("Unable to open output file");
+        return NSERROR_PERMISSION;
+    }
+
+    /* gzprintf yields the bytes written, nothing written is a failure */
+    if (gzprintf(outf,
+         "# This messages file is automatically generated from %s\n"
+         "# at build-time. Please go and edit that instead of this.\n\n",
+         param->infilename) <= 0) {
+        failed = 1;
+    }
+
+    while ((failed == 0) && (tlist != NULL)) {
+        if (gzprintf(outf, "%s:%s\n", tlist->key, tlist->value) <= 0) {
+            failed = 1;
+        }
+        tlist = tlist->next;
+    }
+
+    /* the close flushes pending output so its result matters too */
+    if (gzclose(outf) != Z_OK) {
+        failed = 1;
+    }
+
+    if (failed != 0) {
+        fprintf(stderr, "Unable to write output file %s\n",
+            param->outfilename);
+        return NSERROR_PERMISSION;
+    }
+
+    return NSERROR_OK;
+}
+
+/**
+ * tool entry point
+ *
+ * Parses the command line, reads the input into a translation list and
+ * writes that list in the selected output format.
+ *
+ * \return EXIT_SUCCESS if every step succeeded else EXIT_FAILURE
+ */
+int main(int argc, char **argv)
+{
+    struct param param; /* control parameters */
+    struct trnsltn_entry *translations = NULL;
+    nserror res;
+
+    if (process_cmdline(argc, argv, &param) != NSERROR_OK) {
+        return EXIT_FAILURE;
+    }
+
+    if (fatmessages_read(&param, &translations) != NSERROR_OK) {
+        return EXIT_FAILURE;
+    }
+
+    /* only the messages format generates any output */
+    res = NSERROR_OK;
+    if (param.format == OUTPUTFMT_MESSAGES) {
+        res = message_write(&param, translations);
+    }
+
+    return (res == NSERROR_OK) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/tools/split-messages.pl b/tools/split-messages.pl
new file mode 100644
index 000000000..0504b24c1
--- /dev/null
+++ b/tools/split-messages.pl
@@ -0,0 +1,318 @@
+#!/usr/bin/perl
+#
+# Copyright 2013 Vivek Dasmohapatra <vivek@collabora.co.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# * The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+=head1
+
+Filter the NetSurf combined messages (i10n) file according to language
+and platform and generate output in a selection of formats for use
+both internally within netsurf and externally for translation
+services.
+
+=cut
+
+use strict;
+
+use Getopt::Long ();
+use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY );
+
+use IO::Compress::Gzip;
+
+use constant GETOPT_OPTS => qw( auto_abbrev no_getopt_compat bundling );
+use constant GETOPT_SPEC =>
+ qw( output|o=s
+ input|i=s
+ lang|l=s
+ dlang|d=s
+ plat|platform|p=s
+ format|fmt|f=s
+ warning|W=s
+ gzip|z
+ help|h|? );
+
+# default option values:
+my %opt = qw( dlang en plat any format messages warning none );
+
+sub input_stream ();
+sub output_stream ();
+sub formatter ();
+sub static_section($);
+sub usage ();
+
+# Read the combined messages file line by line and write the entries for
+# the selected language and platform in the selected format. Lines for
+# one key are expected to be consecutive: once the key changes (or the
+# input ends) a key without a translation in the selected language is
+# written from the default language instead.
+sub main ()
+{
+    my $input;
+    my $output;
+    my $format;
+    my $header;
+    my $footer;
+    my $opt_ok;
+
+    # option parsing:
+    Getopt::Long::Configure( GETOPT_OPTS );
+    $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC );
+
+    # allow input and output to be specified as non-option arguments:
+    if( @ARGV ) { $opt{input } ||= shift( @ARGV ) }
+    if( @ARGV ) { $opt{output} ||= shift( @ARGV ) }
+
+    # check the options are sane (and we weren't asked for the help)
+    # before any stream is opened, so that a bad command line never
+    # creates an output file
+    if( !$opt_ok || $opt{help} || $opt{lang} !~ /^[a-z]{2}$/ || $opt{dlang} !~ /^[a-z]{2}$/ )
+    {
+        usage();
+    }
+
+    # open the appropriate streams and get the formatter and headers:
+    $input  = input_stream();
+    $output = output_stream();
+    $format = formatter();
+    $header = static_section('header');
+    $footer = static_section('footer');
+
+    # we are good to go:
+    print( $output $header );
+
+    my $cur_key;
+
+    my $dlang_key;
+    my $dlang_val;
+
+    my $tran_out = 1;
+    my $tran_val;
+    my $tran_key;
+
+    # Run once all lines for $cur_key have been seen: ensure a
+    # translation has been generated for it.
+    my $finish_key = sub
+    {
+        if ($tran_out == 0)
+        {
+            # No translation for the key
+            if ($cur_key eq $dlang_key)
+            {
+                print( $output $format->( $dlang_key, $dlang_val ) );
+                if( $opt{warning} eq "fb" )
+                {
+                    warn( "warning: $dlang_key missing translation in $opt{lang} using $opt{dlang} instead" );
+                }
+            }
+            else
+            {
+                # No translation and nothing in default language
+                warn( "warning: $cur_key missing translation in $opt{lang} and no fallback in $opt{dlang}" );
+            }
+        }
+        else
+        {
+            if (($opt{dlang} ne $opt{lang} ) && ($tran_key eq $dlang_key) && ($tran_val eq $dlang_val))
+            {
+                if( $opt{warning} eq "dup" )
+                {
+                    warn( "warning: $tran_key value in $opt{lang} is same as in default $opt{dlang}" );
+                }
+            }
+        }
+    };
+
+    while (<$input>)
+    {
+        # skip comment and empty lines
+        /^#/ && next;
+        /^\s*$/ && next;
+
+        # only parsing things that look like message lines; the
+        # separators between language, platform and key are literal dots
+        if( /^([a-z]{2})\.([^.]+)\.([^:]+):(.*)/ )
+        {
+            my( $lang, $plat, $key, $val ) = ( $1, $2, $3, $4 );
+
+            # skip the line if it is not for our target platform
+            if( $opt{plat} ne 'any' &&
+                $opt{plat} ne $plat &&
+                'all' ne $plat )
+            {
+                next;
+            }
+
+            # On key change ensure a translation has been generated
+            if (!defined( $cur_key ) || $cur_key ne $key)
+            {
+                # nothing to finish before the very first key
+                $finish_key->() if( defined( $cur_key ) );
+                $cur_key = $key;
+                $tran_out = 0;
+            }
+
+            # capture the key/value in the default language
+            if( $lang eq $opt{dlang} )
+            {
+                $dlang_key = $key;
+                $dlang_val = $val;
+            }
+
+            # output if its the target language
+            if( $lang eq $opt{lang} ) {
+                # remember the value before the formatter sees it
+                $tran_val = $val;
+                $tran_key = $key;
+                print( $output $format->( $key, $val ) );
+                $tran_out = 1;
+            }
+        }
+        else
+        {
+            warn( "Malformed entry: $_" );
+        }
+    }
+
+    # the last key has no following key change to trigger its check
+    $finish_key->() if( defined( $cur_key ) );
+
+    print( $output $footer );
+}
+
+main();
+
+# Print the command line synopsis to STDERR and exit with status 1.
+sub usage ()
+{
+    # the packages below msgfmt:: name the available formats
+    my @fmt = map { s/::$//; $_ } keys(%{$::{'msgfmt::'}});
+    # the heredoc interpolates, so the continuation backslash is doubled
+    print(STDERR <<TXT );
+usage:
+ $0 -l lang-code [-d def-lang-code] [-W warning] \\
+ [-o output-file] [-i input-file] [-p platform] [-f format] [-z]
+
+ $0 -l lang-code ... [input-file [output-file]]
+
+ lang-code : en fr ko ... (no default)
+ def-lang-code : en fr ko ... (default 'en')
+ warning : none fb dup (default 'none')
+ platform : any gtk ami (default 'any')
+ format : @fmt (default 'messages')
+ input-file : defaults to standard input
+ output-file : defaults to standard output
+TXT
+    exit(1);
+}
+
+# Return the handle to read messages from: the file named by the input
+# option opened read only, or STDIN when no input file was given.
+sub input_stream ()
+{
+    return \*STDIN unless( $opt{input} );
+
+    my $ifh;
+
+    sysopen( $ifh, $opt{input}, O_RDONLY ) ||
+        die( "$0: Failed to open input file $opt{input}: $!\n" );
+
+    return $ifh;
+}
+
+# Return the raw handle to write to: the file named by the output option,
+# or STDOUT when no output file was given. The file is opened with
+# O_CREAT|O_EXCL, so opening fails if it already exists.
+sub underlying_output_stream ()
+{
+    return \*STDOUT unless( $opt{output} );
+
+    my $ofh;
+
+    sysopen( $ofh, $opt{output}, O_CREAT|O_EXCL|O_APPEND|O_WRONLY ) ||
+        die( "$0: Failed to open output file $opt{output}: $!\n" );
+
+    return $ofh;
+}
+
+# Return the handle the output is printed to. With the gzip option the
+# raw output handle is wrapped in a level 9 gzip compressor.
+sub output_stream ()
+{
+    my $raw = underlying_output_stream();
+
+    return $raw unless( $opt{gzip} );
+
+    return IO::Compress::Gzip->new( $raw, AutoClose => 1, -Level => 9 );
+}
+
+# Return the format function of the package implementing the selected
+# format (msgfmt::<format>), or die if there is none.
+sub formatter ()
+{
+    my $name = $opt{format};
+    my $func = UNIVERSAL::can( "msgfmt::$name", "format" );
+
+    die( "No handler found for format '$name'\n" ) unless( $func );
+
+    return $func;
+}
+
+# Return the text of a fixed section ('header' or 'footer') for the
+# selected format, or an empty string if the format does not provide it.
+sub static_section ($)
+{
+    my $sect = shift();
+    my $name = $opt{format};
+    my $func = UNIVERSAL::can( "msgfmt::$name", $sect );
+
+    return "" unless( $func );
+
+    return $func->();
+}
+
+# format implementations:
+# java properties style output: "key = value" with : ' and \ escaped
+{
+    package msgfmt::java;
+
+    # escape a copy of the value; substituting on $_[0] directly would
+    # also rewrite the caller's variable because @_ aliases it
+    sub escape { my $text = $_[0]; $text =~ s/([:'\\])/\\$1/g; $text }
+    sub format { return join(' = ', $_[0], escape( $_[1] ) ) . "\n" }
+    sub header { "# autogenerated from " . ($opt{input} || '-stdin-') . "\n" }
+}
+
+{
+ package msgfmt::messages; # native netsurf format
+
+ # one "key:value" line per entry
+ sub format { return join( ":", @_ ) . "\n" }
+ # comment block naming the input the file was generated from; the
+ # heredoc interpolates, so the \n on its last line yields an extra
+ # empty line after the comment
+ sub header
+ {
+ my $in = $opt{input} || '-stdin-';
+ return <<TXT;
+# This messages file is automatically generated from $in
+# at build-time. Please go and edit that instead of this.\n
+TXT
+ }
+}
+
+{
+    package msgfmt::transifex;
+    use base 'msgfmt::java';
+
+    # transifex has the following quirks:
+    # \ processing is buggy - they re-process every \\ as a \
+    # so \\n, instead of producing literal '\n', is interpreted as \ ^J
+    # Additionally, although the java properties format specifies
+    # that ' should be \ escaped, transifex does not allow/support this:
+    #
+    # The escaping is done on a copy; substituting on $_[0] directly
+    # would also rewrite the caller's variable because @_ aliases it.
+    sub escape { my $text = $_[0]; $text =~ s/(:|\\(?![abfnrtv]))/\\$1/g; $text }
+    sub format { return join(' = ', $_[0], escape( $_[1] ) ) . "\n" }
+}
+
+########### YAML ###########
+#{
+# package msgfmt::yaml;
+# use YAML qw(Dump Bless);
+# print Dump %data;
+#}
+
+# android string resource output: an XML <resources> document with one
+# <string> element per entry
+{
+ package msgfmt::android;
+
+ # XML declaration and opening of the resources element
+ sub header { qq|<?xml version="1.0" encoding="utf-8"?>\n<resources>\n| }
+ # closes the resources element (no trailing newline is written)
+ sub footer { qq|</resources>| }
+ # one <string> element; only the value is entity encoded, the key is
+ # inserted into the name attribute as it is
+ sub format
+ {
+ use HTML::Entities qw(encode_entities);
+ # encode just the characters < > & and "
+ my $escaped = encode_entities( $_[1], '<>&"' );
+ qq| <string name="$_[0]">$escaped</string>\n|;
+ }
+}
diff --git a/tools/test-netsurf b/tools/test-netsurf
new file mode 100755
index 000000000..90c7e121b
--- /dev/null
+++ b/tools/test-netsurf
@@ -0,0 +1,35 @@
+#!/bin/sh
+# This file is part of NetSurf, http://netsurf-browser.org/
+# Licensed under the GNU General Public License,
+# http://www.opensource.org/licenses/gpl-license
+# Copyright 2007 Rob Kendrick <rjek@netsurf-browser.org>
+#
+# This launcher script is meant only for running nsgtk from inside the
+# build tree, with some debugging enabled. It is not meant for day-to-day
+# or packaged use!
+
+if [ -d ~/.netsurf ]; then
+ LOG=~/.netsurf/log.txt
+elif [ -d /tmp ]; then
+ LOG=/tmp/netsurf-log.txt
+else
+ LOG=netsurf-log.txt
+fi
+
+echo $0: using $LOG as logfile
+
+ulimit -c unlimited
+
+if [ "x$1" = "x--gdb" ]; then
+ GDB="gdb --args"
+ echo
+ echo
+ echo "**********************************************************"
+ echo "Remember to type 'run' and press ENTER once gdb has loaded"
+ echo "**********************************************************"
+ echo
+ echo
+ shift
+fi
+
+exec $GDB $PREFIX/bin/netsurf -v "$@" 2>&1 | tee $LOG
diff --git a/tools/valgrind.supp b/tools/valgrind.supp
new file mode 100644
index 000000000..f1a27f4b4
--- /dev/null
+++ b/tools/valgrind.supp
@@ -0,0 +1,14 @@
+# Valgrind suppression file for NetSurf
+
+# Suppress valgrind reports of invalid 4 and 8 byte reads (Addr4/Addr8) in
+# strchrnul(). This use is OK because it provides only a speedup.
+{
+ strchrnul-addr4
+ Memcheck:Addr4
+ fun:strchrnul
+}
+{
+ strchrnul-addr8
+ Memcheck:Addr8
+ fun:strchrnul
+}
diff --git a/tools/xxd.c b/tools/xxd.c
new file mode 100644
index 000000000..a4e5dac5b
--- /dev/null
+++ b/tools/xxd.c
@@ -0,0 +1,135 @@
+/*
+ * xxd utility
+ *
+ * Copyright 2020 Lars Wirzenius
+ * Copyright 2020 Vincent Sanders <vince@netsurf-browser.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * derive a C identifier from a file name
+ *
+ * Every character that is not an ASCII letter or digit is replaced by an
+ * underscore. Because an identifier cannot start with a digit, a name
+ * with a leading digit is prefixed with two underscores.
+ *
+ * \param fname The file name to convert
+ * \return newly allocated identifier which the caller must free or NULL
+ *         if memory is exhausted.
+ */
+static char *get_array_name(const char *fname)
+{
+    size_t fnamelen;
+    size_t prefixlen;
+    size_t idx;
+    char *aryname;
+
+    fnamelen = strlen(fname);
+    prefixlen = ((fname[0] >= '0') && (fname[0] <= '9')) ? 2 : 0;
+
+    aryname = malloc(prefixlen + fnamelen + 1);
+    if (aryname == NULL) {
+        return NULL;
+    }
+
+    memset(aryname, '_', prefixlen);
+
+    for (idx = 0; idx < fnamelen; idx++) {
+        int c = fname[idx];
+        if ((c >= '0' && c <= '9') ||
+            (c >= 'A' && c <= 'Z') ||
+            (c >= 'a' && c <= 'z')) {
+            aryname[prefixlen + idx] = fname[idx];
+        } else {
+            aryname[prefixlen + idx] = '_';
+        }
+    }
+    aryname[prefixlen + fnamelen] = 0;
+
+    return aryname;
+}
+
+/**
+ * dump a file as a list of C hexadecimal byte constants
+ *
+ * Usage: xxd [-i] [infile [outfile]]
+ *
+ * Input defaults to stdin and output to stdout. With -i and a named
+ * input file the bytes are wrapped in an unsigned char array definition
+ * whose name is derived from the input file name, followed by a length
+ * variable.
+ *
+ * \return EXIT_SUCCESS on success, EXIT_FAILURE on a usage, open, read,
+ *         write or memory error.
+ */
+int main(int argc, char **argv)
+{
+    int inc = 0;
+    int opt;
+    int c, n;
+    FILE *inf = stdin;
+    FILE *outf = stdout;
+    char *aryname = NULL;
+    unsigned int outlen = 0;
+    int status = EXIT_SUCCESS;
+
+    while ((opt = getopt(argc, argv, "i")) != -1) {
+        switch (opt) {
+        case 'i':
+            inc = 1;
+            break;
+
+        default: /* '?' */
+            fprintf(stderr, "Usage: %s [-i] [infile [outfile]]\n",
+                argv[0]);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    if (optind < argc) {
+        /* binary mode so no byte is translated on any platform */
+        inf = fopen(argv[optind], "rb");
+        if (inf == NULL) {
+            perror("Opening for read");
+            exit(EXIT_FAILURE);
+        }
+        aryname = get_array_name(argv[optind]);
+        if (aryname == NULL) {
+            /* without a name -i output would lose its wrapper */
+            fprintf(stderr, "Out of memory\n");
+            exit(EXIT_FAILURE);
+        }
+        optind++;
+    }
+
+    if (optind < argc) {
+        outf = fopen(argv[optind], "w");
+        if (outf == NULL) {
+            perror("Opening for write");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    if ((inc != 0) && (aryname != NULL)) {
+        fprintf(outf, "unsigned char %s[] = {\n", aryname);
+    }
+
+    /* twelve bytes per output line */
+    n = 0;
+    while ((c = getc(inf)) != EOF) {
+        if (n == 0) {
+            fprintf(outf, " ");
+        }
+        fprintf(outf, " 0x%02x,", c);
+        n += 1;
+        outlen++;
+        if (n >= 12) {
+            fprintf(outf, "\n");
+            n = 0;
+        }
+    }
+    if (n > 0) {
+        fprintf(outf, "\n");
+    }
+
+    if ((inc != 0) && (aryname != NULL)) {
+        fprintf(outf, "};\nunsigned int %s_len = %u;\n",
+            aryname, outlen);
+    }
+
+    free(aryname);
+
+    /* a truncated dump must not be reported as success */
+    if (ferror(inf)) {
+        fprintf(stderr, "Error reading input\n");
+        status = EXIT_FAILURE;
+    }
+    if (ferror(outf)) {
+        fprintf(stderr, "Error writing output\n");
+        status = EXIT_FAILURE;
+    }
+
+    if (fclose(outf) != 0) {
+        perror("Closing output");
+        status = EXIT_FAILURE;
+    }
+    fclose(inf);
+
+    return status;
+}