diff options
-rw-r--r-- | build/tools/gentranstab.pl | 344 | ||||
-rw-r--r-- | doc/ChangeLog | 1 | ||||
-rw-r--r-- | src/Makefile | 6 | ||||
-rw-r--r-- | src/iconv.c | 77 | ||||
-rw-r--r-- | src/internal.h | 7 | ||||
-rw-r--r-- | src/transtab | 1689 | ||||
-rw-r--r-- | test/INDEX | 1 | ||||
-rw-r--r-- | test/Makefile | 2 | ||||
-rw-r--r-- | test/translit.c | 94 |
9 files changed, 2179 insertions, 42 deletions
diff --git a/build/tools/gentranstab.pl b/build/tools/gentranstab.pl new file mode 100644 index 0000000..0e9205a --- /dev/null +++ b/build/tools/gentranstab.pl @@ -0,0 +1,344 @@ +#!/usr/bin/perl + +use warnings; +use strict; + +# Usage: gentranstab.pl <path to transtab> + +usage() if (@ARGV != 1); + +my $transtab = shift @ARGV; + +open TRANSTAB,"<$transtab" or die "Failed opening $transtab: $!\n"; + +print <<EOF; +/* This file is autogenerated. Manual changes will be lost */ + +#include <assert.h> +#include <inttypes.h> +#include <stdbool.h> + +#include "internal.h" + +static int translit_write_character(struct encoding_context *e, + UCS4 c, char **buffer, size_t *buflen, bool use_transout) +{ + Encoding *out = use_transout ? e->transout : e->out; + int ret; + + if (out != NULL) { + char *prev_buf = *buffer; + size_t prev_len = *buflen; + + ret = encoding_write(out, c, buffer, (int *) buflen); + + if (ret <= 0) + *buflen = prev_len - (*buffer - prev_buf); + } else { + ret = iconv_eightbit_write(e, c, buffer, (int *) buflen); + } + + return ret; +} + +static int translit_try_sequence(struct encoding_context *e, + const size_t seqlen, const UCS2 *replacement) +{ + char *tmpbuf, *ptmpbuf; + size_t orig_tmplen, tmplen, index; + int ret = 1; + + /* First, determine if sequence can be written to target encoding */ + /* Worst case: conversion to UTF-8 (needing 6 bytes per character) */ + orig_tmplen = tmplen = (seqlen + 1) * 6; + ptmpbuf = tmpbuf = malloc(tmplen); + if (tmpbuf == NULL) + return 0; + + /* Reset the transout codec */ + if (e->transout != NULL) { + encoding_reset(e->transout); + encoding_set_flags(e->transout, e->outflags, e->outflags); + } + + for (index = 0; index < seqlen; index++) { + UCS4 c = replacement[index]; + do { + ret = translit_write_character(e, c, &ptmpbuf, + &tmplen, true); + if (ret == 0) { + char *tmp = realloc(tmpbuf, orig_tmplen * 2); + if (tmp == NULL) + break; + + ptmpbuf = tmp + (ptmpbuf - tmpbuf); + tmpbuf = tmp; + tmplen += orig_tmplen; + orig_tmplen *= 2; + } + } while (ret == 0); + + if (ret <= 0) + break; + } + + free(tmpbuf); + + if (ret <= 0) { + /* Consider lack of memory an inability to write the output */ + return -1; + } + + e->substitution = replacement; + e->substlen = seqlen; + + /* Emit replacement for real */ + return translit_flush_replacement(e); +} + +int translit_flush_replacement(struct encoding_context *e) +{ + const UCS2 *substitution = e->substitution; + size_t substlen = e->substlen; + int ret = 1; + + while (substlen > 0) { + UCS4 c = substitution[0]; + + ret = translit_write_character(e, c, + e->outbuf, e->outbytesleft, false); + assert(ret != -1); + if (ret <= 0) + break; + + substitution++; + substlen--; + } + + e->substitution = substitution; + e->substlen = substlen; + + return ret; +} + +EOF + +# Map from codepoint -> ttvals ref +# ttvals is a list of chars ref +my %transmap = (); +# Length, in characters, of longest substitution string seen so far +my $maxsubst = 0; +# Total number of substitution strings encountered +my $numsubsts = 0; +# Map from substitution string -> start index in charbin +my %substs = (); +# Accumulated list of substitution character sequences +my @charbin = (); + +# Read in transtab data +while (my $line = <TRANSTAB>) { + # Skip comments and blank lines + next if ($line =~ /^%/); + next if ($line =~ /^\s*$/); + + # Format: <codepoint> <data> + my ($codepoint, $data) = split(' ', $line); + + # Strip '<U' from start, and '>' from end of input codepoint + $codepoint =~ s/^<U([^>]+)>/$1/; + + # Data is a list of semi-colon-separated substitutions + my @substitutions = split(';', $data); + + my @ttvals = (); + + foreach my $sub (@substitutions) { + # Strip quotes around substitution sequence + $sub =~ s/"([^"]*)"/$1/; + + $numsubsts++; + + if ($sub eq "") { + # Special-case empty substitutions + my @empty = (); + push(@ttvals, \@empty); + next; + } + + # Split characters in sequence + my @chars = split('<', $sub); + shift @chars; + my $num_chars = scalar(@chars); + + # Strip leading 'U' and trailing '>' + map { $_ =~ s/U([^>]+)>/$1/; } @chars; + + $maxsubst = $num_chars if ($num_chars > $maxsubst); + + # Stringify chars to produce hash key + my $hkey = "@chars"; + + # Find/insert in bin, if new substitution + if (!defined($substs{$hkey})) { + my $pos = find_in_bin(\@chars, $num_chars); + + $substs{$hkey} = $pos; + } + + # Append to list of substitutions for codepoint + push(@ttvals, \@chars); + } + + # Insert into transmap + $transmap{$codepoint} = \@ttvals; +} + +close TRANSTAB; + +# Ensure transtab is representable +die "Charbin length exceeds 2^13!" if $#charbin >= 2**13; +die "Maxsubst exceeds 8!" if $maxsubst >= 2**3; + +print <<EOF; +struct translit_entry { + uint32_t codepoint : 16, + offset : 13, + length : 3; +}; + +EOF + +# Emit substitution data +my $cblen = @charbin; +print "static const UCS2 substdata[$cblen] = {\n"; +foreach my $c (@charbin) { + print "\t0x$c,\n"; +} +print "};\n\n"; + +# Emit transliteration LUT +my $ttlen = $numsubsts + 1; # + 1 for sentinel +print "static const struct translit_entry transtab[$ttlen] = {\n"; +foreach my $codepoint (sort(keys %transmap)) { + my $ttvals = $transmap{$codepoint}; + + for my $subst (@$ttvals) { + my $hkey = "@$subst"; + + if ($hkey ne "") { + my $slen = @$subst; + print "\t{ 0x$codepoint, $substs{$hkey}, $slen },\n"; + } else { + print "\t{ 0x$codepoint, 0, 0 },\n"; + } + } +} +# Place sentinel at the end +print "\t{ 0, 0, 0 }\n"; +print "};\n\n"; + +print <<EOF; +static int translit_tab_cmp(const void *a, const void *b) +{ + const struct translit_entry *aa = (const struct translit_entry *) a; + const struct translit_entry *bb = (const struct translit_entry *) b; + + return (int) aa->codepoint - (int) bb->codepoint; +} + +int translit_substitute(struct encoding_context *e, UCS4 c) +{ + static const UCS2 default_subst[1] = { '?' }; + int ret = 1; + + if (c <= 0xFFFF) { + struct translit_entry key = { c, 0, 0 }; + const struct translit_entry *res; + + res = bsearch(&key, transtab, $numsubsts, + sizeof(struct translit_entry), + translit_tab_cmp); + if (res != NULL) { + /* Reverse until we find the first entry for c */ + while (res > transtab) { + if (res[-1].codepoint != c) + break; + res--; + } + + /* Try substitutions in turn, until we run out */ + while (res->codepoint == c) { + ret = translit_try_sequence(e, res->length, + substdata + res->offset); + if (ret >= 0) + return ret; + + res++; + } + } + } + + /* Last-ditch replacement: must succeed */ + return translit_try_sequence(e, 1, default_subst); +} +EOF + +# Search bin for existing sequence, or append if not found. +# +# The intent here is to minimise duplication of substitution +# sequences. This implementation is decidedly trivial, and +# makes no attempt to discover the optimal insertion order. +# +# Inspection of the output indicates that we use approximately +# 5.5 bytes of storage for each substitution sequence +# encountered (4 of these are the translit_entry, so there +# doesn't seem much point in trying to optimise the layout of +# the charbin any further.) +sub find_in_bin +{ + my $pchars = shift; + my $pcharslen = shift; + my $binlen = scalar(@charbin); + my $offset = 0; + + # Search bin for pchars + while ($offset <= $binlen - $pcharslen) { + my @slice = @charbin[$offset .. $offset + $pcharslen - 1]; + + last if aeq(\@slice, $pchars); + + $offset++; + } + + if ($offset <= $binlen - $pcharslen) { + # Found in bin + return $offset; + } else { + # Not found, so append + push(@charbin, @$pchars); + return $binlen; + } +} + +# Compare two arrays for equality +sub aeq +{ + my ($aref, $bref) = @_; + return 0 unless @$aref == @$bref; + + my $idx = 0; + for my $item (@$aref) { + return 0 unless $item eq $bref->[$idx++]; + } + + return 1; +} + +sub usage +{ + print STDERR <<EOF; +Usage: gentranstab.pl <path to transtab> +EOF + + exit 1; +} diff --git a/doc/ChangeLog b/doc/ChangeLog index b055aa9..26d30a7 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -110,3 +110,4 @@ Iconv Changelog - Correct handling of trailing valid shift sequences. Previously would erroneously report EINVAL, instead of silently accepting them. + - Add proper transliteration behaviour when requested using //TRANSLIT. diff --git a/src/Makefile b/src/Makefile index 2a7d350..e520c6f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,4 +1,10 @@ # Sources DIR_SOURCES := alias.c aliases.c eightbit.c iconv.c utils.c +SOURCES := $(SOURCES) $(BUILDDIR)/src_translit.c + +$(BUILDDIR)/src_translit.c: src/transtab build/tools/gentranstab.pl + $(VQ)$(ECHO) "TRANSTAB: $<" + $(Q)$(PERL) build/tools/gentranstab.pl $< >$@ + include $(NSBUILD)/Makefile.subdir diff --git a/src/iconv.c b/src/iconv.c index db47cbc..c81a0b2 100644 --- a/src/iconv.c +++ b/src/iconv.c @@ -234,6 +234,18 @@ iconv_t iconv_open(const char *tocode, const char *fromcode) return (iconv_t)(-1); } + e->transout = encoding_new(to, encoding_WRITE_STRICT); + if (e->transout == NULL) { + if (e->out) + encoding_delete(e->out); + if (e->in) + encoding_delete(e->in); + iconv_eightbit_delete(e); + free(e); + errno = ENOMEM; /* Assume memory exhaustion */ + return (iconv_t)(-1); + } + /* Set encoding flags */ unsigned int flags = 0; if (to_force_le) @@ -243,6 +255,7 @@ iconv_t iconv_open(const char *tocode, const char *fromcode) flags |= encoding_FLAG_NO_HEADER; encoding_set_flags(e->out, flags, flags); + encoding_set_flags(e->transout, flags, flags); e->outflags = flags; } @@ -262,6 +275,7 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, { struct encoding_context *e; unsigned int read = 0; + int ret; /* search for cd in list */ for (e = context_list; e; e = e->next) @@ -289,7 +303,6 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, if (outbuf != NULL) { char *prev_outbuf = *outbuf; size_t prev_outbytesleft = *outbytesleft; - int ret; ret = encoding_write(e->out, NULL_UCS4, outbuf, (int*) outbytesleft); @@ -328,6 +341,13 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, e->outbuf = outbuf; e->outbytesleft = outbytesleft; + /* Flush through any remaining transliteration */ + ret = translit_flush_replacement(e); + if (ret <= 0) { + errno = E2BIG; + return (size_t)-1; + } + LOG(("reading")); /* If, on the previous attempt to convert data, we reached the end @@ -397,6 +417,10 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, errno = EINVAL; break; case WRITE_NOMEM: /* 4 */ + if (e->substlen > 0) { + /* Buffer full while transliterating: skip input */ + e->skip = read; + } errno = E2BIG; break; case WRITE_FAILED: /* 1 */ @@ -426,6 +450,8 @@ int iconv_close(iconv_t cd) encoding_delete(e->in); if (e->out) encoding_delete(e->out); + if (e->transout) + encoding_delete(e->transout); iconv_eightbit_delete(e); /* remove from list */ @@ -495,48 +521,17 @@ int character_callback(void *handle, UCS4 c) (int*)e->outbytesleft); } - e->write_state = ret == -1 ? WRITE_FAILED - : ret == 0 ? WRITE_NOMEM : WRITE_SUCCESS; + if (ret == -1 && e->transliterate) { + /* Transliterate, if we've been asked to. */ + ret = translit_substitute(e, c); + } if (ret == -1) { - /* Transliterate, if we've been asked to. - * Assumes that output is 8bit/8bit multibyte with ASCII G0. - * This should be fine as the only <>8bit encodings are - * UCS{2,4}, UTF-{16,32}, neither of which return -1. - * Also, afaiaa, all supported multibyte encodings are ASCII - * compatible. */ - /** \todo Actually perform some kind of transliteration */ - if (e->transliterate) { - if ((int)*e->outbytesleft > 0) { - if (e->out) { - /* Flush through any pending shift sequences */ - /** \todo this is a bit dodgy, as we only - * really need to ensure that the ASCII set - * is mapped into G0 in ISO2022 encodings. - * This will reset G1->G3, too, which may - * break things. If so, we may have to - * perform some dirty hackery which relies - * upon knowledge of UnicodeLib's internals - */ - encoding_write(e->out, NULL_UCS4, - e->outbuf, - (int*)e->outbytesleft); - } - - if ((int)*e->outbytesleft > 0) { - *(*e->outbuf)++ = '?'; - --*e->outbytesleft; - - e->write_state = WRITE_SUCCESS; - } else { - e->write_state = WRITE_NOMEM; - } - } else { - e->write_state = WRITE_NOMEM; - } - } else { - e->write_state = WRITE_FAILED; - } + e->write_state = WRITE_FAILED; + } else if (ret == 0) { + e->write_state = WRITE_NOMEM; + } else { + e->write_state = WRITE_SUCCESS; } /* Always stop after processing each character */ diff --git a/src/internal.h b/src/internal.h index 42efefe..827dccb 100644 --- a/src/internal.h +++ b/src/internal.h @@ -18,11 +18,14 @@ struct encoding_context { Encoding *in; unsigned int inflags; Encoding *out; + Encoding *transout; unsigned int outflags; unsigned short *intab, *outtab; char **outbuf; size_t *outbytesleft; char transliterate; + const UCS2 *substitution; + size_t substlen; enum { WRITE_SUCCESS, WRITE_FAILED, @@ -67,6 +70,10 @@ struct canon *alias_canonicalise(const char *alias); short mibenum_from_name(const char *alias); const char *mibenum_to_name(short mibenum); +/* in translit.c */ +int translit_flush_replacement(struct encoding_context *e); +int translit_substitute(struct encoding_context *e, UCS4 c); + /* in utils.c */ int strcasecmp(const char *s1, const char *s2); int strncasecmp(const char *s1, const char *s2, size_t len); diff --git a/src/transtab b/src/transtab new file mode 100644 index 0000000..e51465e --- /dev/null +++ b/src/transtab @@ -0,0 +1,1689 @@ +% Source: http://www.cl.cam.ac.uk/~mgk25/unicode.html#libs +% +% "This package contains a table for transliterating ISO 10646 texts into +% best-effort representations using smaller coded character sets (ASCII, +% ISO 8859, etc.). It is primarily intended for inclusion into the GNU C +% library, but might be of use for other applications as well. The table +% is freely available to anyone." + + +% APOSTROPHE +<U0027> <U2019> +% GRAVE ACCENT +<U0060> <U201B>;<U2018> +% NO-BREAK SPACE +<U00A0> <U0020> +% INVERTED EXCLAMATION MARK +<U00A1> <U0021> +% CENT SIGN +<U00A2> <U0063> +% POUND SIGN +<U00A3> "<U0047><U0042><U0050>" +% YEN SIGN +<U00A5> <U0059> +% BROKEN BAR +<U00A6> <U007C> +% SECTION SIGN +<U00A7> <U0053> +% DIAERESIS +<U00A8> <U0022> +% COPYRIGHT SIGN +<U00A9> "<U0028><U0063><U0029>";<U0063> +% FEMININE ORDINAL INDICATOR +<U00AA> <U0061> +% LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +<U00AB> "<U003C><U003C>" +% NOT SIGN +<U00AC> <U002D> +% SOFT HYPHEN +<U00AD> <U002D> +% REGISTERED SIGN +<U00AE> "<U0028><U0052><U0029>" +% MACRON +<U00AF> <U002D> +% DEGREE SIGN +<U00B0> <U0020> +% PLUS-MINUS SIGN +<U00B1> "<U002B><U002F><U002D>" +% SUPERSCRIPT TWO +<U00B2> "<U005E><U0032>";<U0032> +% SUPERSCRIPT THREE +<U00B3> "<U005E><U0033>";<U0033> +% ACUTE ACCENT +<U00B4> <U0027> +% MICRO SIGN +<U00B5> <U03BC>;<U0075> +% PILCROW SIGN +<U00B6> <U0050> +% MIDDLE DOT +<U00B7> <U002E> +% CEDILLA +<U00B8> <U002C> +% SUPERSCRIPT ONE +<U00B9> "<U005E><U0031>";<U0031> +% MASCULINE ORDINAL INDICATOR +<U00BA> <U006F> +% RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +<U00BB> "<U003E><U003E>" +% VULGAR FRACTION ONE QUARTER +<U00BC> "<U0020><U0031><U002F><U0034>" +% VULGAR FRACTION ONE HALF +<U00BD> "<U0020><U0031><U002F><U0032>" +% VULGAR FRACTION THREE QUARTERS +<U00BE> "<U0020><U0033><U002F><U0034>" +% INVERTED QUESTION MARK +<U00BF> <U003F> +% LATIN CAPITAL LETTER A WITH GRAVE +<U00C0> <U0041> +% LATIN CAPITAL LETTER A WITH ACUTE +<U00C1> <U0041> +% LATIN CAPITAL LETTER A WITH CIRCUMFLEX +<U00C2> <U0041> +% LATIN CAPITAL LETTER A WITH TILDE +<U00C3> <U0041> +% LATIN CAPITAL LETTER A WITH DIAERESIS +<U00C4> "<U0041><U0065>";<U0041> +% LATIN CAPITAL LETTER A WITH RING ABOVE +<U00C5> "<U0041><U0061>";<U0041> +% LATIN CAPITAL LETTER AE +<U00C6> "<U0041><U0045>";<U0041> +% LATIN CAPITAL LETTER C WITH CEDILLA +<U00C7> <U0043> +% LATIN CAPITAL LETTER E WITH GRAVE +<U00C8> <U0045> +% LATIN CAPITAL LETTER E WITH ACUTE +<U00C9> <U0045> +% LATIN CAPITAL LETTER E WITH CIRCUMFLEX +<U00CA> <U0045> +% LATIN CAPITAL LETTER E WITH DIAERESIS +<U00CB> <U0045> +% LATIN CAPITAL LETTER I WITH GRAVE +<U00CC> <U0049> +% LATIN CAPITAL LETTER I WITH ACUTE +<U00CD> <U0049> +% LATIN CAPITAL LETTER I WITH CIRCUMFLEX +<U00CE> <U0049> +% LATIN CAPITAL LETTER I WITH DIAERESIS +<U00CF> <U0049> +% LATIN CAPITAL LETTER ETH +<U00D0> <U0044> +% LATIN CAPITAL LETTER N WITH TILDE +<U00D1> <U004E> +% LATIN CAPITAL LETTER O WITH GRAVE +<U00D2> <U004F> +% LATIN CAPITAL LETTER O WITH ACUTE +<U00D3> <U004F> +% LATIN CAPITAL LETTER O WITH CIRCUMFLEX +<U00D4> <U004F> +% LATIN CAPITAL LETTER O WITH TILDE +<U00D5> <U004F> +% LATIN CAPITAL LETTER O WITH DIAERESIS +<U00D6> "<U004F><U0065>";<U004F> +% MULTIPLICATION SIGN +<U00D7> <U0078> +% LATIN CAPITAL LETTER O WITH STROKE +<U00D8> <U004F> +% LATIN CAPITAL LETTER U WITH GRAVE +<U00D9> <U0055> +% LATIN CAPITAL LETTER U WITH ACUTE +<U00DA> <U0055> +% LATIN CAPITAL LETTER U WITH CIRCUMFLEX +<U00DB> <U0055> +% LATIN CAPITAL LETTER U WITH DIAERESIS +<U00DC> "<U0055><U0065>";<U0055> +% LATIN CAPITAL LETTER Y WITH ACUTE +<U00DD> <U0059> +% LATIN CAPITAL LETTER THORN +<U00DE> "<U0054><U0068>" +% LATIN SMALL LETTER SHARP S +<U00DF> "<U0073><U0073>";<U03B2> +% LATIN SMALL LETTER A WITH GRAVE +<U00E0> <U0061> +% LATIN SMALL LETTER A WITH ACUTE +<U00E1> <U0061> +% LATIN SMALL LETTER A WITH CIRCUMFLEX +<U00E2> <U0061> +% LATIN SMALL LETTER A WITH TILDE +<U00E3> <U0061> +% LATIN SMALL LETTER A WITH DIAERESIS +<U00E4> "<U0061><U0065>";<U0061> +% LATIN SMALL LETTER A WITH RING ABOVE +<U00E5> "<U0061><U0061>";<U0061> +% LATIN SMALL LETTER AE +<U00E6> "<U0061><U0065>";<U0061> +% LATIN SMALL LETTER C WITH CEDILLA +<U00E7> <U0063> +% LATIN SMALL LETTER E WITH GRAVE +<U00E8> <U0065> +% LATIN SMALL LETTER E WITH ACUTE +<U00E9> <U0065> +% LATIN SMALL LETTER E WITH CIRCUMFLEX +<U00EA> <U0065> +% LATIN SMALL LETTER E WITH DIAERESIS +<U00EB> <U0065> +% LATIN SMALL LETTER I WITH GRAVE +<U00EC> <U0069> +% LATIN SMALL LETTER I WITH ACUTE +<U00ED> <U0069> +% LATIN SMALL LETTER I WITH CIRCUMFLEX +<U00EE> <U0069> +% LATIN SMALL LETTER I WITH DIAERESIS +<U00EF> <U0069> +% LATIN SMALL LETTER ETH +<U00F0> <U0064> +% LATIN SMALL LETTER N WITH TILDE +<U00F1> <U006E> +% LATIN SMALL LETTER O WITH GRAVE +<U00F2> <U006F> +% LATIN SMALL LETTER O WITH ACUTE +<U00F3> <U006F> +% LATIN SMALL LETTER O WITH CIRCUMFLEX +<U00F4> <U006F> +% LATIN SMALL LETTER O WITH TILDE +<U00F5> <U006F> +% LATIN SMALL LETTER O WITH DIAERESIS +<U00F6> "<U006F><U0065>";<U006F> +% DIVISION SIGN +<U00F7> <U003A> +% LATIN SMALL LETTER O WITH STROKE +<U00F8> <U006F> +% LATIN SMALL LETTER U WITH GRAVE +<U00F9> <U0075> +% LATIN SMALL LETTER U WITH ACUTE +<U00FA> <U0075> +% LATIN SMALL LETTER U WITH CIRCUMFLEX +<U00FB> <U0075> +% LATIN SMALL LETTER U WITH DIAERESIS +<U00FC> "<U0075><U0065>";<U0075> +% LATIN SMALL LETTER Y WITH ACUTE +<U00FD> <U0079> +% LATIN SMALL LETTER THORN +<U00FE> "<U0074><U0068>" +% LATIN SMALL LETTER Y WITH DIAERESIS +<U00FF> <U0079> +% LATIN CAPITAL LETTER A WITH MACRON +<U0100> <U0041> +% LATIN SMALL LETTER A WITH MACRON +<U0101> <U0061> +% LATIN CAPITAL LETTER A WITH BREVE +<U0102> <U0041> +% LATIN SMALL LETTER A WITH BREVE +<U0103> <U0061> +% LATIN CAPITAL LETTER A WITH OGONEK +<U0104> <U0041> +% LATIN SMALL LETTER A WITH OGONEK +<U0105> <U0061> +% LATIN CAPITAL LETTER C WITH ACUTE +<U0106> <U0043> +% LATIN SMALL LETTER C WITH ACUTE +<U0107> <U0063> +% LATIN CAPITAL LETTER C WITH CIRCUMFLEX +<U0108> "<U0043><U0068>";<U0043> +% LATIN SMALL LETTER C WITH CIRCUMFLEX +<U0109> "<U0063><U0068>";<U0063> +% LATIN CAPITAL LETTER C WITH DOT ABOVE +<U010A> <U0043> +% LATIN SMALL LETTER C WITH DOT ABOVE +<U010B> <U0063> +% LATIN CAPITAL LETTER C WITH CARON +<U010C> <U0043> +% LATIN SMALL LETTER C WITH CARON +<U010D> <U0063> +% LATIN CAPITAL LETTER D WITH CARON +<U010E> <U0044> +% LATIN SMALL LETTER D WITH CARON +<U010F> <U0064> +% LATIN CAPITAL LETTER D WITH STROKE +<U0110> <U0044> +% LATIN SMALL LETTER D WITH STROKE +<U0111> <U0064> +% LATIN CAPITAL LETTER E WITH MACRON +<U0112> <U0045> +% LATIN SMALL LETTER E WITH MACRON +<U0113> <U0065> +% LATIN CAPITAL LETTER E WITH BREVE +<U0114> <U0045> +% LATIN SMALL LETTER E WITH BREVE +<U0115> <U0065> +% LATIN CAPITAL LETTER E WITH DOT ABOVE +<U0116> <U0045> +% LATIN SMALL LETTER E WITH DOT ABOVE +<U0117> <U0065> +% LATIN CAPITAL LETTER E WITH OGONEK +<U0118> <U0045> +% LATIN SMALL LETTER E WITH OGONEK +<U0119> <U0065> +% LATIN CAPITAL LETTER E WITH CARON +<U011A> <U0045> +% LATIN SMALL LETTER E WITH CARON +<U011B> <U0065> +% LATIN CAPITAL LETTER G WITH CIRCUMFLEX +<U011C> "<U0047><U0068>";<U0047> +% LATIN SMALL LETTER G WITH CIRCUMFLEX +<U011D> "<U0067><U0068>";<U0067> +% LATIN CAPITAL LETTER G WITH BREVE +<U011E> <U0047> +% LATIN SMALL LETTER G WITH BREVE +<U011F> <U0067> +% LATIN CAPITAL LETTER G WITH DOT ABOVE +<U0120> <U0047> +% LATIN SMALL LETTER G WITH DOT ABOVE +<U0121> <U0067> +% LATIN CAPITAL LETTER G WITH CEDILLA +<U0122> <U0047> +% LATIN SMALL LETTER G WITH CEDILLA +<U0123> <U0067> +% LATIN CAPITAL LETTER H WITH CIRCUMFLEX +<U0124> "<U0048><U0068>";<U0048> +% LATIN SMALL LETTER H WITH CIRCUMFLEX +<U0125> "<U0068><U0068>";<U0068> +% LATIN CAPITAL LETTER H WITH STROKE +<U0126> <U0048> +% LATIN SMALL LETTER H WITH STROKE +<U0127> <U0068> +% LATIN CAPITAL LETTER I WITH TILDE +<U0128> <U0049> +% LATIN SMALL LETTER I WITH TILDE +<U0129> <U0069> +% LATIN CAPITAL LETTER I WITH MACRON +<U012A> <U0049> +% LATIN SMALL LETTER I WITH MACRON +<U012B> <U0069> +% LATIN CAPITAL LETTER I WITH BREVE +<U012C> <U0049> +% LATIN SMALL LETTER I WITH BREVE +<U012D> <U0069> +% LATIN CAPITAL LETTER I WITH OGONEK +<U012E> <U0049> +% LATIN SMALL LETTER I WITH OGONEK +<U012F> <U0069> +% LATIN CAPITAL LETTER I WITH DOT ABOVE +<U0130> <U0049> +% LATIN SMALL LETTER DOTLESS I +<U0131> <U0069> +% LATIN CAPITAL LIGATURE IJ +<U0132> "<U0049><U004A>" +% LATIN SMALL LIGATURE IJ +<U0133> "<U0069><U006A>" +% LATIN CAPITAL LETTER J WITH CIRCUMFLEX +<U0134> "<U004A><U0068>";<U004A> +% LATIN SMALL LETTER J WITH CIRCUMFLEX +<U0135> "<U006A><U0068>";<U006A> +% LATIN CAPITAL LETTER K WITH CEDILLA +<U0136> <U004B> +% LATIN SMALL LETTER K WITH CEDILLA +<U0137> <U006B> +% LATIN SMALL LETTER KRA +<U0138> <U006B> +% LATIN CAPITAL LETTER L WITH ACUTE +<U0139> <U004C> +% LATIN SMALL LETTER L WITH ACUTE +<U013A> <U006C> +% LATIN CAPITAL LETTER L WITH CEDILLA +<U013B> <U004C> +% LATIN SMALL LETTER L WITH CEDILLA +<U013C> <U006C> +% LATIN CAPITAL LETTER L WITH CARON +<U013D> <U004C> +% LATIN SMALL LETTER L WITH CARON +<U013E> <U006C> +% LATIN CAPITAL LETTER L WITH MIDDLE DOT +<U013F> "<U004C><U00B7>";"<U004C><U002E>";<U004C> +% LATIN SMALL LETTER L WITH MIDDLE DOT +<U0140> "<U006C><U00B7>";"<U006C><U002E>";<U006C> +% LATIN CAPITAL LETTER L WITH STROKE +<U0141> <U004C> +% LATIN SMALL LETTER L WITH STROKE +<U0142> <U006C> +% LATIN CAPITAL LETTER N WITH ACUTE +<U0143> <U004E> +% LATIN SMALL LETTER N WITH ACUTE +<U0144> <U006E> +% LATIN CAPITAL LETTER N WITH CEDILLA +<U0145> <U004E> +% LATIN SMALL LETTER N WITH CEDILLA +<U0146> <U006E> +% LATIN CAPITAL LETTER N WITH CARON +<U0147> <U004E> +% LATIN SMALL LETTER N WITH CARON +<U0148> <U006E> +% LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +<U0149> "<U0027><U006E>" +% LATIN CAPITAL LETTER ENG +<U014A> "<U004E><U0047>";<U004E> +% LATIN SMALL LETTER ENG +<U014B> "<U006E><U0067>";<U006E> +% LATIN CAPITAL LETTER O WITH MACRON +<U014C> <U004F> +% LATIN SMALL LETTER O WITH MACRON +<U014D> <U006F> +% LATIN CAPITAL LETTER O WITH BREVE +<U014E> <U004F> +% LATIN SMALL LETTER O WITH BREVE +<U014F> <U006F> +% LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +<U0150> <U004F> +% LATIN SMALL LETTER O WITH DOUBLE ACUTE +<U0151> <U006F> +% LATIN CAPITAL LIGATURE OE +<U0152> "<U004F><U0045>" +% LATIN SMALL LIGATURE OE +<U0153> "<U006F><U0065>" +% LATIN CAPITAL LETTER R WITH ACUTE +<U0154> <U0052> +% LATIN SMALL LETTER R WITH ACUTE +<U0155> <U0072> +% LATIN CAPITAL LETTER R WITH CEDILLA +<U0156> <U0052> +% LATIN SMALL LETTER R WITH CEDILLA +<U0157> <U0072> +% LATIN CAPITAL LETTER R WITH CARON +<U0158> <U0052> +% LATIN SMALL LETTER R WITH CARON +<U0159> <U0072> +% LATIN CAPITAL LETTER S WITH ACUTE +<U015A> <U0053> +% LATIN SMALL LETTER S WITH ACUTE +<U015B> <U0073> +% LATIN CAPITAL LETTER S WITH CIRCUMFLEX +<U015C> "<U0053><U0068>";<U0053> +% LATIN SMALL LETTER S WITH CIRCUMFLEX +<U015D> "<U0073><U0068>";<U0073> +% LATIN CAPITAL LETTER S WITH CEDILLA +<U015E> <U0053> +% LATIN SMALL LETTER S WITH CEDILLA +<U015F> <U0073> +% LATIN CAPITAL LETTER S WITH CARON +<U0160> <U0053> +% LATIN SMALL LETTER S WITH CARON +<U0161> <U0073> +% LATIN CAPITAL LETTER T WITH CEDILLA +<U0162> <U0054> +% LATIN SMALL LETTER T WITH CEDILLA +<U0163> <U0074> +% LATIN CAPITAL LETTER T WITH CARON +<U0164> <U0054> +% LATIN SMALL LETTER T WITH CARON +<U0165> <U0074> +% LATIN CAPITAL LETTER T WITH STROKE +<U0166> <U0054> +% LATIN SMALL LETTER T WITH STROKE +<U0167> <U0074> +% LATIN CAPITAL LETTER U WITH TILDE +<U0168> <U0055> +% LATIN SMALL LETTER U WITH TILDE +<U0169> <U0075> +% LATIN CAPITAL LETTER U WITH MACRON +<U016A> <U0055> +% LATIN SMALL LETTER U WITH MACRON +<U016B> <U0075> +% LATIN CAPITAL LETTER U WITH BREVE +<U016C> <U0055> +% LATIN SMALL LETTER U WITH BREVE +<U016D> <U0075> +% LATIN CAPITAL LETTER U WITH RING ABOVE +<U016E> <U0055> +% LATIN SMALL LETTER U WITH RING ABOVE +<U016F> <U0075> +% LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +<U0170> <U0055> +% LATIN SMALL LETTER U WITH DOUBLE ACUTE +<U0171> <U0075> +% LATIN CAPITAL LETTER U WITH OGONEK +<U0172> <U0055> +% LATIN SMALL LETTER U WITH OGONEK +<U0173> <U0075> +% LATIN CAPITAL LETTER W WITH CIRCUMFLEX +<U0174> <U0057> +% LATIN SMALL LETTER W WITH CIRCUMFLEX +<U0175> <U0077> +% LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +<U0176> <U0059> +% LATIN SMALL LETTER Y WITH CIRCUMFLEX +<U0177> <U0079> +% LATIN CAPITAL LETTER Y WITH DIAERESIS +<U0178> <U0059> +% LATIN CAPITAL LETTER Z WITH ACUTE +<U0179> <U005A> +% LATIN SMALL LETTER Z WITH ACUTE +<U017A> <U007A> +% LATIN CAPITAL LETTER Z WITH DOT ABOVE +<U017B> <U005A> +% LATIN SMALL LETTER Z WITH DOT ABOVE +<U017C> <U007A> +% LATIN CAPITAL LETTER Z WITH CARON +<U017D> <U005A> +% LATIN SMALL LETTER Z WITH CARON +<U017E> <U007A> +% LATIN SMALL LETTER LONG S +<U017F> <U0073> +% LATIN SMALL LETTER F WITH HOOK +<U0192> <U0066> +% LATIN CAPITAL LETTER S WITH COMMA BELOW +<U0218> <U015E>;<U0053> +% LATIN SMALL LETTER S WITH COMMA BELOW +<U0219> <U015F>;<U0073> +% LATIN CAPITAL LETTER T WITH COMMA BELOW +<U021A> <U0162>;<U0054> +% LATIN SMALL LETTER T WITH COMMA BELOW +<U021B> <U0163>;<U0074> +% MODIFIER LETTER PRIME +<U02B9> <U2032>;<U0027> +% MODIFIER LETTER TURNED COMMA +<U02BB> <U2018> +% MODIFIER LETTER APOSTROPHE +<U02BC> <U2019>;<U0027> +% MODIFIER LETTER REVERSED COMMA +<U02BD> <U201B> +% MODIFIER LETTER CIRCUMFLEX ACCENT +<U02C6> <U005E> +% MODIFIER LETTER VERTICAL LINE +<U02C8> <U0027> +% MODIFIER LETTER MACRON +<U02C9> <U00AF> +% MODIFIER LETTER LOW VERTICAL LINE +<U02CC> <U002C> +% MODIFIER LETTER TRIANGULAR COLON +<U02D0> <U003A> +% RING ABOVE +<U02DA> <U00B0> +% SMALL TILDE +<U02DC> <U007E> +% DOUBLE ACUTE ACCENT +<U02DD> <U0022> +% GREEK NUMERAL SIGN +<U0374> <U0027> +% GREEK LOWER NUMERAL SIGN +<U0375> <U002C> +% GREEK QUESTION MARK +<U037E> <U003B> +% LATIN CAPITAL LETTER B WITH DOT ABOVE +<U1E02> <U0042> +% LATIN SMALL LETTER B WITH DOT ABOVE +<U1E03> <U0062> +% LATIN CAPITAL LETTER D WITH DOT ABOVE +<U1E0A> <U0044> +% LATIN SMALL LETTER D WITH DOT ABOVE +<U1E0B> <U0064> +% LATIN CAPITAL LETTER F WITH DOT ABOVE +<U1E1E> <U0046> +% LATIN SMALL LETTER F WITH DOT ABOVE +<U1E1F> <U0066> +% LATIN CAPITAL LETTER M WITH DOT ABOVE +<U1E40> <U004D> +% LATIN SMALL LETTER M WITH DOT ABOVE +<U1E41> <U006D> +% LATIN CAPITAL LETTER P WITH DOT ABOVE +<U1E56> <U0050> +% LATIN SMALL LETTER P WITH DOT ABOVE +<U1E57> <U0070> +% LATIN CAPITAL LETTER S WITH DOT ABOVE +<U1E60> <U0053> +% LATIN SMALL LETTER S WITH DOT ABOVE +<U1E61> <U0073> +% LATIN CAPITAL LETTER T WITH DOT ABOVE +<U1E6A> <U0054> +% LATIN SMALL LETTER T WITH DOT ABOVE +<U1E6B> <U0074> +% LATIN CAPITAL LETTER W WITH GRAVE +<U1E80> <U0057> +% LATIN SMALL LETTER W WITH GRAVE +<U1E81> <U0077> +% LATIN CAPITAL LETTER W WITH ACUTE +<U1E82> <U0057> +% LATIN SMALL LETTER W WITH ACUTE +<U1E83> <U0077> +% LATIN CAPITAL LETTER W WITH DIAERESIS +<U1E84> <U0057> +% LATIN SMALL LETTER W WITH DIAERESIS +<U1E85> <U0077> +% LATIN CAPITAL LETTER Y WITH GRAVE +<U1EF2> <U0059> +% LATIN SMALL LETTER Y WITH GRAVE +<U1EF3> <U0079> +% EN QUAD +<U2000> <U0020> +% EM QUAD +<U2001> "<U0020><U0020>" +% EN SPACE +<U2002> <U0020> +% EM SPACE +<U2003> "<U0020><U0020>" +% THREE-PER-EM SPACE +<U2004> <U0020> +% FOUR-PER-EM SPACE +<U2005> <U0020> +% SIX-PER-EM SPACE +<U2006> <U0020> +% FIGURE SPACE +<U2007> <U0020> +% PUNCTUATION SPACE +<U2008> <U0020> +% THIN SPACE +<U2009> <U0020> +% HAIR SPACE +<U200A> "" +% ZERO WIDTH SPACE +<U200B> "" +% ZERO WIDTH NON-JOINER +<U200C> "" +% ZERO WIDTH JOINER +<U200D> "" +% LEFT-TO-RIGHT MARK +<U200E> "" +% RIGHT-TO-LEFT MARK +<U200F> "" +% HYPHEN +<U2010> <U002D> +% NON-BREAKING HYPHEN +<U2011> <U002D> +% FIGURE DASH +<U2012> <U002D> +% EN DASH +<U2013> <U002D> +% EM DASH +<U2014> "<U002D><U002D>" +% HORIZONTAL BAR +<U2015> "<U002D><U002D>" +% DOUBLE VERTICAL LINE +<U2016> "<U007C><U007C>" +% DOUBLE LOW LINE +<U2017> <U005F> +% LEFT SINGLE QUOTATION MARK +<U2018> <U0027> +% RIGHT SINGLE QUOTATION MARK +<U2019> <U0027> +% SINGLE LOW-9 QUOTATION MARK +<U201A> <U0027> +% SINGLE HIGH-REVERSED-9 QUOTATION MARK +<U201B> <U0027> +% LEFT DOUBLE QUOTATION MARK +<U201C> <U0022> +% RIGHT DOUBLE QUOTATION MARK +<U201D> <U0022> +% DOUBLE LOW-9 QUOTATION MARK +<U201E> <U0022> +% DOUBLE HIGH-REVERSED-9 QUOTATION MARK +<U201F> <U0022> +% DAGGER +<U2020> <U002B> +% DOUBLE DAGGER +<U2021> "<U002B><U002B>" +% BULLET +<U2022> <U006F> +% TRIANGULAR BULLET +<U2023> <U003E> +% ONE DOT LEADER +<U2024> <U002E> +% TWO DOT LEADER +<U2025> "<U002E><U002E>" +% HORIZONTAL ELLIPSIS +<U2026> "<U002E><U002E><U002E>" +% HYPHENATION POINT +<U2027> <U002D> +% LEFT-TO-RIGHT EMBEDDING +<U202A> "" +% RIGHT-TO-LEFT EMBEDDING +<U202B> "" +% POP DIRECTIONAL FORMATTING +<U202C> "" +% LEFT-TO-RIGHT OVERRIDE +<U202D> "" +% RIGHT-TO-LEFT OVERRIDE +<U202E> "" +% NARROW NO-BREAK SPACE +<U202F> <U0020> +% PER MILLE SIGN +<U2030> "<U0020><U0030><U002F><U0030><U0030>" +% PRIME +<U2032> <U0027> +% DOUBLE PRIME +<U2033> <U0022> +% TRIPLE PRIME +<U2034> "<U0027><U0027><U0027>" +% REVERSED PRIME +<U2035> <U0060> +% REVERSED DOUBLE PRIME +<U2036> "<U0060><U0060>" +% REVERSED TRIPLE PRIME +<U2037> "<U0060><U0060><U0060>" +% SINGLE LEFT-POINTING ANGLE QUOTATION MARK +<U2039> <U003C> +% SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +<U203A> <U003E> +% DOUBLE EXCLAMATION MARK +<U203C> "<U0021><U0021>" +% OVERLINE +<U203E> <U002D> +% HYPHEN BULLET +<U2043> <U002D> +% FRACTION SLASH +<U2044> <U002F> +% QUESTION EXCLAMATION MARK +<U2048> "<U003F><U0021>" +% EXCLAMATION QUESTION MARK +<U2049> "<U0021><U003F>" +% TIRONIAN SIGN ET +<U204A> <U0037> +% SUPERSCRIPT ZERO +<U2070> "<U005E><U0030>";<U0030> +% SUPERSCRIPT FOUR +<U2074> "<U005E><U0034>";<U0034> +% SUPERSCRIPT FIVE +<U2075> "<U005E><U0035>";<U0035> +% SUPERSCRIPT SIX +<U2076> "<U005E><U0036>";<U0036> +% SUPERSCRIPT SEVEN +<U2077> "<U005E><U0037>";<U0037> +% SUPERSCRIPT EIGHT +<U2078> "<U005E><U0038>";<U0038> +% SUPERSCRIPT NINE +<U2079> "<U005E><U0039>";<U0039> +% SUPERSCRIPT PLUS SIGN +<U207A> "<U005E><U002B>";<U002B> +% SUPERSCRIPT MINUS +<U207B> "<U005E><U002D>";<U002D> +% SUPERSCRIPT EQUALS SIGN +<U207C> "<U005E><U003D>";<U003D> +% SUPERSCRIPT LEFT PARENTHESIS +<U207D> "<U005E><U0028>";<U0028> +% SUPERSCRIPT RIGHT PARENTHESIS +<U207E> "<U005E><U0029>";<U0029> +% SUPERSCRIPT LATIN SMALL LETTER N +<U207F> "<U005E><U006E>";<U006E> +% SUBSCRIPT ZERO +<U2080> "<U005F><U0030>";<U0030> +% SUBSCRIPT ONE +<U2081> "<U005F><U0031>";<U0031> +% SUBSCRIPT TWO +<U2082> "<U005F><U0032>";<U0032> +% SUBSCRIPT THREE +<U2083> "<U005F><U0033>";<U0033> +% SUBSCRIPT FOUR +<U2084> "<U005F><U0034>";<U0034> +% SUBSCRIPT FIVE +<U2085> "<U005F><U0035>";<U0035> +% SUBSCRIPT SIX +<U2086> "<U005F><U0036>";<U0036> +% SUBSCRIPT SEVEN +<U2087> "<U005F><U0037>";<U0037> +% SUBSCRIPT EIGHT +<U2088> "<U005F><U0038>";<U0038> +% SUBSCRIPT NINE +<U2089> "<U005F><U0039>";<U0039> +% SUBSCRIPT PLUS SIGN +<U208A> "<U005F><U002B>";<U002B> +% SUBSCRIPT MINUS +<U208B> "<U005F><U002D>";<U002D> +% SUBSCRIPT EQUALS SIGN +<U208C> "<U005F><U003D>";<U003D> +% SUBSCRIPT LEFT PARENTHESIS +<U208D> "<U005F><U0028>";<U0028> +% SUBSCRIPT RIGHT PARENTHESIS +<U208E> "<U005F><U0029>";<U0029> +% EURO SIGN +<U20AC> "<U0045><U0055><U0052>";<U0045> +% ACCOUNT OF +<U2100> "<U0061><U002F><U0063>" +% ADDRESSED TO THE SUBJECT +<U2101> "<U0061><U002F><U0073>" +% DEGREE CELSIUS +<U2103> "<U00B0><U0043>";<U0043> +% CARE OF +<U2105> "<U0063><U002F><U006F>" +% CADA UNA +<U2106> "<U0063><U002F><U0075>" +% DEGREE FAHRENHEIT +<U2109> "<U00B0><U0046>";<U0046> +% SCRIPT SMALL L +<U2113> <U006C> +% NUMERO SIGN +<U2116> "<U004E><U00BA>";"<U004E><U006F>" +% SOUND RECORDING COPYRIGHT +<U2117> "<U0028><U0050><U0029>" +% SERVICE MARK +<U2120> "<U005B><U0053><U004D><U005D>" +% TELEPHONE SIGN +<U2121> "<U0054><U0045><U004C>" +% TRADE MARK SIGN +<U2122> "<U005B><U0054><U004D><U005D>" +% OHM SIGN +<U2126> <U03A9>;"<U006F><U0068><U006D>";<U004F> +% KELVIN SIGN +<U212A> <U004B> +% ANGSTROM SIGN +<U212B> <U00C5> +% ESTIMATED SYMBOL +<U212E> <U0065> +% VULGAR FRACTION ONE THIRD +<U2153> "<U0020><U0031><U002F><U0033>" +% VULGAR FRACTION TWO THIRDS +<U2154> "<U0020><U0032><U002F><U0033>" +% VULGAR FRACTION ONE FIFTH +<U2155> "<U0020><U0031><U002F><U0035>" +% VULGAR FRACTION TWO FIFTHS +<U2156> "<U0020><U0032><U002F><U0035>" +% VULGAR FRACTION THREE FIFTHS +<U2157> "<U0020><U0033><U002F><U0035>" +% VULGAR FRACTION FOUR FIFTHS +<U2158> "<U0020><U0034><U002F><U0035>" +% VULGAR FRACTION ONE SIXTH +<U2159> "<U0020><U0031><U002F><U0036>" +% VULGAR FRACTION FIVE SIXTHS +<U215A> "<U0020><U0035><U002F><U0036>" +% VULGAR FRACTION ONE EIGHTH +<U215B> "<U0020><U0031><U002F><U0038>" +% VULGAR FRACTION THREE EIGHTHS +<U215C> "<U0020><U0033><U002F><U0038>" +% VULGAR FRACTION FIVE EIGHTHS +<U215D> "<U0020><U0035><U002F><U0038>" +% VULGAR FRACTION SEVEN EIGHTHS +<U215E> "<U0020><U0037><U002F><U0038>" +% FRACTION NUMERATOR ONE +<U215F> "<U0020><U0031><U002F>" +% ROMAN NUMERAL ONE +<U2160> <U0049> +% ROMAN NUMERAL TWO +<U2161> "<U0049><U0049>" +% ROMAN NUMERAL THREE +<U2162> "<U0049><U0049><U0049>" +% ROMAN NUMERAL FOUR +<U2163> "<U0049><U0056>" +% ROMAN NUMERAL FIVE +<U2164> <U0056> +% ROMAN NUMERAL SIX +<U2165> "<U0056><U0049>" +% ROMAN NUMERAL SEVEN +<U2166> "<U0056><U0049><U0049>" +% ROMAN NUMERAL EIGHT +<U2167> "<U0056><U0049><U0049><U0049>" +% ROMAN NUMERAL NINE +<U2168> "<U0049><U0058>" +% ROMAN NUMERAL TEN +<U2169> <U0058> +% ROMAN NUMERAL ELEVEN +<U216A> "<U0058><U0049>" +% ROMAN NUMERAL TWELVE +<U216B> "<U0058><U0049><U0049>" +% ROMAN NUMERAL FIFTY +<U216C> <U004C> +% ROMAN NUMERAL ONE HUNDRED +<U216D> <U0043> +% ROMAN NUMERAL FIVE HUNDRED +<U216E> <U0044> +% ROMAN NUMERAL ONE THOUSAND +<U216F> <U004D> +% SMALL ROMAN NUMERAL ONE +<U2170> <U0069> +% SMALL ROMAN NUMERAL TWO +<U2171> "<U0069><U0069>" +% SMALL ROMAN NUMERAL THREE +<U2172> "<U0069><U0069><U0069>" +% SMALL ROMAN NUMERAL FOUR +<U2173> "<U0069><U0076>" +% SMALL ROMAN NUMERAL FIVE +<U2174> <U0076> +% SMALL ROMAN NUMERAL SIX +<U2175> "<U0076><U0069>" +% SMALL ROMAN NUMERAL SEVEN +<U2176> "<U0076><U0069><U0069>" +% SMALL ROMAN NUMERAL EIGHT +<U2177> "<U0076><U0069><U0069><U0069>" +% SMALL ROMAN NUMERAL NINE +<U2178> "<U0069><U0078>" +% SMALL ROMAN NUMERAL TEN +<U2179> <U0078> +% SMALL ROMAN NUMERAL ELEVEN +<U217A> "<U0078><U0069>" +% SMALL ROMAN NUMERAL TWELVE +<U217B> "<U0078><U0069><U0069>" +% SMALL ROMAN NUMERAL FIFTY +<U217C> <U006C> +% SMALL ROMAN NUMERAL ONE HUNDRED +<U217D> <U0063> +% SMALL ROMAN NUMERAL FIVE HUNDRED +<U217E> <U0064> +% SMALL ROMAN NUMERAL ONE THOUSAND +<U217F> <U006D> +% LEFTWARDS ARROW +<U2190> "<U003C><U002D>" +% UPWARDS ARROW +<U2191> <U005E> +% RIGHTWARDS ARROW +<U2192> "<U002D><U003E>" +% DOWNWARDS ARROW +<U2193> <U0076> +% LEFT RIGHT ARROW +<U2194> "<U003C><U002D><U003E>" +% LEFTWARDS DOUBLE ARROW +<U21D0> "<U003C><U003D>" +% RIGHTWARDS DOUBLE ARROW +<U21D2> "<U003D><U003E>" +% LEFT RIGHT DOUBLE ARROW +<U21D4> "<U003C><U003D><U003E>" +% MINUS SIGN +<U2212> <U2013>;<U002D> +% DIVISION SLASH +<U2215> <U002F> +% SET MINUS +<U2216> <U005C> +% ASTERISK OPERATOR +<U2217> <U002A> +% RING OPERATOR +<U2218> <U006F> +% BULLET OPERATOR +<U2219> <U00B7> +% INFINITY +<U221E> "<U0069><U006E><U0066>" +% DIVIDES +<U2223> <U007C> +% PARALLEL TO +<U2225> "<U007C><U007C>" +% RATIO +<U2236> <U003A> +% TILDE OPERATOR +<U223C> <U007E> +% NOT EQUAL TO +<U2260> "<U002F><U003D>" +% IDENTICAL TO +<U2261> <U003D> +% LESS-THAN OR EQUAL TO +<U2264> "<U003C><U003D>" +% GREATER-THAN OR EQUAL TO +<U2265> "<U003E><U003D>" +% MUCH LESS-THAN +<U226A> "<U003C><U003C>" +% MUCH GREATER-THAN +<U226B> "<U003E><U003E>" +% CIRCLED PLUS +<U2295> "<U0028><U002B><U0029>" +% CIRCLED MINUS +<U2296> "<U0028><U002D><U0029>" +% CIRCLED TIMES +<U2297> "<U0028><U0078><U0029>" +% CIRCLED DIVISION SLASH +<U2298> "<U0028><U002F><U0029>" +% RIGHT TACK +<U22A2> "<U007C><U002D>" +% LEFT TACK +<U22A3> "<U002D><U007C>" +% ASSERTION +<U22A6> "<U007C><U002D>" +% MODELS +<U22A7> "<U007C><U003D>" +% TRUE +<U22A8> "<U007C><U003D>" +% FORCES +<U22A9> "<U007C><U007C><U002D>" +% DOT OPERATOR +<U22C5> <U00B7> +% STAR OPERATOR +<U22C6> <U002A> +% EQUAL AND PARALLEL TO +<U22D5> <U0023> +% VERY MUCH LESS-THAN +<U22D8> "<U003C><U003C><U003C>" +% VERY MUCH GREATER-THAN +<U22D9> "<U003E><U003E><U003E>" +% MIDLINE HORIZONTAL ELLIPSIS +<U22EF> "<U002E><U002E><U002E>" +% LEFT-POINTING ANGLE BRACKET +<U2329> <U003C> +% RIGHT-POINTING ANGLE BRACKET +<U232A> <U003E> +% SYMBOL FOR NULL +<U2400> "<U004E><U0055><U004C>" +% SYMBOL FOR START OF HEADING +<U2401> "<U0053><U004F><U0048>" +% SYMBOL FOR START OF TEXT +<U2402> "<U0053><U0054><U0058>" +% SYMBOL FOR END OF TEXT +<U2403> "<U0045><U0054><U0058>" +% SYMBOL FOR END OF TRANSMISSION +<U2404> "<U0045><U004F><U0054>" +% SYMBOL FOR ENQUIRY +<U2405> "<U0045><U004E><U0051>" +% SYMBOL FOR ACKNOWLEDGE +<U2406> "<U0041><U0043><U004B>" +% SYMBOL FOR BELL +<U2407> "<U0042><U0045><U004C>" +% SYMBOL FOR BACKSPACE +<U2408> "<U0042><U0053>" +% SYMBOL FOR HORIZONTAL TABULATION +<U2409> "<U0048><U0054>" +% SYMBOL FOR LINE FEED +<U240A> "<U004C><U0046>" +% SYMBOL FOR VERTICAL TABULATION +<U240B> "<U0056><U0054>" +% SYMBOL FOR FORM FEED +<U240C> "<U0046><U0046>" +% SYMBOL FOR CARRIAGE RETURN +<U240D> "<U0043><U0052>" +% SYMBOL FOR SHIFT OUT +<U240E> "<U0053><U004F>" +% SYMBOL FOR SHIFT IN +<U240F> "<U0053><U0049>" +% SYMBOL FOR DATA LINK ESCAPE +<U2410> "<U0044><U004C><U0045>" +% SYMBOL FOR DEVICE CONTROL ONE +<U2411> "<U0044><U0043><U0031>" +% SYMBOL FOR DEVICE CONTROL TWO +<U2412> "<U0044><U0043><U0032>" +% SYMBOL FOR DEVICE CONTROL THREE +<U2413> "<U0044><U0043><U0033>" +% SYMBOL FOR DEVICE CONTROL FOUR +<U2414> "<U0044><U0043><U0034>" +% SYMBOL FOR NEGATIVE ACKNOWLEDGE +<U2415> "<U004E><U0041><U004B>" +% SYMBOL FOR SYNCHRONOUS IDLE +<U2416> "<U0053><U0059><U004E>" +% SYMBOL FOR END OF TRANSMISSION BLOCK +<U2417> "<U0045><U0054><U0042>" +% SYMBOL FOR CANCEL +<U2418> "<U0043><U0041><U004E>" +% SYMBOL FOR END OF MEDIUM +<U2419> "<U0045><U004D>" +% SYMBOL FOR SUBSTITUTE +<U241A> "<U0053><U0055><U0042>" +% SYMBOL FOR ESCAPE +<U241B> "<U0045><U0053><U0043>" +% SYMBOL FOR FILE SEPARATOR +<U241C> "<U0046><U0053>" +% SYMBOL FOR GROUP SEPARATOR +<U241D> "<U0047><U0053>" +% SYMBOL FOR RECORD SEPARATOR +<U241E> "<U0052><U0053>" +% SYMBOL FOR UNIT SEPARATOR +<U241F> "<U0055><U0053>" +% SYMBOL FOR SPACE +<U2420> "<U0053><U0050>" +% SYMBOL FOR DELETE +<U2421> "<U0044><U0045><U004C>" +% OPEN BOX +<U2423> <U005F> +% SYMBOL FOR NEWLINE +<U2424> "<U004E><U004C>" +% SYMBOL FOR DELETE FORM TWO +<U2425> "<U002F><U002F><U002F>" +% SYMBOL FOR SUBSTITUTE FORM TWO +<U2426> <U003F> +% CIRCLED DIGIT ONE +<U2460> "<U0028><U0031><U0029>";<U0031> +% CIRCLED DIGIT TWO +<U2461> "<U0028><U0032><U0029>";<U0032> +% CIRCLED DIGIT THREE +<U2462> "<U0028><U0033><U0029>";<U0033> +% CIRCLED DIGIT FOUR +<U2463> "<U0028><U0034><U0029>";<U0034> +% CIRCLED DIGIT FIVE +<U2464> "<U0028><U0035><U0029>";<U0035> +% CIRCLED DIGIT SIX +<U2465> "<U0028><U0036><U0029>";<U0036> +% CIRCLED DIGIT SEVEN +<U2466> "<U0028><U0037><U0029>";<U0037> +% CIRCLED DIGIT EIGHT +<U2467> "<U0028><U0038><U0029>";<U0038> +% CIRCLED DIGIT NINE +<U2468> "<U0028><U0039><U0029>";<U0039> +% CIRCLED NUMBER TEN +<U2469> "<U0028><U0031><U0030><U0029>" +% CIRCLED NUMBER ELEVEN +<U246A> "<U0028><U0031><U0031><U0029>" +% CIRCLED NUMBER TWELVE +<U246B> "<U0028><U0031><U0032><U0029>" +% CIRCLED NUMBER THIRTEEN +<U246C> "<U0028><U0031><U0033><U0029>" +% CIRCLED NUMBER FOURTEEN +<U246D> "<U0028><U0031><U0034><U0029>" +% CIRCLED NUMBER FIFTEEN +<U246E> "<U0028><U0031><U0035><U0029>" +% CIRCLED NUMBER SIXTEEN +<U246F> "<U0028><U0031><U0036><U0029>" +% CIRCLED NUMBER SEVENTEEN +<U2470> "<U0028><U0031><U0037><U0029>" +% CIRCLED NUMBER EIGHTEEN +<U2471> "<U0028><U0031><U0038><U0029>" +% CIRCLED NUMBER NINETEEN +<U2472> "<U0028><U0031><U0039><U0029>" +% CIRCLED NUMBER TWENTY +<U2473> "<U0028><U0032><U0030><U0029>" +% PARENTHESIZED DIGIT ONE +<U2474> "<U0028><U0031><U0029>";<U0031> +% PARENTHESIZED DIGIT TWO +<U2475> "<U0028><U0032><U0029>";<U0032> +% PARENTHESIZED DIGIT THREE +<U2476> "<U0028><U0033><U0029>";<U0033> +% PARENTHESIZED DIGIT FOUR +<U2477> "<U0028><U0034><U0029>";<U0034> +% PARENTHESIZED DIGIT FIVE +<U2478> "<U0028><U0035><U0029>";<U0035> +% PARENTHESIZED DIGIT SIX +<U2479> "<U0028><U0036><U0029>";<U0036> +% PARENTHESIZED DIGIT SEVEN +<U247A> "<U0028><U0037><U0029>";<U0037> +% PARENTHESIZED DIGIT EIGHT +<U247B> "<U0028><U0038><U0029>";<U0038> +% PARENTHESIZED DIGIT NINE +<U247C> "<U0028><U0039><U0029>";<U0039> +% PARENTHESIZED NUMBER TEN +<U247D> "<U0028><U0031><U0030><U0029>" +% PARENTHESIZED NUMBER ELEVEN +<U247E> "<U0028><U0031><U0031><U0029>" +% PARENTHESIZED NUMBER TWELVE +<U247F> "<U0028><U0031><U0032><U0029>" +% PARENTHESIZED NUMBER THIRTEEN +<U2480> "<U0028><U0031><U0033><U0029>" +% PARENTHESIZED NUMBER FOURTEEN +<U2481> "<U0028><U0031><U0034><U0029>" +% PARENTHESIZED NUMBER FIFTEEN +<U2482> "<U0028><U0031><U0035><U0029>" +% PARENTHESIZED NUMBER SIXTEEN +<U2483> "<U0028><U0031><U0036><U0029>" +% PARENTHESIZED NUMBER SEVENTEEN +<U2484> "<U0028><U0031><U0037><U0029>" +% PARENTHESIZED NUMBER EIGHTEEN +<U2485> "<U0028><U0031><U0038><U0029>" +% PARENTHESIZED NUMBER NINETEEN +<U2486> "<U0028><U0031><U0039><U0029>" +% PARENTHESIZED NUMBER TWENTY +<U2487> "<U0028><U0032><U0030><U0029>" +% DIGIT ONE FULL STOP +<U2488> "<U0031><U002E>";<U0031> +% DIGIT TWO FULL STOP +<U2489> "<U0032><U002E>";<U0032> +% DIGIT THREE FULL STOP +<U248A> "<U0033><U002E>";<U0033> +% DIGIT FOUR FULL STOP +<U248B> "<U0034><U002E>";<U0034> +% DIGIT FIVE FULL STOP +<U248C> "<U0035><U002E>";<U0035> +% DIGIT SIX FULL STOP +<U248D> "<U0036><U002E>";<U0036> +% DIGIT SEVEN FULL STOP +<U248E> "<U0037><U002E>";<U0037> +% DIGIT EIGHT FULL STOP +<U248F> "<U0038><U002E>";<U0038> +% DIGIT NINE FULL STOP +<U2490> "<U0039><U002E>";<U0039> +% NUMBER TEN FULL STOP +<U2491> "<U0031><U0030><U002E>" +% NUMBER ELEVEN FULL STOP +<U2492> "<U0031><U0031><U002E>" +% NUMBER TWELVE FULL STOP +<U2493> "<U0031><U0032><U002E>" +% NUMBER THIRTEEN FULL STOP +<U2494> "<U0031><U0033><U002E>" +% NUMBER FOURTEEN FULL STOP +<U2495> "<U0031><U0034><U002E>" +% NUMBER FIFTEEN FULL STOP +<U2496> "<U0031><U0035><U002E>" +% NUMBER SIXTEEN FULL STOP +<U2497> "<U0031><U0036><U002E>" +% NUMBER SEVENTEEN FULL STOP +<U2498> "<U0031><U0037><U002E>" +% NUMBER EIGHTEEN FULL STOP +<U2499> "<U0031><U0038><U002E>" +% NUMBER NINETEEN FULL STOP +<U249A> "<U0031><U0039><U002E>" +% NUMBER TWENTY FULL STOP +<U249B> "<U0032><U0030><U002E>" +% PARENTHESIZED LATIN SMALL LETTER A +<U249C> "<U0028><U0061><U0029>";<U0061> +% PARENTHESIZED LATIN SMALL LETTER B +<U249D> "<U0028><U0062><U0029>";<U0062> +% PARENTHESIZED LATIN SMALL LETTER C +<U249E> "<U0028><U0063><U0029>";<U0063> +% PARENTHESIZED LATIN SMALL LETTER D +<U249F> "<U0028><U0064><U0029>";<U0064> +% PARENTHESIZED LATIN SMALL LETTER E +<U24A0> "<U0028><U0065><U0029>";<U0065> +% PARENTHESIZED LATIN SMALL LETTER F +<U24A1> "<U0028><U0066><U0029>";<U0066> +% PARENTHESIZED LATIN SMALL LETTER G +<U24A2> "<U0028><U0067><U0029>";<U0067> +% PARENTHESIZED LATIN SMALL LETTER H +<U24A3> "<U0028><U0068><U0029>";<U0068> +% PARENTHESIZED LATIN SMALL LETTER I +<U24A4> "<U0028><U0069><U0029>";<U0069> +% PARENTHESIZED LATIN SMALL LETTER J +<U24A5> "<U0028><U006A><U0029>";<U006A> +% PARENTHESIZED LATIN SMALL LETTER K +<U24A6> "<U0028><U006B><U0029>";<U006B> +% PARENTHESIZED LATIN SMALL LETTER L +<U24A7> "<U0028><U006C><U0029>";<U006C> +% PARENTHESIZED LATIN SMALL LETTER M +<U24A8> "<U0028><U006D><U0029>";<U006D> +% PARENTHESIZED LATIN SMALL LETTER N +<U24A9> "<U0028><U006E><U0029>";<U006E> +% PARENTHESIZED LATIN SMALL LETTER O +<U24AA> "<U0028><U006F><U0029>";<U006F> +% PARENTHESIZED LATIN SMALL LETTER P +<U24AB> "<U0028><U0070><U0029>";<U0070> +% PARENTHESIZED LATIN SMALL LETTER Q +<U24AC> "<U0028><U0071><U0029>";<U0071> +% PARENTHESIZED LATIN SMALL LETTER R +<U24AD> "<U0028><U0072><U0029>";<U0072> +% PARENTHESIZED LATIN SMALL LETTER S +<U24AE> "<U0028><U0073><U0029>";<U0073> +% PARENTHESIZED LATIN SMALL LETTER T +<U24AF> "<U0028><U0074><U0029>";<U0074> +% PARENTHESIZED LATIN SMALL LETTER U +<U24B0> "<U0028><U0075><U0029>";<U0075> +% PARENTHESIZED LATIN SMALL LETTER V +<U24B1> "<U0028><U0076><U0029>";<U0076> +% PARENTHESIZED LATIN SMALL LETTER W +<U24B2> "<U0028><U0077><U0029>";<U0077> +% PARENTHESIZED LATIN SMALL LETTER X +<U24B3> "<U0028><U0078><U0029>";<U0078> +% PARENTHESIZED LATIN SMALL LETTER Y +<U24B4> "<U0028><U0079><U0029>";<U0079> +% PARENTHESIZED LATIN SMALL LETTER Z +<U24B5> "<U0028><U007A><U0029>";<U007A> +% CIRCLED LATIN CAPITAL LETTER A +<U24B6> "<U0028><U0041><U0029>";<U0041> +% CIRCLED LATIN CAPITAL LETTER B +<U24B7> "<U0028><U0042><U0029>";<U0042> +% CIRCLED LATIN CAPITAL LETTER C +<U24B8> "<U0028><U0043><U0029>";<U0043> +% CIRCLED LATIN CAPITAL LETTER D +<U24B9> "<U0028><U0044><U0029>";<U0044> +% CIRCLED LATIN CAPITAL LETTER E +<U24BA> "<U0028><U0045><U0029>";<U0045> +% CIRCLED LATIN CAPITAL LETTER F +<U24BB> "<U0028><U0046><U0029>";<U0046> +% CIRCLED LATIN CAPITAL LETTER G +<U24BC> "<U0028><U0047><U0029>";<U0047> +% CIRCLED LATIN CAPITAL LETTER H +<U24BD> "<U0028><U0048><U0029>";<U0048> +% CIRCLED LATIN CAPITAL LETTER I +<U24BE> "<U0028><U0049><U0029>";<U0049> +% CIRCLED LATIN CAPITAL LETTER J +<U24BF> "<U0028><U004A><U0029>";<U004A> +% CIRCLED LATIN CAPITAL LETTER K +<U24C0> "<U0028><U004B><U0029>";<U004B> +% CIRCLED LATIN CAPITAL LETTER L +<U24C1> "<U0028><U004C><U0029>";<U004C> +% CIRCLED LATIN CAPITAL LETTER M +<U24C2> "<U0028><U004D><U0029>";<U004D> +% CIRCLED LATIN CAPITAL LETTER N +<U24C3> "<U0028><U004E><U0029>";<U004E> +% CIRCLED LATIN CAPITAL LETTER O +<U24C4> "<U0028><U004F><U0029>";<U004F> +% CIRCLED LATIN CAPITAL LETTER P +<U24C5> "<U0028><U0050><U0029>";<U0050> +% CIRCLED LATIN CAPITAL LETTER Q +<U24C6> "<U0028><U0051><U0029>";<U0051> +% CIRCLED LATIN CAPITAL LETTER R +<U24C7> "<U0028><U0052><U0029>";<U0052> +% CIRCLED LATIN CAPITAL LETTER S +<U24C8> "<U0028><U0053><U0029>";<U0053> +% CIRCLED LATIN CAPITAL LETTER T +<U24C9> "<U0028><U0054><U0029>";<U0054> +% CIRCLED LATIN CAPITAL LETTER U +<U24CA> "<U0028><U0055><U0029>";<U0055> +% CIRCLED LATIN CAPITAL LETTER V +<U24CB> "<U0028><U0056><U0029>";<U0056> +% CIRCLED LATIN CAPITAL LETTER W +<U24CC> "<U0028><U0057><U0029>";<U0057> +% CIRCLED LATIN CAPITAL LETTER X +<U24CD> "<U0028><U0058><U0029>";<U0058> +% CIRCLED LATIN CAPITAL LETTER Y +<U24CE> "<U0028><U0059><U0029>";<U0059> +% CIRCLED LATIN CAPITAL LETTER Z +<U24CF> "<U0028><U005A><U0029>";<U005A> +% CIRCLED LATIN SMALL LETTER A +<U24D0> "<U0028><U0061><U0029>";<U0061> +% CIRCLED LATIN SMALL LETTER B +<U24D1> "<U0028><U0062><U0029>";<U0062> +% CIRCLED LATIN SMALL LETTER C +<U24D2> "<U0028><U0063><U0029>";<U0063> +% CIRCLED LATIN SMALL LETTER D +<U24D3> "<U0028><U0064><U0029>";<U0064> +% CIRCLED LATIN SMALL LETTER E +<U24D4> "<U0028><U0065><U0029>";<U0065> +% CIRCLED LATIN SMALL LETTER F +<U24D5> "<U0028><U0066><U0029>";<U0066> +% CIRCLED LATIN SMALL LETTER G +<U24D6> "<U0028><U0067><U0029>";<U0067> +% CIRCLED LATIN SMALL LETTER H +<U24D7> "<U0028><U0068><U0029>";<U0068> +% CIRCLED LATIN SMALL LETTER I +<U24D8> "<U0028><U0069><U0029>";<U0069> +% CIRCLED LATIN SMALL LETTER J +<U24D9> "<U0028><U006A><U0029>";<U006A> +% CIRCLED LATIN SMALL LETTER K +<U24DA> "<U0028><U006B><U0029>";<U006B> +% CIRCLED LATIN SMALL LETTER L +<U24DB> "<U0028><U006C><U0029>";<U006C> +% CIRCLED LATIN SMALL LETTER M +<U24DC> "<U0028><U006D><U0029>";<U006D> +% CIRCLED LATIN SMALL LETTER N +<U24DD> "<U0028><U006E><U0029>";<U006E> +% CIRCLED LATIN SMALL LETTER O +<U24DE> "<U0028><U006F><U0029>";<U006F> +% CIRCLED LATIN SMALL LETTER P +<U24DF> "<U0028><U0070><U0029>";<U0070> +% CIRCLED LATIN SMALL LETTER Q +<U24E0> "<U0028><U0071><U0029>";<U0071> +% CIRCLED LATIN SMALL LETTER R +<U24E1> "<U0028><U0072><U0029>";<U0072> +% CIRCLED LATIN SMALL LETTER S +<U24E2> "<U0028><U0073><U0029>";<U0073> +% CIRCLED LATIN SMALL LETTER T +<U24E3> "<U0028><U0074><U0029>";<U0074> +% CIRCLED LATIN SMALL LETTER U +<U24E4> "<U0028><U0075><U0029>";<U0075> +% CIRCLED LATIN SMALL LETTER V +<U24E5> "<U0028><U0076><U0029>";<U0076> +% CIRCLED LATIN SMALL LETTER W +<U24E6> "<U0028><U0077><U0029>";<U0077> +% CIRCLED LATIN SMALL LETTER X +<U24E7> "<U0028><U0078><U0029>";<U0078> +% CIRCLED LATIN SMALL LETTER Y +<U24E8> "<U0028><U0079><U0029>";<U0079> +% CIRCLED LATIN SMALL LETTER Z +<U24E9> "<U0028><U007A><U0029>";<U007A> +% CIRCLED DIGIT ZERO +<U24EA> "<U0028><U0030><U0029>";<U0030> +% BOX DRAWINGS LIGHT HORIZONTAL +<U2500> <U002D> +% BOX DRAWINGS HEAVY HORIZONTAL +<U2501> <U003D> +% BOX DRAWINGS LIGHT VERTICAL +<U2502> <U007C> +% BOX DRAWINGS HEAVY VERTICAL +<U2503> <U007C> +% BOX DRAWINGS LIGHT TRIPLE DASH HORIZONTAL +<U2504> <U002D> +% BOX DRAWINGS HEAVY TRIPLE DASH HORIZONTAL +<U2505> <U003D> +% BOX DRAWINGS LIGHT TRIPLE DASH VERTICAL +<U2506> <U007C> +% BOX DRAWINGS HEAVY TRIPLE DASH VERTICAL +<U2507> <U007C> +% BOX DRAWINGS LIGHT QUADRUPLE DASH HORIZONTAL +<U2508> <U002D> +% BOX DRAWINGS HEAVY QUADRUPLE DASH HORIZONTAL +<U2509> <U003D> +% BOX DRAWINGS LIGHT QUADRUPLE DASH VERTICAL +<U250A> <U007C> +% BOX DRAWINGS HEAVY QUADRUPLE DASH VERTICAL +<U250B> <U007C> +% BOX DRAWINGS LIGHT DOWN AND RIGHT +<U250C> <U002B> +% BOX DRAWINGS DOWN LIGHT AND RIGHT HEAVY +<U250D> <U002B> +% BOX DRAWINGS DOWN HEAVY AND RIGHT LIGHT +<U250E> <U002B> +% BOX DRAWINGS HEAVY DOWN AND RIGHT +<U250F> <U002B> +% BOX DRAWINGS LIGHT DOWN AND LEFT +<U2510> <U002B> +% BOX DRAWINGS DOWN LIGHT AND LEFT HEAVY +<U2511> <U002B> +% BOX DRAWINGS DOWN HEAVY AND LEFT LIGHT +<U2512> <U002B> +% BOX DRAWINGS HEAVY DOWN AND LEFT +<U2513> <U002B> +% BOX DRAWINGS LIGHT UP AND RIGHT +<U2514> <U002B> +% BOX DRAWINGS UP LIGHT AND RIGHT HEAVY +<U2515> <U002B> +% BOX DRAWINGS UP HEAVY AND RIGHT LIGHT +<U2516> <U002B> +% BOX DRAWINGS HEAVY UP AND RIGHT +<U2517> <U002B> +% BOX DRAWINGS LIGHT UP AND LEFT +<U2518> <U002B> +% BOX DRAWINGS UP LIGHT AND LEFT HEAVY +<U2519> <U002B> +% BOX DRAWINGS UP HEAVY AND LEFT LIGHT +<U251A> <U002B> +% BOX DRAWINGS HEAVY UP AND LEFT +<U251B> <U002B> +% BOX DRAWINGS LIGHT VERTICAL AND RIGHT +<U251C> <U002B> +% BOX DRAWINGS VERTICAL LIGHT AND RIGHT HEAVY +<U251D> <U002B> +% BOX DRAWINGS UP HEAVY AND RIGHT DOWN LIGHT +<U251E> <U002B> +% BOX DRAWINGS DOWN HEAVY AND RIGHT UP LIGHT +<U251F> <U002B> +% BOX DRAWINGS VERTICAL HEAVY AND RIGHT LIGHT +<U2520> <U002B> +% BOX DRAWINGS DOWN LIGHT AND RIGHT UP HEAVY +<U2521> <U002B> +% BOX DRAWINGS UP LIGHT AND RIGHT DOWN HEAVY +<U2522> <U002B> +% BOX DRAWINGS HEAVY VERTICAL AND RIGHT +<U2523> <U002B> +% BOX DRAWINGS LIGHT VERTICAL AND LEFT +<U2524> <U002B> +% BOX DRAWINGS VERTICAL LIGHT AND LEFT HEAVY +<U2525> <U002B> +% BOX DRAWINGS UP HEAVY AND LEFT DOWN LIGHT +<U2526> <U002B> +% BOX DRAWINGS DOWN HEAVY AND LEFT UP LIGHT +<U2527> <U002B> +% BOX DRAWINGS VERTICAL HEAVY AND LEFT LIGHT +<U2528> <U002B> +% BOX DRAWINGS DOWN LIGHT AND LEFT UP HEAVY +<U2529> <U002B> +% BOX DRAWINGS UP LIGHT AND LEFT DOWN HEAVY +<U252A> <U002B> +% BOX DRAWINGS HEAVY VERTICAL AND LEFT +<U252B> <U002B> +% BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +<U252C> <U002B> +% BOX DRAWINGS LEFT HEAVY AND RIGHT DOWN LIGHT +<U252D> <U002B> +% BOX DRAWINGS RIGHT HEAVY AND LEFT DOWN LIGHT +<U252E> <U002B> +% BOX DRAWINGS DOWN LIGHT AND HORIZONTAL HEAVY +<U252F> <U002B> +% BOX DRAWINGS DOWN HEAVY AND HORIZONTAL LIGHT +<U2530> <U002B> +% BOX DRAWINGS RIGHT LIGHT AND LEFT DOWN HEAVY +<U2531> <U002B> +% BOX DRAWINGS LEFT LIGHT AND RIGHT DOWN HEAVY +<U2532> <U002B> +% BOX DRAWINGS HEAVY DOWN AND HORIZONTAL +<U2533> <U002B> +% BOX DRAWINGS LIGHT UP AND HORIZONTAL +<U2534> <U002B> +% BOX DRAWINGS LEFT HEAVY AND RIGHT UP LIGHT +<U2535> <U002B> +% BOX DRAWINGS RIGHT HEAVY AND LEFT UP LIGHT +<U2536> <U002B> +% BOX DRAWINGS UP LIGHT AND HORIZONTAL HEAVY +<U2537> <U002B> +% BOX DRAWINGS UP HEAVY AND HORIZONTAL LIGHT +<U2538> <U002B> +% BOX DRAWINGS RIGHT LIGHT AND LEFT UP HEAVY +<U2539> <U002B> +% BOX DRAWINGS LEFT LIGHT AND RIGHT UP HEAVY +<U253A> <U002B> +% BOX DRAWINGS HEAVY UP AND HORIZONTAL +<U253B> <U002B> +% BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +<U253C> <U002B> +% BOX DRAWINGS LEFT HEAVY AND RIGHT VERTICAL LIGHT +<U253D> <U002B> +% BOX DRAWINGS RIGHT HEAVY AND LEFT VERTICAL LIGHT +<U253E> <U002B> +% BOX DRAWINGS VERTICAL LIGHT AND HORIZONTAL HEAVY +<U253F> <U002B> +% BOX DRAWINGS UP HEAVY AND DOWN HORIZONTAL LIGHT +<U2540> <U002B> +% BOX DRAWINGS DOWN HEAVY AND UP HORIZONTAL LIGHT +<U2541> <U002B> +% BOX DRAWINGS VERTICAL HEAVY AND HORIZONTAL LIGHT +<U2542> <U002B> +% BOX DRAWINGS LEFT UP HEAVY AND RIGHT DOWN LIGHT +<U2543> <U002B> +% BOX DRAWINGS RIGHT UP HEAVY AND LEFT DOWN LIGHT +<U2544> <U002B> +% BOX DRAWINGS LEFT DOWN HEAVY AND RIGHT UP LIGHT +<U2545> <U002B> +% BOX DRAWINGS RIGHT DOWN HEAVY AND LEFT UP LIGHT +<U2546> <U002B> +% BOX DRAWINGS DOWN LIGHT AND UP HORIZONTAL HEAVY +<U2547> <U002B> +% BOX DRAWINGS UP LIGHT AND DOWN HORIZONTAL HEAVY +<U2548> <U002B> +% BOX DRAWINGS RIGHT LIGHT AND LEFT VERTICAL HEAVY +<U2549> <U002B> +% BOX DRAWINGS LEFT LIGHT AND RIGHT VERTICAL HEAVY +<U254A> <U002B> +% BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL +<U254B> <U002B> +% BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL +<U254C> <U002D> +% BOX DRAWINGS HEAVY DOUBLE DASH HORIZONTAL +<U254D> <U003D> +% BOX DRAWINGS LIGHT DOUBLE DASH VERTICAL +<U254E> <U007C> +% BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL +<U254F> <U007C> +% BOX DRAWINGS DOUBLE HORIZONTAL +<U2550> <U003D> +% BOX DRAWINGS DOUBLE VERTICAL +<U2551> <U007C> +% BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +<U2552> <U002B> +% BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +<U2553> <U002B> +% BOX DRAWINGS DOUBLE DOWN AND RIGHT +<U2554> <U002B> +% BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +<U2555> <U002B> +% BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +<U2556> <U002B> +% BOX DRAWINGS DOUBLE DOWN AND LEFT +<U2557> <U002B> +% BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +<U2558> <U002B> +% BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +<U2559> <U002B> +% BOX DRAWINGS DOUBLE UP AND RIGHT +<U255A> <U002B> +% BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +<U255B> <U002B> +% BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +<U255C> <U002B> +% BOX DRAWINGS DOUBLE UP AND LEFT +<U255D> <U002B> +% BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +<U255E> <U002B> +% BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +<U255F> <U002B> +% BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +<U2560> <U002B> +% BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +<U2561> <U002B> +% BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +<U2562> <U002B> +% BOX DRAWINGS DOUBLE VERTICAL AND LEFT +<U2563> <U002B> +% BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +<U2564> <U002B> +% BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +<U2565> <U002B> +% BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +<U2566> <U002B> +% BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +<U2567> <U002B> +% BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +<U2568> <U002B> +% BOX DRAWINGS DOUBLE UP AND HORIZONTAL +<U2569> <U002B> +% BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +<U256A> <U002B> +% BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +<U256B> <U002B> +% BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +<U256C> <U002B> +% BOX DRAWINGS LIGHT ARC DOWN AND RIGHT +<U256D> <U002B> +% BOX DRAWINGS LIGHT ARC DOWN AND LEFT +<U256E> <U002B> +% BOX DRAWINGS LIGHT ARC UP AND LEFT +<U256F> <U002B> +% BOX DRAWINGS LIGHT ARC UP AND RIGHT +<U2570> <U002B> +% BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT +<U2571> <U002F> +% BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT +<U2572> <U005C> +% BOX DRAWINGS LIGHT DIAGONAL CROSS +<U2573> <U0058> +% BOX DRAWINGS LIGHT LEFT AND HEAVY RIGHT +<U257C> <U002D> +% BOX DRAWINGS LIGHT UP AND HEAVY DOWN +<U257D> <U007C> +% BOX DRAWINGS HEAVY LEFT AND LIGHT RIGHT +<U257E> <U002D> +% BOX DRAWINGS HEAVY UP AND LIGHT DOWN +<U257F> <U007C> +% WHITE CIRCLE +<U25CB> <U006F> +% WHITE BULLET +<U25E6> <U006F> +% BLACK STAR +<U2605> <U002A> +% WHITE STAR +<U2606> <U002A> +% BALLOT BOX WITH X +<U2612> <U0058> +% SALTIRE +<U2613> <U0058> +% WHITE FROWNING FACE +<U2639> "<U003A><U002D><U0028>" +% WHITE SMILING FACE +<U263A> "<U003A><U002D><U0029>" +% BLACK SMILING FACE +<U263B> "<U0028><U002D><U003A>" +% MUSIC FLAT SIGN +<U266D> <U0062> +% MUSIC SHARP SIGN +<U266F> <U0023> +% UPPER BLADE SCISSORS +<U2701> "<U0025><U003C>" +% BLACK SCISSORS +<U2702> "<U0025><U003C>" +% LOWER BLADE SCISSORS +<U2703> "<U0025><U003C>" +% WHITE SCISSORS +<U2704> "<U0025><U003C>" +% VICTORY HAND +<U270C> <U0056> +% CHECK MARK +<U2713> <U221A> +% HEAVY CHECK MARK +<U2714> <U221A> +% MULTIPLICATION X +<U2715> <U0078> +% HEAVY MULTIPLICATION X +<U2716> <U0078> +% BALLOT X +<U2717> <U0058> +% HEAVY BALLOT X +<U2718> <U0058> +% OUTLINED GREEK CROSS +<U2719> <U002B> +% HEAVY GREEK CROSS +<U271A> <U002B> +% OPEN CENTRE CROSS +<U271B> <U002B> +% HEAVY OPEN CENTRE CROSS +<U271C> <U002B> +% LATIN CROSS +<U271D> <U002B> +% SHADOWED WHITE LATIN CROSS +<U271E> <U002B> +% OUTLINED LATIN CROSS +<U271F> <U002B> +% MALTESE CROSS +<U2720> <U002B> +% STAR OF DAVID +<U2721> <U002A> +% FOUR TEARDROP-SPOKED ASTERISK +<U2722> <U002B> +% FOUR BALLOON-SPOKED ASTERISK +<U2723> <U002B> +% HEAVY FOUR BALLOON-SPOKED ASTERISK +<U2724> <U002B> +% FOUR CLUB-SPOKED ASTERISK +<U2725> <U002B> +% BLACK FOUR POINTED STAR +<U2726> <U002B> +% WHITE FOUR POINTED STAR +<U2727> <U002B> +% STRESS OUTLINED WHITE STAR +<U2729> <U002A> +% CIRCLED WHITE STAR +<U272A> <U002A> +% OPEN CENTRE BLACK STAR +<U272B> <U002A> +% BLACK CENTRE WHITE STAR +<U272C> <U002A> +% OUTLINED BLACK STAR +<U272D> <U002A> +% HEAVY OUTLINED BLACK STAR +<U272E> <U002A> +% PINWHEEL STAR +<U272F> <U002A> +% SHADOWED WHITE STAR +<U2730> <U002A> +% HEAVY ASTERISK +<U2731> <U002A> +% OPEN CENTRE ASTERISK +<U2732> <U002A> +% EIGHT SPOKED ASTERISK +<U2733> <U002A> +% EIGHT POINTED BLACK STAR +<U2734> <U002A> +% EIGHT POINTED PINWHEEL STAR +<U2735> <U002A> +% SIX POINTED BLACK STAR +<U2736> <U002A> +% EIGHT POINTED RECTILINEAR BLACK STAR +<U2737> <U002A> +% HEAVY EIGHT POINTED RECTILINEAR BLACK STAR +<U2738> <U002A> +% TWELVE POINTED BLACK STAR +<U2739> <U002A> +% SIXTEEN POINTED ASTERISK +<U273A> <U002A> +% TEARDROP-SPOKED ASTERISK +<U273B> <U002A> +% OPEN CENTRE TEARDROP-SPOKED ASTERISK +<U273C> <U002A> +% HEAVY TEARDROP-SPOKED ASTERISK +<U273D> <U002A> +% SIX PETALLED BLACK AND WHITE FLORETTE +<U273E> <U002A> +% BLACK FLORETTE +<U273F> <U002A> +% WHITE FLORETTE +<U2740> <U002A> +% EIGHT PETALLED OUTLINED BLACK FLORETTE +<U2741> <U002A> +% CIRCLED OPEN CENTRE EIGHT POINTED STAR +<U2742> <U002A> +% HEAVY TEARDROP-SPOKED PINWHEEL ASTERISK +<U2743> <U002A> +% SNOWFLAKE +<U2744> <U002A> +% TIGHT TRIFOLIATE SNOWFLAKE +<U2745> <U002A> +% HEAVY CHEVRON SNOWFLAKE +<U2746> <U002A> +% SPARKLE +<U2747> <U002A> +% HEAVY SPARKLE +<U2748> <U002A> +% BALLOON-SPOKED ASTERISK +<U2749> <U002A> +% EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +<U274A> <U002A> +% HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +<U274B> <U002A> +% LATIN SMALL LIGATURE FF +<UFB00> "<U0066><U0066>" +% LATIN SMALL LIGATURE FI +<UFB01> "<U0066><U0069>" +% LATIN SMALL LIGATURE FL +<UFB02> "<U0066><U006C>" +% LATIN SMALL LIGATURE FFI +<UFB03> "<U0066><U0066><U0069>" +% LATIN SMALL LIGATURE FFL +<UFB04> "<U0066><U0066><U006C>" +% LATIN SMALL LIGATURE LONG S T +<UFB05> "<U017F><U0074>";"<U0073><U0074>" +% LATIN SMALL LIGATURE ST +<UFB06> "<U0073><U0074>" +% ZERO WIDTH NO-BREAK SPACE +<UFEFF> "" +% REPLACEMENT CHARACTER +<UFFFD> <U003F> @@ -3,5 +3,6 @@ # Test Description DataDir iconv Iconv initialisation/finalisation nullable Handling of nullable input sequences +translit Handling of transliteration # Regression tests diff --git a/test/Makefile b/test/Makefile index 89b8093..c5d298f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,4 +1,4 @@ # Tests -DIR_TEST_ITEMS := iconv:iconv.c nullable:nullable.c +DIR_TEST_ITEMS := iconv:iconv.c nullable:nullable.c translit:translit.c include $(NSBUILD)/Makefile.subdir diff --git a/test/translit.c b/test/translit.c new file mode 100644 index 0000000..8f17889 --- /dev/null +++ b/test/translit.c @@ -0,0 +1,94 @@ +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#include <iconv/iconv.h> +#include <iconv-internal/iconv.h> + +#include "testutils.h" + +#ifdef __riscos__ +#define ALIASES_FILE "Files.Aliases" +#else +#define ALIASES_FILE "Files/Aliases" +#endif + +typedef struct translit_testcase { + const char *to_charset; + const char *source; + const char *expected; +} translit_testcase; + +static const translit_testcase tests[] = { + { "iso-8859-1//TRANSLIT", "\xe2\x80\x93", "-" }, + { NULL, NULL, NULL } +}; + +static void run_test(const translit_testcase *test) +{ + iconv_t cd; + char out[128]; + char *inp = (char *) test->source, *outp = out; + size_t inlen = strlen(inp), outlen = sizeof(out); + size_t read; + + cd = iconv_open(test->to_charset, "utf-8"); + assert(cd != (iconv_t) -1); + + read = iconv(cd, &inp, &inlen, &outp, &outlen); + assert(read == 0); + + assert(sizeof(out) - outlen == strlen(test->expected)); + assert(memcmp(out, test->expected, sizeof(out) - outlen) == 0); + + iconv_close(cd); +} + +static void run_tests(void) +{ + int index; + + for (index = 0; tests[index].to_charset != NULL; index++) { + run_test(&tests[index]); + } +} + +int main(int argc, char **argv) +{ + const char *ucpath; + int alen; + char aliases[4096]; + + UNUSED(argc); + UNUSED(argv); + +#ifdef __riscos__ + ucpath = "Unicode:"; +#else + ucpath = getenv("UNICODE_DIR"); +#endif + + assert(ucpath != NULL); + + strncpy(aliases, ucpath, sizeof(aliases)); + alen = strlen(aliases); +#ifndef __riscos__ + if (aliases[alen - 1] != '/') { + strncat(aliases, "/", sizeof(aliases) - alen - 1); + alen += 1; + } +#endif + strncat(aliases, ALIASES_FILE, sizeof(aliases) - alen - 1); + aliases[sizeof(aliases) - 1] = '\0'; + + assert(iconv_initialise(aliases) == 1); + + run_tests(); + + iconv_finalise(); + + printf("PASS\n"); + + return 0; +} + |