From 90bab1fefd7d631f63ea0e60b5ad496ff7c100d0 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Thu, 11 Jun 2009 22:51:12 +0000 Subject: Windows sucks. svn path=/trunk/iconv/; revision=7770 --- doc/Uni->iconv | 204 --------------------------------------------------------- doc/Uni-iconv | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+), 204 deletions(-) delete mode 100644 doc/Uni->iconv create mode 100644 doc/Uni-iconv diff --git a/doc/Uni->iconv b/doc/Uni->iconv deleted file mode 100644 index caea2d0..0000000 --- a/doc/Uni->iconv +++ /dev/null @@ -1,204 +0,0 @@ -Introduction: -============= - -This file documents an approximate correlation between the data files -provided in the !Unicode distribution and the encoding headers in GNU -libiconv 1.9.1. - -Those with '?' in the iconv column either are not represented in iconv -or I've missed the relevant header file ;) - -A number of encodings are present in the iconv distribution but not -in !Unicode. These are documented at the end of this file. - -Changelog: -========== - -v 0.01 (09-Sep-2004) -~~~~~~~~~~~~~~~~~~~~ -Initial Incarnation - -v 0.02 (11-Sep-2004) -~~~~~~~~~~~~~~~~~~~~ -Documented additional encodings supported by the Iconv module. -Corrected list of !Unicode deficiencies. - - -!Unicode->iconv: -================ - -Unicode: iconv: notes: - -Acorn.Latin1 riscos1.h - -Apple.CentEuro mac_centraleurope.h -Apple.Cyrillic mac_cyrillic.h -Apple.Roman mac_roman.h -Apple.Ukrainian mac_ukraine.h - -BigFive big5.h - -ISO2022.C0.40[ISO646] ? - -ISO2022.C1.43[IS6429] ? - -ISO2022.G94.40[646old] iso646_cn.h -ISO2022.G94.41[646-GB] ? -ISO2022.G94.42[646IRV] ? -ISO2022.G94.43[FinSwe] ? -ISO2022.G94.47[646-SE] ? -ISO2022.G94.48[646-SE] ? -ISO2022.G94.49[JS201K] jisx0201.h top of JIS range -ISO2022.G94.4A[JS201R] jisx0201.h iso646_jp.h bottom of JIS range -ISO2022.G94.4B[646-DE] ? -ISO2022.G94.4C[646-PT] ? -ISO2022.G94.54[GB1988] ? -ISO2022.G94.56[Teltxt] ? -ISO2022.G94.59[646-IT] ? -ISO2022.G94.5A[646-ES] ? -ISO2022.G94.60[646-NO] ? -ISO2022.G94.66[646-FR] ? -ISO2022.G94.69[646-HU] ? -ISO2022.G94.6B[Arabic] ? -ISO2022.G94.6C[IS6397] ? -ISO2022.G94.7A[SerbCr] ? - -ISO2022.G94x94.40[JS6226] ? -ISO2022.G94x94.41[GB2312] gb2312.h -ISO2022.G94x94.42[JIS208] jis0x208.h -ISO2022.G94x94.43[KS1001] ksc5601.h -ISO2022.G94x94.44[JIS212] jis0x212.h -ISO2022.G94x94.47[CNS1] cns11643_1.h the tables differ -ISO2022.G94x94.48[CNS2] cns11643_2.h -ISO2022.G94x94.49[CNS3] cns11643_3.h -ISO2022.G94x94.4A[CNS4] cns11643_4.h -ISO2022.G94x94.4B[CNS5] cns11643_5.h -ISO2022.G94x94.4C[CNS6] cns11643_6.h -ISO2022.G94x94.4D[CNS7] cns11643_7.h - -ISO2022.G96.41[Lat1] iso8859_1.h -ISO2022.G96.42[Lat2] iso8859_2.h -ISO2022.G96.43[Lat3] iso8859_3.h -ISO2022.G96.44[Lat4] iso8859_4.h -ISO2022.G96.46[Greek] ? -ISO2022.G96.47[Arabic] iso8859_6.h ISO-8859-6 ignored -ISO2022.G96.48[Hebrew] ? -ISO2022.G96.4C[Cyrill] ? -ISO2022.G96.4D[Lat5] iso8859_5.h -ISO2022.G96.50[LatSup] ? -ISO2022.G96.52[IS6397] ? -ISO2022.G96.54[Thai] tis620.h -ISO2022.G96.56[Lat6] iso8859_6.h -ISO2022.G96.58[L6Sami] ? -ISO2022.G96.59[Lat7] iso8859_7.h -ISO2022.G96.5C[Welsh] ? -ISO2022.G96.5D[Sami] ? -ISO2022.G96.5E[Hebrew] ? -ISO2022.G96.5F[Lat8] iso8859_8.h -ISO2022.G96.62[Lat9] iso8859_9.h - -KOI8-R koi8_r.h - -Microsoft.CP1250 cp1250.h -Microsoft.CP1251 cp1251.h -Microsoft.CP1252 cp1252.h -Microsoft.CP1254 cp1254.h -Microsoft.CP866 cp866.h -Microsoft.CP932 cp932.h cp932ext.h - -iconv->!Unicode: -================ - -Iconv has the following encodings, which are not present in !Unicode. -Providing a suitable data file for !Unicode is trivial. Whether UnicodeLib -will then act upon the addition of these is unknown. -This list is ordered as per libiconv's NOTES file. - -European & Semitic languages: - - ISO-8859-16 (iso8859_16.h) - KOI8-{U,RU,T} (koi8_xx.h) - CP125{3,5,6,7} (cp125n.h) - CP850 (cp850.h) - CP862 (cp862.h) - Mac{Croatian,Romania,Greek,Turkish,Hebrew,Arabic} (mac_foo.h) - -Japanese: - - None afaikt. - -Simplified Chinese: - - GB18030 (gb18030.h, gb18030ext.h) - HZ-GB-2312 (hz.h) - -Traditional Chinese: - - CP950 (cp950.h) - BIG5-HKSCS (big5hkscs.h) - -Korean: - - CP949 (cp949.h) - -Armenian: - - ARMSCII-8 (armscii_8.h) - -Georgian: - - Georgian-Academy, Georgian-PS (georgian_academy.h, georgian_ps.h) - -Thai: - - CP874 (cp874.h) - MacThai (mac_thai.h) - -Laotian: - - MuleLao-1, CP1133 (mulelao.h, cp1133.h) - -Vietnamese: - - VISCII, TCVN (viscii.h, tcvn.h) - CP1258 (cp1258.h) - -Unicode: - - BE/LE variants of normal encodings. I assume UnicodeLib handles - these, but can't be sure. - C99 / JAVA - well, yes. - - -Iconv Module: -============= - -The iconv module is effectively a thin veneer around UnicodeLib. However, -8bit encodings are implemented within the module rather than using the -support in UnicodeLib. The rationale for this is simply that, although -UnicodeLib will understand (and act upon - reportedly...) additions to -the ISO2022 Unicode resource, other encodings are ignored. As the vast -majority of outstanding encodings fall into this category, and the code -is fairly simple, it made sense to implement it within the module. - -With use of the iconv module, the list of outstanding encodings is -reduced to: - - CP1255 (requires state-based transcoding) - - GB18030 (not 8bit - reportedly a requirement of PRC) - HZ-GB-2312 (not 8bit - supported by IE4) - - CP950 (not 8bit - a (MS) variant of Big5) - BIG5-HKSCS (not 8bit - again, a Big5 variant) - - CP949 (not 8bit) - - ARMSCII-8 (easily implemented, if required) - - VISCII (easily implemented, if required) - CP1258, TCVN (requires state-based transcoding) - -Additionally, the rest of the CodePage encodings implemented in iconv -but not listed above (due to omissions from the iconv documentation) -are implemented by the iconv module. diff --git a/doc/Uni-iconv b/doc/Uni-iconv new file mode 100644 index 0000000..caea2d0 --- /dev/null +++ b/doc/Uni-iconv @@ -0,0 +1,204 @@ +Introduction: +============= + +This file documents an approximate correlation between the data files +provided in the !Unicode distribution and the encoding headers in GNU +libiconv 1.9.1. + +Those with '?' in the iconv column either are not represented in iconv +or I've missed the relevant header file ;) + +A number of encodings are present in the iconv distribution but not +in !Unicode. These are documented at the end of this file. + +Changelog: +========== + +v 0.01 (09-Sep-2004) +~~~~~~~~~~~~~~~~~~~~ +Initial Incarnation + +v 0.02 (11-Sep-2004) +~~~~~~~~~~~~~~~~~~~~ +Documented additional encodings supported by the Iconv module. +Corrected list of !Unicode deficiencies. + + +!Unicode->iconv: +================ + +Unicode: iconv: notes: + +Acorn.Latin1 riscos1.h + +Apple.CentEuro mac_centraleurope.h +Apple.Cyrillic mac_cyrillic.h +Apple.Roman mac_roman.h +Apple.Ukrainian mac_ukraine.h + +BigFive big5.h + +ISO2022.C0.40[ISO646] ? + +ISO2022.C1.43[IS6429] ? + +ISO2022.G94.40[646old] iso646_cn.h +ISO2022.G94.41[646-GB] ? +ISO2022.G94.42[646IRV] ? +ISO2022.G94.43[FinSwe] ? +ISO2022.G94.47[646-SE] ? +ISO2022.G94.48[646-SE] ? +ISO2022.G94.49[JS201K] jisx0201.h top of JIS range +ISO2022.G94.4A[JS201R] jisx0201.h iso646_jp.h bottom of JIS range +ISO2022.G94.4B[646-DE] ? +ISO2022.G94.4C[646-PT] ? +ISO2022.G94.54[GB1988] ? +ISO2022.G94.56[Teltxt] ? +ISO2022.G94.59[646-IT] ? +ISO2022.G94.5A[646-ES] ? +ISO2022.G94.60[646-NO] ? +ISO2022.G94.66[646-FR] ? +ISO2022.G94.69[646-HU] ? +ISO2022.G94.6B[Arabic] ? +ISO2022.G94.6C[IS6397] ? +ISO2022.G94.7A[SerbCr] ? + +ISO2022.G94x94.40[JS6226] ? +ISO2022.G94x94.41[GB2312] gb2312.h +ISO2022.G94x94.42[JIS208] jis0x208.h +ISO2022.G94x94.43[KS1001] ksc5601.h +ISO2022.G94x94.44[JIS212] jis0x212.h +ISO2022.G94x94.47[CNS1] cns11643_1.h the tables differ +ISO2022.G94x94.48[CNS2] cns11643_2.h +ISO2022.G94x94.49[CNS3] cns11643_3.h +ISO2022.G94x94.4A[CNS4] cns11643_4.h +ISO2022.G94x94.4B[CNS5] cns11643_5.h +ISO2022.G94x94.4C[CNS6] cns11643_6.h +ISO2022.G94x94.4D[CNS7] cns11643_7.h + +ISO2022.G96.41[Lat1] iso8859_1.h +ISO2022.G96.42[Lat2] iso8859_2.h +ISO2022.G96.43[Lat3] iso8859_3.h +ISO2022.G96.44[Lat4] iso8859_4.h +ISO2022.G96.46[Greek] ? +ISO2022.G96.47[Arabic] iso8859_6.h ISO-8859-6 ignored +ISO2022.G96.48[Hebrew] ? +ISO2022.G96.4C[Cyrill] ? +ISO2022.G96.4D[Lat5] iso8859_5.h +ISO2022.G96.50[LatSup] ? +ISO2022.G96.52[IS6397] ? +ISO2022.G96.54[Thai] tis620.h +ISO2022.G96.56[Lat6] iso8859_6.h +ISO2022.G96.58[L6Sami] ? +ISO2022.G96.59[Lat7] iso8859_7.h +ISO2022.G96.5C[Welsh] ? +ISO2022.G96.5D[Sami] ? +ISO2022.G96.5E[Hebrew] ? +ISO2022.G96.5F[Lat8] iso8859_8.h +ISO2022.G96.62[Lat9] iso8859_9.h + +KOI8-R koi8_r.h + +Microsoft.CP1250 cp1250.h +Microsoft.CP1251 cp1251.h +Microsoft.CP1252 cp1252.h +Microsoft.CP1254 cp1254.h +Microsoft.CP866 cp866.h +Microsoft.CP932 cp932.h cp932ext.h + +iconv->!Unicode: +================ + +Iconv has the following encodings, which are not present in !Unicode. +Providing a suitable data file for !Unicode is trivial. Whether UnicodeLib +will then act upon the addition of these is unknown. +This list is ordered as per libiconv's NOTES file. + +European & Semitic languages: + + ISO-8859-16 (iso8859_16.h) + KOI8-{U,RU,T} (koi8_xx.h) + CP125{3,5,6,7} (cp125n.h) + CP850 (cp850.h) + CP862 (cp862.h) + Mac{Croatian,Romania,Greek,Turkish,Hebrew,Arabic} (mac_foo.h) + +Japanese: + + None afaikt. + +Simplified Chinese: + + GB18030 (gb18030.h, gb18030ext.h) + HZ-GB-2312 (hz.h) + +Traditional Chinese: + + CP950 (cp950.h) + BIG5-HKSCS (big5hkscs.h) + +Korean: + + CP949 (cp949.h) + +Armenian: + + ARMSCII-8 (armscii_8.h) + +Georgian: + + Georgian-Academy, Georgian-PS (georgian_academy.h, georgian_ps.h) + +Thai: + + CP874 (cp874.h) + MacThai (mac_thai.h) + +Laotian: + + MuleLao-1, CP1133 (mulelao.h, cp1133.h) + +Vietnamese: + + VISCII, TCVN (viscii.h, tcvn.h) + CP1258 (cp1258.h) + +Unicode: + + BE/LE variants of normal encodings. I assume UnicodeLib handles + these, but can't be sure. + C99 / JAVA - well, yes. + + +Iconv Module: +============= + +The iconv module is effectively a thin veneer around UnicodeLib. However, +8bit encodings are implemented within the module rather than using the +support in UnicodeLib. The rationale for this is simply that, although +UnicodeLib will understand (and act upon - reportedly...) additions to +the ISO2022 Unicode resource, other encodings are ignored. As the vast +majority of outstanding encodings fall into this category, and the code +is fairly simple, it made sense to implement it within the module. + +With use of the iconv module, the list of outstanding encodings is +reduced to: + + CP1255 (requires state-based transcoding) + + GB18030 (not 8bit - reportedly a requirement of PRC) + HZ-GB-2312 (not 8bit - supported by IE4) + + CP950 (not 8bit - a (MS) variant of Big5) + BIG5-HKSCS (not 8bit - again, a Big5 variant) + + CP949 (not 8bit) + + ARMSCII-8 (easily implemented, if required) + + VISCII (easily implemented, if required) + CP1258, TCVN (requires state-based transcoding) + +Additionally, the rest of the CodePage encodings implemented in iconv +but not listed above (due to omissions from the iconv documentation) +are implemented by the iconv module. -- cgit v1.2.3