summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2008-11-10 18:43:09 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2008-11-10 18:43:09 +0000
commitf8d8287cdbd7da9cd9392bcddf04860a10fa598e (patch)
tree668b4cc601fdfd050a51095d4f9bbebef9eaffec
downloadiconv-f8d8287cdbd7da9cd9392bcddf04860a10fa598e.tar.gz
iconv-f8d8287cdbd7da9cd9392bcddf04860a10fa598e.tar.bz2
Import Iconv sources
svn path=/trunk/iconv/; revision=5677
-rw-r--r--COPYING19
-rw-r--r--Makefile45
-rw-r--r--Makefile-riscos52
-rw-r--r--Makefile-ronative56
-rw-r--r--README43
-rw-r--r--basic/IconvEg,ffbbin0 -> 2441 bytes
-rw-r--r--basic/IconvTest,ffbbin0 -> 1204 bytes
-rw-r--r--build/Makefile.common158
-rw-r--r--build/Makefile.config4
-rw-r--r--doc/API132
-rw-r--r--doc/ChangeLog71
-rw-r--r--doc/Uni->iconv205
-rw-r--r--include/iconv/iconv.h87
l---------include/unicode1
-rw-r--r--libiconv.pc.in10
-rw-r--r--module/Mk80
-rw-r--r--module/errors.h11
-rw-r--r--module/header.cmhg18
-rw-r--r--module/menu.c618
-rw-r--r--module/module.c192
-rw-r--r--module/module.h24
-rw-r--r--module/stubs.c102
-rw-r--r--riscos/!Boot/Resources/!Unicode/!Boot,feb5
-rw-r--r--riscos/!Boot/Resources/!Unicode/!Help1
-rw-r--r--riscos/!Boot/Resources/!Unicode/!Run,feb5
-rw-r--r--riscos/!Boot/Resources/!Unicode/!Sprites,ff9bin0 -> 5780 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/!Sprites11,ff9bin0 -> 11132 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/!Sprites22,ff9bin0 -> 7324 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Acorn/Latin1bin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Apple/CentEurobin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Apple/Cyrillicbin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Apple/Romanbin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Apple/Ukrainianbin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/BigFivebin0 -> 29516 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/C0/40[ISO646]bin0 -> 64 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/C1/43[IS6429]bin0 -> 64 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/40[646old]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/41[646-GB]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/42[646IRV]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/43[FinSwe]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/47[646-SE]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/48[646-SE]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/49[JS201K]1
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4A[JS201R]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4B[646-DE]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4C[646-PT]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/54[GB1988]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/56[Teltxt]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/59[646-IT]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/5A[646-ES]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/60[646-NO]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/66[646-FR]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/69[646-HU]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/6B[Arabic]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/6C[IS6937]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/7A[SerbCr]bin0 -> 188 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/40[JS6226]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/41[GB2312]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/42[JIS208]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/43[KS1001]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/44[JIS212]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/47[CNS1]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/48[CNS2]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/49[CNS3]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4A[CNS4]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4B[CNS5]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4C[CNS6]bin0 -> 17672 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4D[CNS7]2
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/41[Lat1]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/42[Lat2]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/43[Lat3]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/44[Lat4]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/46[Greek]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/47[Arabic]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/48[Hebrew]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/4C[Cyrill]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/4D[Lat5]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/50[LatSup]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/52[IS6937]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/54[Thai]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/56[Lat6]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/58[L6Sami]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/59[Lat7]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5C[Welsh]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5D[Sami]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5E[Hebrew]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5F[Lat8]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/62[Lat9]bin0 -> 192 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/KOI8-Rbin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1250bin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1251bin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1252bin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1253bin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1254bin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1256bin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP866bin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP874bin0 -> 256 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP932bin0 -> 996 bytes
-rw-r--r--riscos/!Boot/Resources/!Unicode/Files/Aliases302
-rw-r--r--riscos/ReadMe47
-rw-r--r--src/Makefile49
-rw-r--r--src/alias.c89
-rw-r--r--src/aliases.c364
-rw-r--r--src/eightbit.c280
-rw-r--r--src/iconv.c457
-rw-r--r--src/internal.h58
-rw-r--r--src/utils.c53
-rw-r--r--test/INDEX5
-rw-r--r--test/Makefile103
-rw-r--r--test/README84
-rw-r--r--test/data/Aliases302
-rw-r--r--test/testrunner.pl167
-rw-r--r--test/testutils.h123
-rw-r--r--unicode/ReadMe23
114 files changed, 4448 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..0c811fa
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,19 @@
+Copyright (C) 2004-8 J-M Bell
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0c02248
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,45 @@
+# Toolchain definitions for building on the destination platform
+CC := gcc
+AR := ar
+LD := gcc
+
+CP := cp
+RM := rm
+MKDIR := mkdir
+MV := mv
+ECHO := echo
+MAKE := make
+PERL := perl
+PKGCONFIG := pkg-config
+INSTALL := install
+SED := sed
+TOUCH := touch
+LCOV := lcov
+GENHTML := genhtml
+
+# Toolchain flags
+WARNFLAGS := -Wall -Wextra -Wundef -Wpointer-arith -Wcast-align \
+ -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes \
+ -Wmissing-declarations -Wnested-externs -Werror -pedantic
+CFLAGS = -std=c99 -D_BSD_SOURCE -I$(TOP)/include/ $(WARNFLAGS)
+RELEASECFLAGS = $(CFLAGS) -DNDEBUG -O2
+DEBUGCFLAGS = $(CFLAGS) -O0 -g
+ARFLAGS := -cru
+LDFLAGS = -L$(TOP)/
+
+CPFLAGS :=
+RMFLAGS := -f
+MKDIRFLAGS := -p
+MVFLAGS :=
+ECHOFLAGS :=
+MAKEFLAGS :=
+PKGCONFIGFLAGS :=
+TOUCHFLAGS :=
+
+EXEEXT :=
+
+# Default installation prefix
+PREFIX ?= /usr/local
+
+
+include build/Makefile.common
diff --git a/Makefile-riscos b/Makefile-riscos
new file mode 100644
index 0000000..d7c938b
--- /dev/null
+++ b/Makefile-riscos
@@ -0,0 +1,52 @@
+# Toolchain definitions for building for RISC OS using the GCCSDK cross-compiler
+GCCSDK_INSTALL_CROSSBIN ?= /home/riscos/cross/bin
+GCCSDK_INSTALL_ENV ?= /home/riscos/env
+
+CC := $(wildcard $(GCCSDK_INSTALL_CROSSBIN)/*gcc)
+AR := $(wildcard $(GCCSDK_INSTALL_CROSSBIN)/*ar)
+LD := $(CC)
+
+CP := cp
+RM := rm
+MKDIR := mkdir
+MV := mv
+ECHO := echo
+MAKE := make
+PERL := perl
+PKGCONFIG := $(GCCSDK_INSTALL_ENV)/ro-pkg-config
+INSTALL := install
+SED := sed
+TOUCH := touch
+LCOV := echo
+GENHTML := echo
+
+# Toolchain flags
+WARNFLAGS := -Wall -Wextra -Wundef -Wpointer-arith -Wcast-align \
+ -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes \
+ -Wmissing-declarations -Wnested-externs -Werror -pedantic
+CFLAGS = -std=c99 -D_BSD_SOURCE -I$(TOP)/include/ $(WARNFLAGS) \
+ -mpoke-function-name
+RELEASECFLAGS = $(CFLAGS) -DNDEBUG -O2
+DEBUGCFLAGS = $(CFLAGS) -O0 -g
+ARFLAGS := -cru
+LDFLAGS = -L$(TOP)/
+
+CPFLAGS :=
+RMFLAGS := -f
+MKDIRFLAGS := -p
+MVFLAGS :=
+ECHOFLAGS :=
+MAKEFLAGS :=
+PKGCONFIGFLAGS :=
+TOUCHFLAGS :=
+
+ifneq (,$(findstring arm-unknown-riscos-gcc,$(CC)))
+ EXEEXT := ,e1f
+else
+ EXEEXT := ,ff8
+endif
+
+# Default installation prefix
+PREFIX ?= $(GCCSDK_INSTALL_ENV)
+
+include build/Makefile.common
diff --git a/Makefile-ronative b/Makefile-ronative
new file mode 100644
index 0000000..39dac7f
--- /dev/null
+++ b/Makefile-ronative
@@ -0,0 +1,56 @@
+# Toolchain definitions for building on RISC OS
+GCCSDK_INSTALL_ENV ?= <NSLibs$$Dir>
+
+CC := gcc
+AR := ar
+LD := gcc
+
+CP := cp
+RM := rm
+MKDIR := mkdir
+MV := mv
+ECHO := echo
+MAKE := make
+PERL := perl
+PKGCONFIG := echo
+INSTALL := echo
+SED := sed
+TOUCH := touch
+LCOV := echo
+GENHTML := echo
+
+# Toolchain flags
+WARNFLAGS := -Wall -Wextra -Wundef -Wpointer-arith -Wcast-align \
+ -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes \
+ -Wmissing-declarations -Wnested-externs -Werror -pedantic
+CFLAGS = -std=c99 -D_BSD_SOURCE -I$(TOP)/include/ $(WARNFLAGS) \
+ -mpoke-function-name
+RELEASECFLAGS = $(CFLAGS) -DNDEBUG -O2
+DEBUGCFLAGS = $(CFLAGS) -O0 -g
+ARFLAGS := -cru
+LDFLAGS = -L$(TOP)/
+
+CPFLAGS :=
+RMFLAGS := -f
+MKDIRFLAGS := -p
+MVFLAGS :=
+ECHOFLAGS :=
+MAKEFLAGS :=
+PKGCONFIGFLAGS :=
+TOUCHFLAGS :=
+
+EXEEXT :=
+
+# Default installation prefix
+PREFIX ?= $(GCCSDK_INSTALL_ENV)
+
+# This is nasty, but needed because $(CURDIR) will
+# contain colons, and thus confuse make mightily
+$(shell SetMacro Alias$$Iconvpwd Set %0 <FileSwitch$$CurrentFilingSystem>:|<FileSwitch$$<FileSwitch$$CurrentFilingSystem>$$CSD>|mUnset Alias$$Iconvpwd)
+$(shell Iconvpwd Iconv$$Dir)
+TOP := <Iconv$$Dir>
+
+# Tell everyone that we're building on RISC OS, so they can work around things.
+HOST := riscos
+
+include build/Makefile.common
diff --git a/README b/README
new file mode 100644
index 0000000..e35b63f
--- /dev/null
+++ b/README
@@ -0,0 +1,43 @@
+Iconv -- a character set conversion library and RISC OS module
+==============================================================
+
+Overview
+--------
+
+ Iconv is a library which provides character set conversion through an
+ implementation of the C iconv() function. It also provides a RISC OS
+ module.
+
+Requirements
+------------
+
+ Iconv requires the following tools:
+
+ + A C99 capable C compiler
+ + GNU make or compatible
+ + The RISC OS Unicode Library
+ + Perl (for the testcases)
+ + Pkg-config (for the testcases)
+
+Compilation
+-----------
+
+ If necessary, modify the toolchain settings in the Makefile.
+ Invoke make:
+ $ make
+
+Verification
+------------
+
+ To verify that the library is working, it is necessary to specify a
+ different makefile target than that used for normal compilation, thus:
+
+ $ make test
+
+API documentation
+-----------------
+
+ Currently, there is none. However, the code is well commented and the
+ public API may be found in the "include" directory. The testcase sources
+ may also be of use in working out how to use it.
+
diff --git a/basic/IconvEg,ffb b/basic/IconvEg,ffb
new file mode 100644
index 0000000..44c4757
--- /dev/null
+++ b/basic/IconvEg,ffb
Binary files differ
diff --git a/basic/IconvTest,ffb b/basic/IconvTest,ffb
new file mode 100644
index 0000000..e1a4dfa
--- /dev/null
+++ b/basic/IconvTest,ffb
Binary files differ
diff --git a/build/Makefile.common b/build/Makefile.common
new file mode 100644
index 0000000..7da0315
--- /dev/null
+++ b/build/Makefile.common
@@ -0,0 +1,158 @@
+# Top-level Makefile fragment
+
+# Default target
+all: release
+
+# Name of component
+COMPONENT := libiconv
+
+# Environment
+TOP ?= $(CURDIR)
+EXPORT := $(TOP)/dist
+RELEASEDIR := build/Release
+DEBUGDIR := build/Debug
+COVERAGEDIR := build/coverage
+
+# List of items to delete on clean
+ITEMS_CLEAN :=
+# List of items to delete on distclean
+ITEMS_DISTCLEAN :=
+
+# List of targets to run for testing
+TARGET_TESTS :=
+
+# Source files
+SOURCES :=
+
+# Include configuration Makefile fragment
+include build/Makefile.config
+
+# Include Makefile fragments in subdirectories
+
+define do_include
+DIR := $$(dir $(1))
+include $(1)
+
+endef
+
+MAKE_INCLUDES := $(wildcard */Makefile)
+$(eval $(foreach INC, $(MAKE_INCLUDES), $(call do_include,$(INC))))
+
+# Calculate objects to build
+OBJECTS := $(subst /,_,$(subst .c,.o,$(SOURCES)))
+
+.PHONY: release debug test coverage profile \
+ clean distclean setup export install uninstall
+
+# Rules
+release: setup $(addprefix $(RELEASEDIR)/,$(OBJECTS))
+ @$(AR) $(ARFLAGS) $(COMPONENT).a $(addprefix $(RELEASEDIR)/,$(OBJECTS))
+
+debug: setup $(addprefix $(DEBUGDIR)/,$(OBJECTS))
+ @$(AR) $(ARFLAGS) $(COMPONENT)-debug.a \
+ $(addprefix $(DEBUGDIR)/,$(OBJECTS))
+
+test: debug $(TARGET_TESTS)
+
+coverage: clean
+ @$(LCOV) --directory . --zerocounters
+ @$(MAKE) test CFLAGS="$(CFLAGS) -fprofile-arcs -ftest-coverage" \
+ LDFLAGS="$(LDFLAGS) -lgcov"
+ @$(LCOV) --directory $(DEBUGDIR) --base-directory $(TOP) \
+ --capture --output-file $(COVERAGEDIR)/$(COMPONENT)_tmp.info
+ @$(LCOV) --extract $(COVERAGEDIR)/$(COMPONENT)_tmp.info "$(TOP)/src*" \
+ -o $(COVERAGEDIR)/$(COMPONENT).info
+ @$(RM) $(RMFLAGS) $(COVERAGEDIR)/$(COMPONENT)_tmp.info
+ @$(GENHTML) -o $(COVERAGEDIR) --num-spaces 2 \
+ $(COVERAGEDIR)/$(COMPONENT).info
+
+profile: clean
+ @$(MAKE) test CFLAGS="$(CFLAGS) -pg" LDFLAGS="-pg $(LDFLAGS)"
+
+clean:
+ -@$(RM) $(RMFLAGS) $(ITEMS_CLEAN)
+ -@$(RM) $(RMFLAGS) gmon.out
+ -@$(RM) $(RMFLAGS) -r $(COVERAGEDIR)
+ -@$(RM) $(RMFLAGS) -r $(RELEASEDIR)
+ -@$(RM) $(RMFLAGS) -r $(DEBUGDIR)
+ -@$(RM) $(RMFLAGS) $(COMPONENT).a
+ -@$(RM) $(RMFLAGS) $(COMPONENT)-debug.a
+ -@$(RM) $(RMFLAGS) $(COMPONENT).pc
+
+distclean: clean
+ -@$(RM) $(RMFLAGS) $(ITEMS_DISTCLEAN)
+ -@$(RM) $(RMFLAGS) -r $(TOP)/dist
+
+setup:
+ @$(MKDIR) $(MKDIRFLAGS) $(RELEASEDIR)/deps
+ @$(MKDIR) $(MKDIRFLAGS) $(DEBUGDIR)/deps
+ @$(MKDIR) $(MKDIRFLAGS) $(COVERAGEDIR)
+
+export:
+ @$(MKDIR) $(MKDIRFLAGS) -p $(TOP)/dist
+ @$(MAKE) install PREFIX="$(TOP)/dist"
+
+install: release
+ @$(MKDIR) $(MKDIRFLAGS) -p $(DESTDIR)$(PREFIX)/lib/pkgconfig
+ @$(MKDIR) $(MKDIRFLAGS) -p $(DESTDIR)$(PREFIX)/include/iconv
+ @$(SED) -e 's#PREFIX#$(PREFIX)#' $(COMPONENT).pc.in >$(COMPONENT).pc
+ $(INSTALL) -m 644 $(COMPONENT).a $(DESTDIR)$(PREFIX)/lib
+ $(INSTALL) -m 644 $(COMPONENT).pc $(DESTDIR)$(PREFIX)/lib/pkgconfig
+ $(INSTALL) -m 644 $(filter %.h, $(wildcard include/iconv/*)) $(DESTDIR)$(PREFIX)/include/iconv
+
+uninstall:
+ $(RM) $(RMFLAGS) $(DESTDIR)$(PREFIX)/lib/$(COMPONENT).a
+ $(RM) $(RMFLAGS) $(DESTDIR)$(PREFIX)/lib/pkgconfig/$(COMPONENT).pc
+ $(RM) $(RMFLAGS) -r $(DESTDIR)$(PREFIX)/include/iconv
+
+$(RELEASEDIR)/deps/created:
+ @$(MKDIR) $(MKDIRFLAGS) $(RELEASEDIR)/deps
+ @$(TOUCH) $(TOUCHFLAGS) $(RELEASEDIR)/deps/created
+
+$(DEBUGDIR)/deps/created:
+ @$(MKDIR) $(MKDIRFLAGS) $(DEBUGDIR)/deps
+ @$(TOUCH) $(TOUCHFLAGS) $(DEBUGDIR)/deps/created
+
+DEPFILES :=
+
+define do_dep
+DEPFILES += $(2)
+$$(RELEASEDIR)/deps/$(2): $$(RELEASEDIR)/deps/created $(1)
+ @$$(ECHO) $$(ECHOFLAGS) "DEP $(1)"
+ @$$(RM) $$(RMFLAGS) $$(RELEASEDIR)/deps/$(2)
+ @$$(CC) $$(RELEASECFLAGS) -MM -MT \
+ '$$(RELEASEDIR)/deps/$(2) $$(RELEASEDIR)/$(3)' \
+ -MF $$(RELEASEDIR)/deps/$(2) $(1)
+
+$$(DEBUGDIR)/deps/$(2): $$(DEBUGDIR)/deps/created $(1)
+ @$$(ECHO) $$(ECHOFLAGS) "DEP $(1)"
+ @$$(RM) $$(RMFLAGS) $$(DEBUGDIR)/deps/$(2)
+ @$$(CC) $$(DEBUGCFLAGS) -MM -MT \
+ '$$(DEBUGDIR)/deps/$(2) $$(DEBUGDIR)/$(3)' \
+ -MF $$(DEBUGDIR)/deps/$(2) $(1)
+
+endef
+
+# Finally, build rules for compilation
+define do_compile
+$$(RELEASEDIR)/$(2): $(1)
+ @$$(ECHO) $$(ECHOFLAGS) "==> $(1)"
+ @$$(CC) -c $$(RELEASECFLAGS) -o $$@ $(1)
+
+$$(DEBUGDIR)/$(2): $(1)
+ @$$(ECHO) $$(ECHOFLAGS) "==> $(1)"
+ @$$(CC) -c $$(DEBUGCFLAGS) -o $$@ $(1)
+
+endef
+
+$(eval $(foreach SOURCE,$(filter %.c,$(SOURCES)), \
+ $(call do_dep,$(SOURCE),$(subst /,_,$(SOURCE:.c=.d)),$(subst /,_,$(SOURCE:.c=.o)))))
+
+ifneq ($(findstring clean,$(MAKECMDGOALS)),clean)
+-include $(sort $(addprefix $(RELEASEDIR)/deps/,$(DEPFILES)))
+-include $(sort $(addprefix $(DEBUGDIR)/deps/,$(DEPFILES)))
+endif
+
+$(eval $(foreach SOURCE,$(filter %.c,$(SOURCES)), \
+ $(call do_compile,$(SOURCE),$(subst /,_,$(SOURCE:.c=.o)))))
+
diff --git a/build/Makefile.config b/build/Makefile.config
new file mode 100644
index 0000000..1d27350
--- /dev/null
+++ b/build/Makefile.config
@@ -0,0 +1,4 @@
+# Configuration Makefile fragment
+
+# Cater for local configuration changes
+-include build/Makefile.config.override
diff --git a/doc/API b/doc/API
new file mode 100644
index 0000000..13fa22f
--- /dev/null
+++ b/doc/API
@@ -0,0 +1,132 @@
+Iconv Module API
+================
+
+If using C, then you really should be using the libiconv stubs provided
+(or UnixLib, if appropriate). See the iconv.h header file for further
+documentation of these calls.
+
+Iconv_Open (&57540)
+-------------------
+
+ Create a conversion descriptor
+
+ On Entry: r0 -> string containing name of destination encoding (eg "UTF-8")
+ r1 -> string containing name of source encoding (eg "CP1252")
+
+ On Exit: r0 = conversion descriptor
+ All others preserved
+
+ Either encoding name may have a number of parameters appended to it.
+ Parameters are separated by a pair of forward-slashes ("//").
+ Currently defined parameters are:
+
+ Parameter: Destination: Source:
+
+ TRANSLIT Transliterate unrepresentable None
+ output.
+
+ The conversion descriptor is an opaque value. The user should not,
+ therefore, assume anything about its meaning, nor modify it in any way.
+ Doing so is guaranteed to result in undefined behaviour.
+
+
+Iconv_Iconv (&57541)
+--------------------
+
+ This SWI is deprecated and Iconv_Convert should be used instead.
+
+
+Iconv_Close (&57542)
+--------------------
+
+ Destroy a conversion descriptor
+
+ On Entry: r0 = conversion descriptor to destroy
+
+ On Exit: r0 = 0
+ All others preserved
+
+
+Iconv_Convert (&57543)
+----------------------
+
+ Convert a byte sequence to another encoding
+
+ On Entry: r0 = conversion descriptor returned by Iconv_Open
+ r1 -> input buffer (or NULL to reset encoding context)
+ r2 = length of buffer pointed to by r1
+ r3 -> output buffer
+ r4 = length of buffer pointed to by r3
+
+ On Exit: r0 = number of non-reversible conversions performed (always 0)
+ r1 -> updated input buffer pointer (after last input read)
+ r2 = number of bytes remaining in input buffer
+ r3 -> updated output buffer pointer (i.e. end of output)
+ r4 = number of free bytes in the output buffer
+ All others preserved
+
+ Note that all strings should be NUL-terminated so, if calling from BASIC,
+ some terminating character munging may be needed.
+
+
+Errors:
+
+Should an error occur, the SWI will return with V set and r0 -> error buffer.
+Note that only the error number will be filled in; it may be one of:
+
+ ICONV_NOMEM (&81b900)
+ ICONV_INVAL (&81b901)
+ ICONV_2BIG (&81b902)
+ ICONV_ILSEQ (&81b903)
+
+These map directly to the corresponding C errno values.
+
+
+Iconv_CreateMenu (&57544)
+-------------------------
+
+ Create a menu data structure containing all available encodings.
+
+ On Entry: r0 = flags. All bits reserved, must be 0
+ r1 -> buffer, or 0 to read required length
+ r2 = length of buffer in r1
+ r3 -> currently selected encoding name, or 0 if none selected
+ r4 -> buffer for indirected data, or 0 to read length
+ r5 = length of buffer in r4
+
+ On Exit: r2 = required size of buffer in r1 if r1 = 0 on entry,
+ or length of data placed in buffer
+ r5 = required size of buffer in r4 if r4 = 0 on entry,
+ or length of data placed in buffer
+
+ Menu titles are direct form text buffers. Menu entries are indirect text.
+ Entry text is stored in the buffer pointed to by R4 on entry to this call.
+
+
+Iconv_DecodeMenu (&57545)
+-------------------------
+
+ Decode a selection in a menu generated by Iconv_CreateMenu.
+ Places the corresponding encoding name in the result buffer.
+
+ On Entry: r0 = flags. All bits reserved, must be 0
+ r1 -> menu definition
+ r2 -> menu selections, as per Wimp_Poll
+ r3 -> buffer for result or 0 to read required length
+ r4 = buffer length
+
+ On Exit: r4 = required size of buffer if r3 = 0 on entry,
+ or length of data placed in buffer (0 if no selected
+ encoding)
+
+ The menu selections block pointed to by r2 on entry should be based at
+ the root of the encodings menu structure (i.e. index 0 in the block
+ should correspond to the selection in the main encoding menu).
+
+ This call will update the selection status of the menu(s) appropriately.
+
+
+Example Code:
+=============
+
+Example code may be found in the IconvEg BASIC file.
diff --git a/doc/ChangeLog b/doc/ChangeLog
new file mode 100644
index 0000000..96f5924
--- /dev/null
+++ b/doc/ChangeLog
@@ -0,0 +1,71 @@
+Iconv Changelog
+===============
+
+0.01 10-Sep-2004
+----------------
+
+ - Initial version - unreleased.
+
+0.02 27-Sep-2004
+----------------
+
+ - Use allocated SWI & error chunks.
+ - Fix issues in 8bit encoding handling.
+ - First public release.
+
+0.03 22-Jan-2005
+----------------
+
+ - Add Iconv_Convert SWI with improved interface.
+ - Deprecate Iconv_Iconv SWI.
+ - Add encoding name alias handling.
+ - Bundle !Unicode resource.
+
+0.04 08-Apr-2005
+----------------
+
+ - Improve parameter checking.
+ - Fix potential memory leaks.
+ - Add encoding menu creation and selection handling.
+
+0.05 27-Jun-2005
+----------------
+
+ - Improve encoding alias support, using external data file.
+ - Add StubsG build for A9home users.
+
+0.06 05-Nov-2005
+----------------
+
+ - Modified menu creation API to store indirected text in a
+ user-provided buffer. This change is backwards incompatible.
+
+0.07 11-Feb-2006
+----------------
+
+ - Corrected output values for E2BIG errors.
+ - Fixed input pointer update after successful conversion.
+
+0.08 11-Mar-2007
+----------------
+
+ - Tightened up parameter checking in various places.
+ - Improve aliases hash function.
+ - Make 8bit write function's return values match encoding_write
+ with encoding_WRITE_STRICT set.
+ - Fix bug in 8bit writing which resulted in the remaining buffer
+ size being reduced even if nothing was written.
+ - Improve support for endian-specific Unicode variants.
+ - Work around issue in UnicodeLib where remaining buffer size is
+ reduced if an attempt is made to write an unrepresentable character.
+ - Add rudimentary //TRANSLIT support - simply replaces with '?' for now.
+ - Make UnicodeLib handle raw ISO-8859-{1,2,9,10,15} and not attempt
+ ISO-6937-2-25 shift sequences.
+ - Remove StubsG build as A9home now has a C99 capable C library.
+ - Overhaul documentation.
+
+0.09 XX-XX-2008
+---------------
+
+ - Restructured source tree into cross-platform and RO-specific parts
+ -
diff --git a/doc/Uni->iconv b/doc/Uni->iconv
new file mode 100644
index 0000000..f10b6c7
--- /dev/null
+++ b/doc/Uni->iconv
@@ -0,0 +1,205 @@
+Introduction:
+=============
+
+This file documents an approximate correlation between the data files
+provided in the !Unicode distribution and the encoding headers in GNU
+libiconv 1.9.1.
+
+Those with '?' in the iconv column either are not represented in iconv
+or I've missed the relevant header file ;)
+
+A number of encodings are present in the iconv distribution but not
+in !Unicode. These are documented at the end of this file.
+
+Changelog:
+==========
+
+v 0.01 (09-Sep-2004)
+~~~~~~~~~~~~~~~~~~~~
+Initial Incarnation
+
+v 0.02 (11-Sep-2004)
+~~~~~~~~~~~~~~~~~~~~
+Documented additional encodings supported by the Iconv module.
+Corrected list of !Unicode deficiencies.
+
+
+!Unicode->iconv:
+================
+
+Unicode: iconv: notes:
+
+Acorn.Latin1 riscos1.h
+
+Apple.CentEuro mac_centraleurope.h
+Apple.Cyrillic mac_cyrillic.h
+Apple.Roman mac_roman.h
+Apple.Ukrainian mac_ukraine.h
+
+BigFive big5.h
+
+ISO2022.C0.40[ISO646] ?
+
+ISO2022.C1.43[IS6429] ?
+
+ISO2022.G94.40[646old] iso646_cn.h
+ISO2022.G94.41[646-GB] ?
+ISO2022.G94.42[646IRV] ?
+ISO2022.G94.43[FinSwe] ?
+ISO2022.G94.47[646-SE] ?
+ISO2022.G94.48[646-SE] ?
+ISO2022.G94.49[JS201K] jisx0201.h top of JIS range
+ISO2022.G94.4A[JS201R] jisx0201.h iso646_jp.h bottom of JIS range
+ISO2022.G94.4B[646-DE] ?
+ISO2022.G94.4C[646-PT] ?
+ISO2022.G94.54[GB1988] ?
+ISO2022.G94.56[Teltxt] ?
+ISO2022.G94.59[646-IT] ?
+ISO2022.G94.5A[646-ES] ?
+ISO2022.G94.60[646-NO] ?
+ISO2022.G94.66[646-FR] ?
+ISO2022.G94.69[646-HU] ?
+ISO2022.G94.6B[Arabic] ?
+ISO2022.G94.6C[IS6937] ?
+ISO2022.G94.7A[SerbCr] ?
+
+ISO2022.G94x94.40[JS6226] ?
+ISO2022.G94x94.41[GB2312] gb2312.h
+ISO2022.G94x94.42[JIS208] jisx0208.h
+ISO2022.G94x94.43[KS1001] ksc5601.h
+ISO2022.G94x94.44[JIS212] jisx0212.h
+ISO2022.G94x94.47[CNS1] cns11643_1.h the tables differ
+ISO2022.G94x94.48[CNS2] cns11643_2.h
+ISO2022.G94x94.49[CNS3] cns11643_3.h
+ISO2022.G94x94.4A[CNS4] cns11643_4.h
+ISO2022.G94x94.4B[CNS5] cns11643_5.h
+ISO2022.G94x94.4C[CNS6] cns11643_6.h
+ISO2022.G94x94.4D[CNS7] cns11643_7.h
+
+ISO2022.G96.41[Lat1] iso8859_1.h
+ISO2022.G96.42[Lat2] iso8859_2.h
+ISO2022.G96.43[Lat3] iso8859_3.h
+ISO2022.G96.44[Lat4] iso8859_4.h
+ISO2022.G96.46[Greek] ?
+ISO2022.G96.47[Arabic] iso8859_6.h ISO-8859-6 ignored
+ISO2022.G96.48[Hebrew] ?
+ISO2022.G96.4C[Cyrill] ?
+ISO2022.G96.4D[Lat5] iso8859_5.h
+ISO2022.G96.50[LatSup] ?
+ISO2022.G96.52[IS6937] ?
+ISO2022.G96.54[Thai] tis620.h
+ISO2022.G96.56[Lat6] iso8859_6.h
+ISO2022.G96.58[L6Sami] ?
+ISO2022.G96.59[Lat7] iso8859_7.h
+ISO2022.G96.5C[Welsh] ?
+ISO2022.G96.5D[Sami] ?
+ISO2022.G96.5E[Hebrew] ?
+ISO2022.G96.5F[Lat8] iso8859_8.h
+ISO2022.G96.62[Lat9] iso8859_9.h
+
+KOI8-R koi8_r.h
+
+Microsoft.CP1250 cp1250.h
+Microsoft.CP1251 cp1251.h
+Microsoft.CP1252 cp1252.h
+Microsoft.CP1254 cp1254.h
+Microsoft.CP866 cp866.h
+Microsoft.CP932 cp932.h cp932ext.h
+
+iconv->!Unicode:
+================
+
+Iconv has the following encodings, which are not present in !Unicode.
+Providing a suitable data file for !Unicode is trivial. Whether UnicodeLib
+will then act upon the addition of these is unknown.
+This list is ordered as per libiconv's NOTES file.
+
+European & Semitic languages:
+
+ ISO-8859-16 (iso8859_16.h)
+ KOI8-{U,RU,T} (koi8_xx.h)
+ CP125{3,5,6,7} (cp125n.h)
+ CP850 (cp850.h)
+ CP862 (cp862.h)
+ Mac{Croatian,Romania,Greek,Turkish,Hebrew,Arabic} (mac_foo.h)
+
+Japanese:
+
+ None, as far as I can tell.
+
+Simplified Chinese:
+
+ GB18030 (gb18030.h, gb18030ext.h)
+ HZ-GB-2312 (hz.h)
+
+Traditional Chinese:
+
+ CP950 (cp950.h)
+ BIG5-HKSCS (big5hkscs.h)
+
+Korean:
+
+ CP949 (cp949.h)
+
+Armenian:
+
+ ARMSCII-8 (armscii_8.h)
+
+Georgian:
+
+ Georgian-Academy, Georgian-PS (georgian_academy.h, georgian_ps.h)
+
+Thai:
+
+ CP874 (cp874.h)
+ MacThai (mac_thai.h)
+
+Laotian:
+
+ MuleLao-1, CP1133 (mulelao.h, cp1133.h)
+
+Vietnamese:
+
+ VISCII, TCVN (viscii.h, tcvn.h)
+ CP1258 (cp1258.h)
+
+Unicode:
+
+ BE/LE variants of normal encodings. I assume UnicodeLib handles
+ these, but can't be sure.
+ C99 / JAVA - well, yes.
+
+
+Iconv Module:
+=============
+
+The iconv module is effectively a thin veneer around UnicodeLib. However,
+8bit encodings are implemented within the module rather than using the
+support in UnicodeLib. The rationale for this is simply that, although
+UnicodeLib will understand (and act upon - reportedly...) additions to
+the ISO2022 Unicode resource, other encodings are ignored. As the vast
+majority of outstanding encodings fall into this category, and the code
+is fairly simple, it made sense to implement it within the module.
+
+With use of the iconv module, the list of outstanding encodings is
+reduced to:
+
+ ISO-8859-16 (easily implemented, if required)
+ CP1255 (requires state-based transcoding)
+
+ GB18030 (not 8bit - reportedly a requirement of PRC)
+ HZ-GB-2312 (not 8bit - supported by IE4)
+
+ CP950 (not 8bit - a (MS) variant of Big5)
+ BIG5-HKSCS (not 8bit - again, a Big5 variant)
+
+ CP949 (not 8bit)
+
+ ARMSCII-8 (easily implemented, if required)
+
+ VISCII (easily implemented, if required)
+ CP1258, TCVN (requires state-based transcoding)
+
+Additionally, the rest of the CodePage encodings implemented in iconv
+but not listed above (due to omissions from the iconv documentation)
+are implemented by the iconv module. \ No newline at end of file
diff --git a/include/iconv/iconv.h b/include/iconv/iconv.h
new file mode 100644
index 0000000..9bb818b
--- /dev/null
+++ b/include/iconv/iconv.h
@@ -0,0 +1,87 @@
+#ifndef _LIB_ICONV_H
+#define _LIB_ICONV_H
+
+#include <errno.h>	/* EILSEQ / ENOENT; must precede the fallback below */
+#include <stddef.h>	/* size_t */
+
+/* Fallback for C libraries whose <errno.h> does not provide EILSEQ.
+ * <errno.h> is included first so that a library-provided EILSEQ is used
+ * when there is one, and is never redefined by a later include. */
+#ifndef EILSEQ
+#define EILSEQ ENOENT
+#endif
+
+#undef iconv_t
+typedef void *iconv_t;
+
+/*
+ * Initialise the iconv library
+ * aliases_file names the encoding aliases file to use.
+ * NOTE(review): the RISC OS module treats a zero return as failure;
+ * confirm the exact return contract against the implementation.
+ */
+int iconv_initialise(const char *aliases_file);
+
+/*
+ * Finalise the iconv library
+ */
+void iconv_finalise(void);
+
+/*
+ * Allocate a conversion descriptor suitable for converting byte sequences
+ * from encoding fromcode to encoding tocode.
+ * The resulting conversion descriptor may be used any number of times and
+ * remains valid until deallocated with iconv_close.
+ * A conversion descriptor contains a conversion state. After creation,
+ * the state is in the initial state. Using iconv modifies the descriptor's
+ * conversion state. The state may be reset by calling iconv with NULL
+ * as the inbuf argument.
+ *
+ * Returns the conversion descriptor on success. On error, (iconv_t)(-1) is
+ * returned and errno is set. If errno is set to EINVAL, the implementation
+ * does not provide support for conversion between fromcode and tocode.
+ */
+extern iconv_t iconv_open(const char *tocode, const char *fromcode);
+
+/* Perform character set conversion
+ * cd must be a conversion descriptor as allocated by iconv_open.
+ *
+ * If inbuf is not NULL and *inbuf is not NULL, the multibyte sequence
+ * starting at *inbuf is converted into a multibyte sequence starting at
+ * *outbuf. At most *inbytesleft bytes, starting at *inbuf, will be read.
+ * At most *outbytesleft bytes, starting at *outbuf, will be written.
+ *
+ * One multibyte character is converted at a time. For each conversion,
+ * *inbuf is incremented and *inbytesleft is decremented by the number of
+ * converted input bytes. Similarly, *outbuf is incremented and
+ * *outbytesleft is decremented by the number of converted output bytes.
+ *
+ * Conversion can stop for four reasons:
+ *
+ * 1. An invalid multibyte input sequence is encountered. In this case,
+ *    errno is set to EILSEQ and (size_t)(-1) is returned. *inbuf points
+ *    to the start of the illegal sequence.
+ *
+ * 2. The input sequence has been entirely converted. In this case, the
+ *    number of non-reversible conversions performed is returned.
+ *
+ * 3. An incomplete multibyte sequence is encountered in the input and
+ *    the input terminates after it. In this case, errno is set to EINVAL
+ *    and (size_t)(-1) is returned.
+ *
+ * 4. The output buffer has no room for the next converted character.
+ *    In this case, errno is set to E2BIG and (size_t)(-1) is returned.
+ *
+ * If inbuf is NULL or *inbuf is NULL but outbuf is not NULL and *outbuf is
+ * not NULL, the function attempts to set cd's conversion state to the
+ * initial state and store a corresponding shift sequence in *outbuf.
+ * At most *outbytesleft will be written. If the output buffer is too small
+ * for this reset sequence, errno is set to E2BIG and (size_t)(-1) is
+ * returned. Otherwise *outbuf is incremented and *outbytesleft is
+ * decremented by the number of bytes written.
+ *
+ * If inbuf or *inbuf and outbuf or *outbuf are NULL, cd is reset to the
+ * initial conversion state.
+ */
+extern size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft,
+		char **outbuf, size_t *outbytesleft);
+
+/* Deallocate a conversion descriptor cd.
+ * Returns 0 on success, -1 on error and sets errno
+ */
+extern int iconv_close(iconv_t cd);
+
+
+#endif
+
diff --git a/include/unicode b/include/unicode
new file mode 120000
index 0000000..9e7f587
--- /dev/null
+++ b/include/unicode
@@ -0,0 +1 @@
+../unicode/include/ \ No newline at end of file
diff --git a/libiconv.pc.in b/libiconv.pc.in
new file mode 100644
index 0000000..1b3ccfc
--- /dev/null
+++ b/libiconv.pc.in
@@ -0,0 +1,10 @@
+prefix=PREFIX
+exec_prefix=${prefix}
+libdir=${exec_prefix}/lib
+includedir=${prefix}/include
+
+Name: libiconv
+Description: Character set conversion library
+Version: 0.0.1
+Libs: -L${libdir} -liconv -lunicode
+Cflags: -I${includedir}
diff --git a/module/Mk b/module/Mk
new file mode 100644
index 0000000..de88cbc
--- /dev/null
+++ b/module/Mk
@@ -0,0 +1,80 @@
+# Old RISC OS makefile. This is of no real use now, but is retained
+# so that the useful parts may be integrated into the cross-platform
+# buildsystem at some later date
+
+TARGET = ^.!System.310.Modules.Iconv
+VERSION = 008
+PVERSION = 0.0.8
+CC = cc
+CFLAGS = -Wp -fnah -zM -fussy -ITCPIPLibs:
+CFLAGSDEBUG = -DDEBUG -ITCPIPLibs: -Wnp -fn -fussy
+LD = link
+LDFLAGS = C:o.stubs TCPIPLibs:o.unixlibzm C:unicode.o.ucodelibm-no6937
+CMHG = cmhg -p
+AR = libfile
+
+OBJECTS = eightbit.o header.o iconv.o alias.o aliases.o menu.o utils.o
+
+SOURCES = $(OBJECTS:.o=.c)
+OBJS = $(OBJECTS:%.o=Release.%.o)
+OBJSDEBUG = $(OBJECTS:%.o=Debug.%.o)
+
+all: setup $(TARGET) stubs
+
+debug: setup libiconv/a
+
+$(TARGET): $(OBJS)
+ $(LD) -RMF -o $@ $(LDFLAGS) $^
+ @modsqz -f $@
+
+stubs: Release.stubs.o
+ $(AR) -c -o ^.libiconv/a Release.o.stubs
+ @settype ^.libiconv/a fff
+
+libiconv/a: $(OBJSDEBUG)
+ $(AR) -c -o ^.libiconv/a Debug.o.eightbit Debug.o.iconv Debug.o.alias Debug.o.aliases Debug.o.menu Debug.o.utils ^.ucodelib.o.*
+ @settype ^.libiconv/a fff
+
+Release.header.o:
+ $(CMHG) cmhg.header -o $@ -d header.h
+
+Release.stubs.o: stubs.c
+ $(CC) -c -o $@ $(CFLAGS) $<
+
+Release.%.o: %.c
+ $(CC) -c -o $@ $(CFLAGS) $<
+
+Debug.header.o:
+ $(CMHG) cmhg.header -o $@ -d header.h
+
+Debug.%.o: %.c
+ $(CC) -c -o $@ $(CFLAGSDEBUG) $<
+
+setup:
+ -@cdir Release.o
+ -@cdir Debug.o
+
+clean:
+ -@wipe Release.o ~C~VFR
+ -@wipe Debug.o ~C~VFR
+
+distclean: clean
+ -@wipe ^.!System.310.Modules.Iconv ~C~VFR
+ -@wipe ^.libiconv/a ~C~VFR
+
+zip: all clean
+ -@dir ^
+ -@wipe iconv$(VERSION)/zip ~C~VFR
+ -@zip -9r iconv$(VERSION)/zip !Boot !System libiconv/a ReadMe src doc
+ -@dir src
+
+pkg: all clean
+ -@dir ^
+ -@wipe iconv$(VERSION)pkg ~C~VFR
+ -@copy !Boot.Resources.* pkg.Resources.* ~C~DF~L~N~P~QR~S~T~V
+ -@copy !System.* pkg.System.* ~C~DF~L~N~P~QR~S~T~V
+ -@copy Control pkg.RiscPkg.Control ~C~DF~L~N~P~QR~S~T~V
+ -sed -i -e s/VERSION/$(PVERSION)/ pkg/RiscPkg/Control
+ -@dir pkg
+ -@zip -9r ^.iconv$(VERSION)pkg *
+ -@dir ^.src
diff --git a/module/errors.h b/module/errors.h
new file mode 100644
index 0000000..a5995f5
--- /dev/null
+++ b/module/errors.h
@@ -0,0 +1,11 @@
+#ifndef _ICONV_ERRORS_H_
+#define _ICONV_ERRORS_H_
+/* Error numbers shared by the Iconv module and its veneers:
+ * errno_to_iconv_error() in module.c maps errno values to these numbers
+ * and the functions in stubs.c map them back to errno values. */
+#define ERROR_BASE 0x81b900 /* all ICONV_* numbers are offsets from this */
+
+#define ICONV_NOMEM (ERROR_BASE+0) /* paired with ENOMEM */
+#define ICONV_INVAL (ERROR_BASE+1) /* paired with EINVAL */
+#define ICONV_2BIG (ERROR_BASE+2) /* paired with E2BIG */
+#define ICONV_ILSEQ (ERROR_BASE+3) /* paired with EILSEQ */
+
+#endif
diff --git a/module/header.cmhg b/module/header.cmhg
new file mode 100644
index 0000000..5f7fe5e
--- /dev/null
+++ b/module/header.cmhg
@@ -0,0 +1,18 @@
+help-string: Iconv 0.08
+
+title-string: Iconv
+
+initialisation-code: mod_init
+
+finalisation-code: mod_fini
+
+swi-chunk-base-number: 0x57540
+
+swi-handler-code: swi_handler
+
+swi-decoding-table: Iconv, Open, Iconv, Close, Convert, CreateMenu, DecodeMenu
+
+command-keyword-table: command_handler
+ ReadAliases(min-args: 0, max-args: 0,
+ invalid-syntax: "Syntax: *ReadAliases",
+ help-text: "*ReadAliases rereads the encoding aliases file.\n")
diff --git a/module/menu.c b/module/menu.c
new file mode 100644
index 0000000..f42f505
--- /dev/null
+++ b/module/menu.c
@@ -0,0 +1,618 @@
+/* Encoding menu */
+
+#include <ctype.h>
+#ifdef MTEST
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+
+#include <unicode/charsets.h>
+#include <unicode/encoding.h>
+
+#include "module.h"
+
+#define menu_HEADER_SIZE (28) /* bytes menu_op writes before the first entry: 12 title + 4 colour bytes + 3 ints */
+#define menu_ENTRY_SIZE (24) /* bytes menu_op writes per entry: six ints */
+/* In-memory view of a menu block as built by menu_op. Only the per-entry
+ * fields are accessed through this type; the header is written bytewise. */
+typedef struct _menu {
+ char head[menu_HEADER_SIZE]; /* We don't care about this */
+ struct {
+ int menu_flags; /* per-entry flags; bit 0 is set for the selected entry */
+ int *sub_menu; /* menu_op writes -1 here, then the submenu address if there is one */
+ int icon_flags;
+ struct { /* Only handle indirected text */
+ char *text; /* entry text, stored in the separate indirected data buffer */
+ char *validation; /* validation string, also in the data buffer */
+ int size; /* length of text including its terminator */
+ } indirected_text;
+ } entries[1]; /* declared with one element but indexed up to the real entry count */
+} wimp_menu;
+
+struct menu_desc { /* generic view of a menu description, used by menu_op */
+ char *title; /* menu title; menu_op copies at most 12 bytes of it */
+ int n_entries; /* number of strings in entries */
+ const char *entries[1]; /* declared with one element but indexed up to n_entries */
+};
+/* menudesc(N) declares an anonymous struct with the same leading members as
+ * struct menu_desc but room for N entries. The menu tables are defined with
+ * it and then cast to (const struct menu_desc *) where they are used. */
+#define menudesc(N) \
+ struct { \
+ char *title; \
+ int n_entries; \
+ char *entries[(N)]; \
+ }
+
+/* Menu descriptions.
+ * A number of magic characters are permitted at the start of entry names:
+ *
+ * Character: Meaning:
+ * ^ Insert separator after this entry
+ * > Can open submenu, even if shaded
+ * {..} Has submenu, named ".."
+ * $ This entry is shaded
+ *
+ * Magic characters are examined in the order above.
+ * The submenu name is the title of the submenu.
+ * The first alphanumeric character is taken as the start of the entry name.
+ */
+static const char *val = ""; /* Validation string */
+
+#define N_LATIN (30)
+static const menudesc(N_LATIN) latin_menu = {
+ "Latin", N_LATIN,
+ {
+ "Western (ISO 8859-1: Latin-1)",
+ "Eastern (ISO 8859-2: Latin-2)",
+ "Southern (ISO 8859-3: Latin-3)",
+ "Nordic (ISO 8859-4: Latin-4)",
+ "Turkish (ISO 8859-9: Latin-5)",
+ "Nordic (ISO 8859-10: Latin-6)",
+ "Baltic rim (ISO 8859-13: Latin-7)",
+ "Celtic (ISO 8859-14: Latin-8)",
+ "^Western (ISO 8859-15: Latin-9)",
+ "Welsh (ISO-IR 182)",
+ "^Sami (ISO-IR 197)",
+ "Microsoft Latin-1 (CP1252)",
+ "Microsoft Latin-2 (CP1250)",
+ "Microsoft Baltic (CP1257)",
+ "^Microsoft Turkish (CP1254)",
+ "Apple Macintosh Roman",
+ "Apple Macintosh Croatian",
+ "Apple Macintosh Icelandic",
+ "Apple Macintosh Romanian",
+ "Apple Macintosh Turkish",
+ "^Apple Macintosh Central European",
+ "^Acorn Latin-1",
+ "DOS Latin-1 (CP850)",
+ "DOS Latin-2 (CP852)",
+ "DOS Baltic rim (CP775)",
+ "DOS Turkish (CP857)",
+ "DOS Portuguese (CP860)",
+ "DOS Icelandic (CP861)",
+ "DOS CanadaF (CP863)",
+ "DOS Nordic (CP865)",
+ }
+};
+
+#define N_ARABIC (4)
+static const menudesc(N_ARABIC) arabic_menu = {
+ "Arabic", N_ARABIC,
+ {
+ "$ISO 8859-6",
+ "Microsoft Arabic (CP1256)",
+ "Apple Macintosh Arabic",
+ "DOS Arabic (CP864)",
+ }
+};
+
+#define N_CYRILLIC (10)
+static const menudesc(N_CYRILLIC) cyrillic_menu = {
+ "Cyrillic", N_CYRILLIC,
+ {
+ "ISO 8859-5",
+ "KOI8-R",
+ "KOI8-RU",
+ "KOI8-T",
+ "^KOI8-U",
+ "^Microsoft Cyrillic (CP1251)",
+ "Apple Macintosh Cyrillic",
+ "^Apple Macintosh Ukrainian",
+ "DOS Cyrillic (CP855)",
+ "DOS Cyrillic Russian (CP866)",
+ }
+};
+
+#define N_GREEK (5)
+static const menudesc(N_GREEK) greek_menu = {
+ "Greek", N_GREEK,
+ {
+ "ISO 8859-7",
+ "Microsoft Greek (CP1253)",
+ "Apple Macintosh Greek",
+ "DOS Greek (CP737)",
+ "DOS Greek2 (CP869)",
+ }
+};
+
+#define N_HEBREW (4)
+static const menudesc(N_HEBREW) hebrew_menu = {
+ "Hebrew", N_HEBREW,
+ {
+ "ISO 8859-8",
+ "$Microsoft Hebrew (CP1255)",
+ "Apple Macintosh Hebrew",
+ "DOS Hebrew (CP862)",
+ }
+};
+
+#define N_CHINESE (3)
+static const menudesc(N_CHINESE) chinese_menu = {
+ "Chinese", N_CHINESE,
+ {
+ "ISO 2022-CN",
+ "^GB 2312 (EUC-CN)",
+ "Big Five",
+ }
+};
+
+#define N_JAPANESE (3)
+static const menudesc(N_JAPANESE) japanese_menu = {
+ "Japanese", N_JAPANESE,
+ {
+ "ISO 2022-JP",
+ "EUC-JP",
+ "Shift-JIS",
+ }
+};
+
+#define N_KOREAN (3)
+static const menudesc(N_KOREAN) korean_menu = {
+ "Korean", N_KOREAN,
+ {
+ "ISO 2022-KR",
+ "EUC-KR",
+ "Johab",
+ }
+};
+
+#define N_THAI (3)
+static const menudesc(N_THAI) thai_menu = {
+ "Thai", N_THAI,
+ {
+ "ISO 8859-11",
+ "Apple Macintosh Thai",
+ "DOS Thai (CP874)",
+ }
+};
+
+#define N_VIETNAMESE (1)
+static const menudesc(N_VIETNAMESE) vietnamese_menu = {
+ "Vietnamese", N_VIETNAMESE,
+ {
+ "$Microsoft Vietnamese (CP1258)",
+ }
+};
+
+#define N_UNIVERSAL (4)
+static const menudesc(N_UNIVERSAL) universal_menu = {
+ "Universal", N_UNIVERSAL,
+ {
+ "UTF-8 (ASCII-compatible)",
+ "UCS-2 / UTF-16 (16-bit)",
+ "^UCS-4 (31-bit)",
+ "ISO-2022",
+ }
+};
+
+#define N_ENC (11)
+static const menudesc(N_ENC) enc_menu = {
+ "Encodings", N_ENC,
+ {
+ "^{Latin}Latin",
+ "{Arabic}Arabic",
+ "{Cyrillic}Cyrillic",
+ "{Greek}Greek",
+ "^{Hebrew}Hebrew",
+ "{Chinese}Chinese",
+ "{Japanese}Japanese",
+ "{Korean}Korean",
+ "{Thai}Thai",
+ "^>{Vietnamese}$Vietnamese",
+ "{Universal}Universal",
+ }
+};
+
+/* This struct is a lookup table between menu entries and charset numbers.
+ * Within each array, element i is the charset for entry i of the menu the
+ * array is paired with in sub_menus. The member order here (greek and
+ * hebrew before cyrillic) is not the entry order of enc_menu; that is
+ * harmless because each array is reached by name, and the initialiser
+ * below follows the member order. Bare numbers are charset numbers
+ * written literally; the menu entry at the same index names the encoding. */
+static const struct csmap {
+ short latin[N_LATIN];
+ short arabic[N_ARABIC];
+ short greek[N_GREEK];
+ short hebrew[N_HEBREW];
+ short cyrillic[N_CYRILLIC];
+ short chinese[N_CHINESE];
+ short japanese[N_JAPANESE];
+ short korean[N_KOREAN];
+ short thai[N_THAI];
+ short vietnamese[N_VIETNAMESE];
+ short universal[N_UNIVERSAL];
+} csmap = {
+ { csISOLatin1, csISOLatin2, csISOLatin3, csISOLatin4, csISOLatin5,
+ csISOLatin6, csISOLatin7, csISOLatin8, csISOLatin9, csWelsh,
+ csSami, csWindows1252, csWindows1250, csWindows1257, csWindows1254,
+ csMacintosh, 3019, 3022, 3023, 3025, csMacCentEuro, csAcornLatin1,
+ csPC850Multilingual, csPCp852, csPC775Baltic, csIBM857, csIBM860,
+ csIBM861, csIBM863, csIBM865 },
+ { csISOLatinArabic, csWindows1256, 3018, csIBM864 },
+ { csISOLatinGreek, csWindows1253, 3020, 3000, csIBM869 },
+ { csISOLatinHebrew, csWindows1255, 3021, csPC862LatinHebrew },
+ { csISOLatinCyrillic, csKOI8R, 3016, 3017, 2088, csWindows1251,
+ csMacCyrillic, csMacUkrainian, csIBM855, csIBM866 },
+ { csISO2022CN, csGB2312, csBig5 },
+ { csISO2022JP, csEUCPkdFmtJapanese, csShiftJIS },
+ { csISO2022KR, csEUCKR, csJohab },
+ { csISOLatinThai, 3024, 3004 },
+ { csWindows1258 },
+ { csUTF8, csUnicode11, csUCS4, csVenturaMath }
+};
+
+/* Sub menu lookup table - Must be sorted alphabetically, because it is
+ * searched with bsearch(). The comparator used for that search is strcmp()
+ * applied to a pointer to the element itself, so name must remain the
+ * first member and must hold a NUL-terminated string. */
+static const struct sub_menu {
+ char name[12]; /* submenu name as written between '{' and '}' in a menu entry */
+ const struct menu_desc *desc; /* description of the submenu */
+ const short *lut; /* charset numbers for the submenu's entries, by index */
+} sub_menus[] = {
+ { "Arabic", (const struct menu_desc *)&arabic_menu,
+ csmap.arabic },
+ { "Chinese", (const struct menu_desc *)&chinese_menu,
+ csmap.chinese },
+ { "Cyrillic", (const struct menu_desc *)&cyrillic_menu,
+ csmap.cyrillic },
+ { "Greek", (const struct menu_desc *)&greek_menu, csmap.greek },
+ { "Hebrew", (const struct menu_desc *)&hebrew_menu,
+ csmap.hebrew },
+ { "Japanese", (const struct menu_desc *)&japanese_menu,
+ csmap.japanese },
+ { "Korean", (const struct menu_desc *)&korean_menu,
+ csmap.korean },
+ { "Latin", (const struct menu_desc *)&latin_menu, csmap.latin },
+ { "Thai", (const struct menu_desc *)&thai_menu, csmap.thai },
+ { "Universal", (const struct menu_desc *)&universal_menu,
+ csmap.universal },
+ { "Vietnamese", (const struct menu_desc *)&vietnamese_menu,
+ csmap.vietnamese },
+};
+#define SUB_MENU_COUNT (sizeof(sub_menus) / sizeof(sub_menus[0])) /* number of rows in sub_menus */
+
+
+
+#define MAX_SUBMENUS (16) /* Maximum number of submenus each menu can have */
+
+#define MENU_COUNT_SIZE (0x00)
+#define MENU_CREATE (0x01)
+#define MENU_CLEAR_SELECTIONS (0x02)
+/**
+ * Perform an operation on a menu
+ *
+ * The menu described by \a d is processed first, followed (recursively)
+ * by every submenu named in its entries, so the menus end up laid out
+ * one after another starting at \a buf.
+ *
+ * With MENU_COUNT_SIZE nothing is written: \a buf and \a *data are only
+ * advanced, so calling with buf == NULL and *data == NULL yields the
+ * required sizes as the returned pointer value and the final *data.
+ *
+ * \param d        The description
+ * \param buf      Location to write menu to
+ * \param parent   Parent menu
+ * \param which    Which parent entry this menu is linked from
+ * \param flags    Flags word
+ *                 Bit:   Meaning:
+ *                  0     Create menu
+ *                  1     Clear existing selections (charset != 0)
+ * \param charset  Charset identifier of selected charset
+ * \param lut      Selection lookup table
+ * \param data     Location to write indirected data to
+ *                 (may be NULL unless creating)
+ * \return Pointer to location after menu data, or \a buf unchanged if a
+ *         buffer needed by the requested operation is missing
+ */
+static char *menu_op(const struct menu_desc *d, char *buf,
+		wimp_menu *parent, size_t which, size_t flags,
+		size_t charset, const short *lut, char **data)
+{
+	size_t e, top = 0;
+	size_t n_entries = d->n_entries > 0 ? (size_t)d->n_entries : 0;
+	struct {
+		size_t e;		/* entry the submenu hangs off */
+		const char *name;	/* first character after '{' */
+		size_t len;		/* name length, excluding '}' */
+	} submenus[MAX_SUBMENUS];
+	char *bp = buf;
+	/* Initialised because callers may pass data == NULL (the
+	 * clear-selections path does); dp is still advanced below and
+	 * handed on to the recursive calls. */
+	char *dp = NULL;
+
+	if (data)
+		dp = *data;
+
+	/* Both writing modes need a menu buffer ... */
+	if (!buf && (flags & (MENU_CREATE | MENU_CLEAR_SELECTIONS)))
+		return buf;
+
+	/* ... and creation also needs somewhere for the indirected text */
+	if (!data && (flags & MENU_CREATE))
+		return buf;
+
+	if ((flags & MENU_CREATE)) {
+		/* copy menu title */
+		strncpy(bp, d->title, 12);
+		bp += 12;
+
+		/* colours */
+		*bp++ = 7; *bp++ = 2; *bp++ = 7; *bp++ = 0;
+
+		/* width, height, gap */
+		*((int *)bp) = 200; bp += 4;
+		*((int *)bp) = 44; bp += 4;
+		*((int *)bp) = 0; bp += 4;
+
+		/* each menu's data starts with a copy of the validation
+		 * string, which all of its entries then point at */
+		memcpy(dp, val, strlen(val) + 1);
+		dp += strlen(val) + 1;
+	} else {
+		bp += menu_HEADER_SIZE;
+		dp += strlen(val) + 1;
+	}
+
+	/* now the entries */
+	for (e = 0; e < n_entries; e++) {
+		int menuf = 0, icon = (7 << 24) | 0x121;
+		const char *pos;
+
+		/* parse description string: consume magic characters up to
+		 * the first alphanumeric, never running past the terminator.
+		 * The cast keeps isalnum() defined for any char value. */
+		for (pos = d->entries[e];
+				*pos != '\0' && !isalnum((unsigned char)*pos);
+				pos++) {
+			if (*pos == '^') {
+				/* separator after this entry */
+				menuf |= 0x2;
+			} else if (*pos == '>') {
+				/* submenu may be opened even if shaded */
+				menuf |= 0x10;
+			} else if (*pos == '{') {
+				const char *close = strchr(pos, '}');
+
+				if (!close)
+					break;	/* malformed: no closing brace */
+
+				if (top < MAX_SUBMENUS) {
+					submenus[top].e = e;
+					submenus[top].name = pos + 1;
+					submenus[top].len =
+						(size_t)(close - (pos + 1));
+					top++;
+				}
+
+				/* the loop increment steps past the '}' */
+				pos = close;
+			} else if (*pos == '$') {
+				/* shaded entry */
+				icon |= (1<<22);
+			}
+		}
+
+		if (e == n_entries - 1)
+			/* last item */
+			menuf |= 0x80;
+
+		/* mark the entry for the selected charset, and the parent
+		 * entry leading to it */
+		if (charset != 0 && lut && (size_t)lut[e] == charset) {
+			menuf |= 0x1;
+			if (parent)
+				parent->entries[which].menu_flags |= 0x1;
+		}
+
+		if (flags & MENU_CLEAR_SELECTIONS) {
+			/* rewrite the flags of an existing menu so that only
+			 * the current selection stays marked */
+			((wimp_menu *)buf)->entries[e].menu_flags = menuf;
+		}
+
+		if ((flags & MENU_CREATE)) {
+			/* NOTE(review): pointers are stored in int-sized
+			 * slots, which assumes they fit in an int */
+			*((int *)bp) = menuf; bp += 4;
+			*((int *)bp) = -1; bp += 4;
+			*((int *)bp) = icon; bp += 4;
+			*((int *)bp) = (int)(dp); bp += 4;
+			*((int *)bp) = (int)(*data); bp += 4;
+			*((int *)bp) = (int)(strlen(pos) + 1); bp += 4;
+
+			memcpy(dp, pos, strlen(pos) + 1);
+			dp += strlen(pos) + 1;
+		} else {
+			bp += menu_ENTRY_SIZE;
+			dp += strlen(pos) + 1;
+		}
+	}
+
+	/* fixup parent's pointer to this menu */
+	if (parent && (flags & MENU_CREATE))
+		parent->entries[which].sub_menu = (int *)buf;
+
+	/* and recurse */
+	for (e = 0; e < top; e++) {
+		const struct sub_menu *s;
+		char child[sizeof(sub_menus[0].name)];
+
+		/* a name too long for sub_menus cannot match any row */
+		if (submenus[e].len >= sizeof(child))
+			continue;
+
+		memcpy(child, submenus[e].name, submenus[e].len);
+		child[submenus[e].len] = '\0';
+
+		/* strcmp works as the comparator because name is the first
+		 * member of each sub_menus element */
+		s = bsearch(child, sub_menus, SUB_MENU_COUNT,
+				sizeof(sub_menus[0]),
+				(int (*)(const void *, const void *))strcmp);
+		if (s)
+			bp = menu_op(s->desc, bp, (wimp_menu *)buf,
+					submenus[e].e, flags,
+					charset, s->lut, &dp);
+	}
+
+	if (data)
+		(*data) = dp;
+
+	return bp;
+}
+
+/**
+ * Iconv_CreateMenu SWI - Creates a menu structure of supported encodings
+ *
+ * Call once with \a buf and \a data NULL to read the two required sizes,
+ * then again with buffers of at least those sizes.
+ *
+ * \param flags     Flags word - all reserved
+ * \param buf       Pointer to buffer in which to store menu data, or NULL to
+ *                  read required buffer size.
+ * \param len       Length of buffer, in bytes
+ * \param selected  Pointer to name of selected encoding, or NULL if none
+ * \param data      Pointer to buffer in which to store indirected data, or
+ *                  NULL to read required buffer size. Must be NULL exactly
+ *                  when \a buf is NULL.
+ * \param dlen      Pointer to length of data buffer, in bytes. Updated to
+ *                  the length of indirected data required/written.
+ * \return Required length of menu buffer (which is also the length
+ *         written when buffers are supplied), or 0 if the arguments are
+ *         invalid or either buffer is too small
+ */
+size_t iconv_createmenu(size_t flags, char *buf, size_t len,
+		const char *selected, char *data, size_t *dlen)
+{
+	size_t reqlen, datalen;
+	char *dp = NULL;
+	int sel = 0;
+	struct canon *c;
+
+	UNUSED(flags);
+
+	/* sanity check arguments: the two buffers must be supplied (or
+	 * omitted) together, as creating a menu writes to both */
+	if ((!buf != !data) || !dlen)
+		return 0;
+
+	/* get required size: with no buffers, the "pointers" that come back
+	 * are simply byte counts */
+	reqlen = (size_t)menu_op((const struct menu_desc *)&enc_menu, 0,
+			NULL, 0, MENU_COUNT_SIZE, 0, NULL, &dp);
+
+	datalen = (size_t)dp;
+
+	/* buffer length requested, so return it */
+	if (!buf) {
+		*dlen = datalen;
+		return reqlen;
+	}
+
+	/* insufficient room in either buffer */
+	if (reqlen > len || datalen > *dlen)
+		return 0;
+
+	/* Selected entry? */
+	if (selected) {
+		sel = iconv_eightbit_number_from_name(selected) & ~(1<<30);
+
+		if (!sel) {
+			c = alias_canonicalise(selected);
+			if (c) {
+				sel = encoding_number_from_name(c->name);
+			}
+		}
+	}
+
+	/* guarded by MTEST, as <stdio.h> is only included for MTEST builds */
+#ifdef MTEST
+	printf("selected: '%s' : %d\n", selected ? selected : "(none)", sel);
+#endif
+
+	dp = data;
+	(void)menu_op((const struct menu_desc *)&enc_menu, buf,
+			NULL, 0, MENU_CREATE, sel, NULL, &dp);
+
+	(*dlen) = datalen;
+
+	return reqlen;
+}
+
+/**
+ * Iconv_DecodeMenu SWI - Decodes a selection in a menu generated by
+ *                        Iconv_CreateMenu.
+ *
+ * On success the menu is also updated so that the decoded entry is the
+ * one marked as selected.
+ *
+ * \param flags       Bitfield of flags - all reserved
+ * \param menu        Menu definition
+ * \param selections  Menu selections: index into the top-level menu, then
+ *                    index into the chosen submenu; -1 means no selection
+ * \param buf         Pointer to output buffer, or NULL to read required length
+ * \param buflen      Length of output buffer
+ * \return Required length of output buffer (including the terminator), or
+ *         0 if there is no (valid) selection or \a buf is too small
+ */
+size_t iconv_decodemenu(size_t flags, void *menu, int *selections,
+		char *buf, size_t buflen)
+{
+	const char *text, *name, *close;
+	size_t len, textlen;
+	const struct sub_menu *s;
+	char child[sizeof(sub_menus[0].name)];
+
+	UNUSED(flags);
+
+	if (!menu || !selections)
+		return 0;
+
+	/* out of range (any negative index, not just -1, is rejected so it
+	 * can never be used to index the tables) */
+	if (selections[0] < 0 || selections[0] >= enc_menu.n_entries)
+		return 0;
+
+	/* Grab sub menu name */
+	name = strchr(enc_menu.entries[selections[0]], '{');
+	if (!name)
+		return 0;
+	name++;
+
+	close = strchr(name, '}');
+	if (!close)
+		return 0;
+	len = (size_t)(close - name);
+
+	/* a name too long for sub_menus cannot match any row */
+	if (len >= sizeof(child))
+		return 0;
+
+	/* copy to temporary buffer */
+	memcpy(child, name, len);
+	child[len] = '\0';
+
+	/* look for submenu; strcmp works as the comparator because name is
+	 * the first member of each sub_menus element */
+	s = bsearch(child, sub_menus, SUB_MENU_COUNT, sizeof(sub_menus[0]),
+			(int (*)(const void *, const void *))strcmp);
+	if (!s)
+		return 0;
+
+	if (selections[1] < 0 || selections[1] >= s->desc->n_entries)
+		return 0;
+
+	/* lookup encoding name from number */
+	text = mibenum_to_name(s->lut[selections[1]]);
+
+	/* not found */
+	if (!text)
+		return 0;
+
+#ifdef MTEST
+	printf("%p : '%s'\n", (const void *)text, text);
+#endif
+
+	textlen = strlen(text) + 1;
+
+	if (buf) {
+		if (buflen < textlen)
+			/* insufficient buffer space */
+			return 0;
+
+		memcpy(buf, text, textlen);
+	}
+
+	/* move the selection mark in the menu to the decoded entry */
+	menu_op((const struct menu_desc *)&enc_menu, menu, NULL, 0,
+			MENU_CLEAR_SELECTIONS, s->lut[selections[1]],
+			NULL, NULL);
+
+	return textlen;
+}
+
+
+#ifdef MTEST
+/* Manual test harness: builds the encodings menu with "UTF-16" selected,
+ * dumps the menu and its indirected data to $.dump / $.dump1, then decodes
+ * the selection { 0, 5 } and prints the resulting encoding name.
+ * Returns 0 on success, 1 on any setup or allocation failure. */
+int main(void)
+{
+	size_t len, slen, dlen = 0;
+	char *buf = NULL, *dbuf = NULL, *selected = NULL;
+	int selection[3] = { 0, 5, -1};
+	FILE *fp;
+	int ret = 1;
+
+	if (!create_alias_data("Unicode:Files.Aliases"))
+		return 1;
+
+	/* first pass: read the required buffer sizes */
+	len = iconv_createmenu(0, 0, 0, 0, 0, &dlen);
+
+	buf = calloc(len, sizeof(char));
+	if (!buf)
+		goto out;
+
+	dbuf = calloc(dlen, sizeof(char));
+	if (!dbuf)
+		goto out;
+
+	printf("%p: %lu\n", (void *)buf,
+			(unsigned long)iconv_createmenu(0, buf, len, "UTF-16",
+					dbuf, &dlen));
+
+	/* the dumps are a debugging aid only, so a failure to open either
+	 * file is not treated as a test failure */
+	fp = fopen("$.dump", "w");
+	if (fp) {
+		fwrite(buf, sizeof(char), len, fp);
+		fclose(fp);
+	}
+
+	fp = fopen("$.dump1", "w");
+	if (fp) {
+		fwrite(dbuf, sizeof(char), dlen, fp);
+		fclose(fp);
+	}
+
+	/* first pass: read the required length of the name buffer */
+	slen = iconv_decodemenu(0, (wimp_menu*)buf, selection, 0, 0);
+
+	selected = calloc(slen, sizeof(char));
+	if (!selected)
+		goto out;
+
+	printf("%p: %lu\n", (void *)selected,
+			(unsigned long)iconv_decodemenu(0, (wimp_menu*)buf,
+					selection, selected, slen));
+
+	printf("'%s'\n", selected);
+
+	ret = 0;
+
+out:
+	free(selected);
+	free(dbuf);
+	free(buf);
+
+	free_alias_data();
+
+	return ret;
+}
+#endif
diff --git a/module/module.c b/module/module.c
new file mode 100644
index 0000000..ef2a5da
--- /dev/null
+++ b/module/module.c
@@ -0,0 +1,192 @@
+/* Iconv module interface */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/errno.h>
+
+#include <iconv/iconv.h>
+
+#include "swis.h"
+
+#include "errors.h"
+#include "header.h"
+#include "module.h"
+
+#define ALIASES_FILE "Unicode:Files.Aliases"
+
+static _kernel_oserror ErrorGeneric = { 0x0, "" };
+
+static size_t iconv_convert(_kernel_swi_regs *r);
+static int errno_to_iconv_error(int num);
+
+/* Module initialisation
+ *
+ * Checks that Unicode$Path is set and loads the aliases file. Returns NULL
+ * on success, or the shared error block with a message describing which of
+ * the two steps failed. None of the arguments are used. */
+_kernel_oserror *mod_init(const char *tail, int podule_base, void *pw)
+{
+	const char *failure = NULL;
+
+	UNUSED(pw);
+	UNUSED(podule_base);
+	UNUSED(tail);
+
+	if (getenv("Unicode$Path") == NULL) {
+		/* the !Unicode resource has not been seen */
+		failure = "!Unicode resource not found.";
+	} else if (!iconv_initialise(ALIASES_FILE)) {
+		/* the library could not be initialised from the aliases file */
+		failure = "Unicode:Files.Aliases not "
+				"found. Please read the Iconv installation "
+				"instructions.";
+	}
+
+	if (failure == NULL)
+		return NULL;
+
+	strncpy(ErrorGeneric.errmess, failure, 252);
+
+	return &ErrorGeneric;
+}
+
+/* Module finalisation
+ *
+ * Shuts the iconv library down. Always succeeds, so always returns NULL;
+ * the arguments are not needed. */
+_kernel_oserror *mod_fini(int fatal, int podule_base, void *pw)
+{
+	UNUSED(pw);
+	UNUSED(podule_base);
+	UNUSED(fatal);
+
+	iconv_finalise();
+
+	return NULL;
+}
+
+/* SWI handler
+ *
+ * Dispatches the module's SWIs by offset from the chunk base:
+ *   0 Iconv_Open, 1 Iconv_Iconv, 2 Iconv_Close, 3 Iconv_Convert,
+ *   4 Iconv_CreateMenu, 5 Iconv_DecodeMenu.
+ *
+ * Arguments are taken from, and results returned in, regs. When one of
+ * the conversion SWIs (0-3) fails, the shared error block is returned
+ * with its error number set to the ICONV_* value for errno; that is the
+ * number the veneers in stubs.c switch on to recover errno. The menu
+ * SWIs report failure through their result registers instead.
+ *
+ * Returns NULL on success, error_BAD_SWI for an unknown offset. */
+_kernel_oserror *swi_handler(int swi_off, _kernel_swi_regs *regs, void *pw)
+{
+	UNUSED(pw);
+
+	if (swi_off < 0 || swi_off > 5)
+		return error_BAD_SWI;
+
+	switch (swi_off) {
+	case 0: /* Iconv_Open */
+	{
+		iconv_t cd = iconv_open((const char*)regs->r[0],
+				(const char*)regs->r[1]);
+		if (cd == (iconv_t)(-1))
+			goto failed;
+		regs->r[0] = (int)cd;
+	}
+		break;
+	case 1: /* Iconv_Iconv */
+	{
+		size_t n = iconv((iconv_t)regs->r[0],
+				(char**)regs->r[1],
+				(size_t*)regs->r[2],
+				(char**)regs->r[3],
+				(size_t*)regs->r[4]);
+		if (n == (size_t)(-1))
+			goto failed;
+		regs->r[0] = (int)n;
+	}
+		break;
+	case 2: /* Iconv_Close */
+	{
+		int rc = iconv_close((iconv_t)regs->r[0]);
+		if (rc == -1)
+			goto failed;
+		regs->r[0] = rc;
+	}
+		break;
+	case 3: /* Iconv_Convert */
+	{
+		size_t n = iconv_convert(regs);
+		if (n == (size_t)(-1))
+			goto failed;
+		regs->r[0] = (int)n;
+	}
+		break;
+	case 4: /* Iconv_CreateMenu */
+	{
+		size_t dlen = regs->r[5];
+		regs->r[2] = iconv_createmenu(regs->r[0],
+				(char *)regs->r[1],
+				regs->r[2],
+				(const char *)regs->r[3],
+				(char *)regs->r[4],
+				&dlen);
+		regs->r[5] = dlen;
+	}
+		break;
+	case 5: /* Iconv_DecodeMenu */
+		regs->r[4] = iconv_decodemenu(regs->r[0],
+				(void *)regs->r[1],
+				(int *)regs->r[2],
+				(char *)regs->r[3], regs->r[4]);
+		break;
+	}
+
+	return NULL;
+
+failed:
+	/* errno is read straight after the failing call, before anything
+	 * else can disturb it */
+	ErrorGeneric.errnum = errno_to_iconv_error(errno);
+	return &ErrorGeneric;
+}
+
+/* *command handler
+ *
+ * Only *ReadAliases is acted upon: the current alias data is discarded and
+ * the aliases file is read again. Returns NULL on success (and for any
+ * other command number), or the shared error block if the file could not
+ * be read. */
+_kernel_oserror *command_handler(const char *arg_string, int argc,
+		int cmd_no, void *pw)
+{
+	UNUSED(pw);
+	UNUSED(argc);
+	UNUSED(arg_string);
+
+	/* nothing to do for commands other than *ReadAliases */
+	if (cmd_no != CMD_ReadAliases)
+		return NULL;
+
+	free_alias_data();
+
+	if (create_alias_data(ALIASES_FILE))
+		return NULL;
+
+	strcpy(ErrorGeneric.errmess, "Failed reading Aliases file.");
+
+	return &ErrorGeneric;
+}
+
+/* Iconv_Convert: register-based form of iconv().
+ *
+ * Takes the descriptor from r0, the input pointer and byte count from
+ * r1/r2 and the output pointer and byte count from r3/r4, performs the
+ * conversion, then writes the advanced pointers and remaining counts back
+ * to r1-r4 (whether or not the conversion succeeded).
+ * Returns whatever iconv() returned. */
+size_t iconv_convert(_kernel_swi_regs *regs)
+{
+	char *in = (char *)regs->r[1];
+	size_t in_left = (size_t)regs->r[2];
+	char *out = (char *)regs->r[3];
+	size_t out_left = (size_t)regs->r[4];
+	size_t converted;
+
+	converted = iconv((iconv_t)regs->r[0], &in, &in_left,
+			&out, &out_left);
+
+	/* hand the updated positions back to the caller */
+	regs->r[4] = (int)out_left;
+	regs->r[3] = (int)out;
+	regs->r[2] = (int)in_left;
+	regs->r[1] = (int)in;
+
+	return converted;
+}
+
+int errno_to_iconv_error(int num)
+{
+ switch (num) {
+ case ENOMEM:
+ return ICONV_NOMEM;
+ case E2BIG:
+ return ICONV_2BIG;
+ case EILSEQ:
+ return ICONV_ILSEQ;
+ case EINVAL:
+ default:
+ return ICONV_INVAL:
+ }
+}
+
diff --git a/module/module.h b/module/module.h
new file mode 100644
index 0000000..09dcfeb
--- /dev/null
+++ b/module/module.h
@@ -0,0 +1,24 @@
+#ifndef iconv_module_h_
+#define iconv_module_h_
+
+#include <stddef.h>	/* size_t, used by the prototypes below */
+
+/* LOG(("format", args...)) prints a message prefixed with the file,
+ * function and line in DEBUG builds, and expands to nothing otherwise.
+ * Note the double parentheses: the argument is a complete printf
+ * argument list. */
+#ifndef DEBUG
+#define LOG(x)
+#else
+#include <stdio.h>	/* printf, fputc and stdout for LOG */
+#define LOG(x) (printf(__FILE__ " %s %i: ", __func__, __LINE__), printf x, fputc('\n', stdout))
+#endif
+
+/* Mark a parameter as deliberately unused. A void cast is used rather
+ * than a self-assignment so that it also works for const-qualified
+ * objects and never generates a store. */
+#define UNUSED(x) ((void)(x))
+
+/* In iconv library */
+extern int iconv_eightbit_number_from_name(const char *name);
+extern short mibenum_from_name(const char *alias);
+extern const char *mibenum_to_name(short mibenum);
+
+/* in menu.c */
+size_t iconv_createmenu(size_t flags, char *buf, size_t buflen,
+		const char *selected, char *data, size_t *dlen);
+size_t iconv_decodemenu(size_t flags, void *menu, int *selections,
+		char *buf, size_t buflen);
+
+#endif
+
diff --git a/module/stubs.c b/module/stubs.c
new file mode 100644
index 0000000..96dee4b
--- /dev/null
+++ b/module/stubs.c
@@ -0,0 +1,102 @@
+/* Iconv stubs */
+
+#include <errno.h>
+
+#include <sys/errno.h>
+
+#include "swis.h"
+
+#include "errors.h" /* for error numbers */
+#include "header.h" /* for SWI numbers */
+#include "iconv.h"
+
+iconv_t iconv_open(const char *tocode, const char *fromcode)
+{
+ iconv_t ret;
+ _kernel_oserror *error;
+
+ error = _swix(Iconv_Open, _INR(0,1) | _OUT(0), tocode, fromcode, &ret);
+ if (error) {
+ switch (error->errnum) {
+ case ICONV_NOMEM:
+ errno = ENOMEM;
+ break;
+ case ICONV_INVAL:
+ errno = EINVAL;
+ break;
+ case ICONV_2BIG:
+ errno = E2BIG;
+ break;
+ case ICONV_ILSEQ:
+ errno = EILSEQ;
+ break;
+ default:
+ errno = EINVAL; /* munge BAD_SWI to EINVAL */
+ break;
+ }
+ return (iconv_t)(-1);
+ }
+
+ return ret;
+}
+
+size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
+ size_t *outbytesleft)
+{
+ size_t ret;
+ _kernel_oserror *error;
+
+ error = _swix(Iconv_Iconv, _INR(0,4) | _OUT(0), cd, inbuf, inbytesleft, outbuf, outbytesleft, &ret);
+ if (error) {
+ switch (error->errnum) {
+ case ICONV_NOMEM:
+ errno = ENOMEM;
+ break;
+ case ICONV_INVAL:
+ errno = EINVAL;
+ break;
+ case ICONV_2BIG:
+ errno = E2BIG;
+ break;
+ case ICONV_ILSEQ:
+ errno = EILSEQ;
+ break;
+ default:
+ errno = EINVAL; /* munge BAD_SWI to EINVAL */
+ break;
+ }
+ return (size_t)(-1);
+ }
+
+ return ret;
+}
+
+int iconv_close(iconv_t cd)
+{
+ int ret;
+ _kernel_oserror *error;
+
+ error = _swix(Iconv_Close, _IN(0) | _OUT(0), cd, &ret);
+ if (error) {
+ switch (error->errnum) {
+ case ICONV_NOMEM:
+ errno = ENOMEM;
+ break;
+ case ICONV_INVAL:
+ errno = EINVAL;
+ break;
+ case ICONV_2BIG:
+ errno = E2BIG;
+ break;
+ case ICONV_ILSEQ:
+ errno = EILSEQ;
+ break;
+ default:
+ errno = EINVAL; /* munge BAD_SWI to EINVAL */
+ break;
+ }
+ return -1;
+ }
+
+ return ret;
+}
diff --git a/riscos/!Boot/Resources/!Unicode/!Boot,feb b/riscos/!Boot/Resources/!Unicode/!Boot,feb
new file mode 100644
index 0000000..7c0c462
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/!Boot,feb
@@ -0,0 +1,5 @@
+| Unicode Boot file
+|
+Set Unicode$Dir <Obey$Dir>
+SetMacro Unicode$Path <Unicode$Dir>.,Resources:$.Resources.Unicode.
+IconSprites Unicode:!Sprites
diff --git a/riscos/!Boot/Resources/!Unicode/!Help b/riscos/!Boot/Resources/!Unicode/!Help
new file mode 100644
index 0000000..8c04881
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/!Help
@@ -0,0 +1 @@
+This application contains resources for Unicode support in applications.
diff --git a/riscos/!Boot/Resources/!Unicode/!Run,feb b/riscos/!Boot/Resources/!Unicode/!Run,feb
new file mode 100644
index 0000000..bd70e96
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/!Run,feb
@@ -0,0 +1,5 @@
+| Unicode Run file
+|
+Set Unicode$Dir <Obey$Dir>
+SetMacro Unicode$Path <Unicode$Dir>.,Resources:$.Resources.Unicode.
+IconSprites Unicode:!Sprites
diff --git a/riscos/!Boot/Resources/!Unicode/!Sprites,ff9 b/riscos/!Boot/Resources/!Unicode/!Sprites,ff9
new file mode 100644
index 0000000..3eb5b44
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/!Sprites,ff9
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/!Sprites11,ff9 b/riscos/!Boot/Resources/!Unicode/!Sprites11,ff9
new file mode 100644
index 0000000..48986b4
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/!Sprites11,ff9
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/!Sprites22,ff9 b/riscos/!Boot/Resources/!Unicode/!Sprites22,ff9
new file mode 100644
index 0000000..63a6e61
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/!Sprites22,ff9
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Acorn/Latin1 b/riscos/!Boot/Resources/!Unicode/Encodings/Acorn/Latin1
new file mode 100644
index 0000000..bdf5d3b
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Acorn/Latin1
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Apple/CentEuro b/riscos/!Boot/Resources/!Unicode/Encodings/Apple/CentEuro
new file mode 100644
index 0000000..5ab69ff
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Apple/CentEuro
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Apple/Cyrillic b/riscos/!Boot/Resources/!Unicode/Encodings/Apple/Cyrillic
new file mode 100644
index 0000000..670fd6c
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Apple/Cyrillic
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Apple/Roman b/riscos/!Boot/Resources/!Unicode/Encodings/Apple/Roman
new file mode 100644
index 0000000..017fc5b
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Apple/Roman
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Apple/Ukrainian b/riscos/!Boot/Resources/!Unicode/Encodings/Apple/Ukrainian
new file mode 100644
index 0000000..a220587
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Apple/Ukrainian
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/BigFive b/riscos/!Boot/Resources/!Unicode/Encodings/BigFive
new file mode 100644
index 0000000..c659cef
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/BigFive
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/C0/40[ISO646] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/C0/40[ISO646]
new file mode 100644
index 0000000..cd92b54
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/C0/40[ISO646]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/C1/43[IS6429] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/C1/43[IS6429]
new file mode 100644
index 0000000..74002a1
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/C1/43[IS6429]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/40[646old] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/40[646old]
new file mode 100644
index 0000000..00e2d10
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/40[646old]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/41[646-GB] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/41[646-GB]
new file mode 100644
index 0000000..c293f93
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/41[646-GB]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/42[646IRV] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/42[646IRV]
new file mode 100644
index 0000000..e0b4bca
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/42[646IRV]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/43[FinSwe] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/43[FinSwe]
new file mode 100644
index 0000000..7d46469
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/43[FinSwe]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/47[646-SE] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/47[646-SE]
new file mode 100644
index 0000000..a6b091a
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/47[646-SE]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/48[646-SE] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/48[646-SE]
new file mode 100644
index 0000000..9bd24ab
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/48[646-SE]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/49[JS201K] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/49[JS201K]
new file mode 100644
index 0000000..20ce8d4
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/49[JS201K]
@@ -0,0 +1 @@
+abcdefghijklmnopqrstuvwxyz{|}~ \ No newline at end of file
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4A[JS201R] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4A[JS201R]
new file mode 100644
index 0000000..21d2a47
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4A[JS201R]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4B[646-DE] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4B[646-DE]
new file mode 100644
index 0000000..a2e284e
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4B[646-DE]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4C[646-PT] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4C[646-PT]
new file mode 100644
index 0000000..e076e25
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/4C[646-PT]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/54[GB1988] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/54[GB1988]
new file mode 100644
index 0000000..3b43719
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/54[GB1988]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/56[Teltxt] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/56[Teltxt]
new file mode 100644
index 0000000..73ce49e
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/56[Teltxt]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/59[646-IT] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/59[646-IT]
new file mode 100644
index 0000000..f1ae819
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/59[646-IT]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/5A[646-ES] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/5A[646-ES]
new file mode 100644
index 0000000..674fc2d
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/5A[646-ES]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/60[646-NO] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/60[646-NO]
new file mode 100644
index 0000000..fc92892
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/60[646-NO]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/66[646-FR] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/66[646-FR]
new file mode 100644
index 0000000..8dd6046
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/66[646-FR]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/69[646-HU] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/69[646-HU]
new file mode 100644
index 0000000..65300b2
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/69[646-HU]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/6B[Arabic] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/6B[Arabic]
new file mode 100644
index 0000000..c476899
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/6B[Arabic]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/6C[IS6937] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/6C[IS6937]
new file mode 100644
index 0000000..93453f5
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/6C[IS6937]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/7A[SerbCr] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/7A[SerbCr]
new file mode 100644
index 0000000..9740e78
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94/7A[SerbCr]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/40[JS6226] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/40[JS6226]
new file mode 100644
index 0000000..a677dfc
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/40[JS6226]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/41[GB2312] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/41[GB2312]
new file mode 100644
index 0000000..679608a
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/41[GB2312]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/42[JIS208] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/42[JIS208]
new file mode 100644
index 0000000..db014ff
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/42[JIS208]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/43[KS1001] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/43[KS1001]
new file mode 100644
index 0000000..b70f987
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/43[KS1001]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/44[JIS212] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/44[JIS212]
new file mode 100644
index 0000000..de64b1e
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/44[JIS212]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/47[CNS1] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/47[CNS1]
new file mode 100644
index 0000000..da07f45
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/47[CNS1]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/48[CNS2] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/48[CNS2]
new file mode 100644
index 0000000..44ee24c
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/48[CNS2]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/49[CNS3] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/49[CNS3]
new file mode 100644
index 0000000..a8464e5
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/49[CNS3]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4A[CNS4] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4A[CNS4]
new file mode 100644
index 0000000..a8f3e32
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4A[CNS4]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4B[CNS5] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4B[CNS5]
new file mode 100644
index 0000000..535b0f4
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4B[CNS5]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4C[CNS6] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4C[CNS6]
new file mode 100644
index 0000000..7bfb2b1
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4C[CNS6]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4D[CNS7] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4D[CNS7]
new file mode 100644
index 0000000..be14c72
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G94x94/4D[CNS7]
@@ -0,0 +1,2 @@
+5UVg6a9E:;U=>??@qABEFzGGH/I1III8K:LLGMQMG788;q>?@AfEEF:I=IJKLL%MJMSM6;>#>C]EZFG I9I7IkJJML5{68d';??ARBQDZEeFHAI)J*JJK~6X9 C|ˁeEGdG#Hٕ4K<@gFGH LLmLpLLM4gZ>BkEGcIJJJJGM4A+CzE FmFGjIlIKL-LEM
+F F.HL1CKKL6r7MtvKyB \ No newline at end of file
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/41[Lat1] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/41[Lat1]
new file mode 100644
index 0000000..97e6b11
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/41[Lat1]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/42[Lat2] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/42[Lat2]
new file mode 100644
index 0000000..b753c40
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/42[Lat2]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/43[Lat3] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/43[Lat3]
new file mode 100644
index 0000000..88d4778
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/43[Lat3]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/44[Lat4] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/44[Lat4]
new file mode 100644
index 0000000..a40662d
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/44[Lat4]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/46[Greek] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/46[Greek]
new file mode 100644
index 0000000..5eac080
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/46[Greek]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/47[Arabic] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/47[Arabic]
new file mode 100644
index 0000000..4507f46
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/47[Arabic]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/48[Hebrew] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/48[Hebrew]
new file mode 100644
index 0000000..70f39cc
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/48[Hebrew]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/4C[Cyrill] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/4C[Cyrill]
new file mode 100644
index 0000000..8ff0115
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/4C[Cyrill]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/4D[Lat5] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/4D[Lat5]
new file mode 100644
index 0000000..6381e60
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/4D[Lat5]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/50[LatSup] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/50[LatSup]
new file mode 100644
index 0000000..a320c7f
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/50[LatSup]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/52[IS6937] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/52[IS6937]
new file mode 100644
index 0000000..dff6ccb
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/52[IS6937]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/54[Thai] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/54[Thai]
new file mode 100644
index 0000000..656dd9b
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/54[Thai]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/56[Lat6] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/56[Lat6]
new file mode 100644
index 0000000..4e3e4f3
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/56[Lat6]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/58[L6Sami] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/58[L6Sami]
new file mode 100644
index 0000000..4dfd918
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/58[L6Sami]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/59[Lat7] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/59[Lat7]
new file mode 100644
index 0000000..256a88e
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/59[Lat7]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5C[Welsh] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5C[Welsh]
new file mode 100644
index 0000000..b5e0050
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5C[Welsh]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5D[Sami] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5D[Sami]
new file mode 100644
index 0000000..15734c0
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5D[Sami]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5E[Hebrew] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5E[Hebrew]
new file mode 100644
index 0000000..ae9cdea
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5E[Hebrew]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5F[Lat8] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5F[Lat8]
new file mode 100644
index 0000000..c15713e
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/5F[Lat8]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/62[Lat9] b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/62[Lat9]
new file mode 100644
index 0000000..5bf449d
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/ISO2022/G96/62[Lat9]
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/KOI8-R b/riscos/!Boot/Resources/!Unicode/Encodings/KOI8-R
new file mode 100644
index 0000000..8063cd4
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/KOI8-R
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1250 b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1250
new file mode 100644
index 0000000..7a0d35c
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1250
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1251 b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1251
new file mode 100644
index 0000000..3d6009c
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1251
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1252 b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1252
new file mode 100644
index 0000000..6d3bf29
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1252
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1253 b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1253
new file mode 100644
index 0000000..50a48be
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1253
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1254 b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1254
new file mode 100644
index 0000000..45ecfe9
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1254
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1256 b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1256
new file mode 100644
index 0000000..1dca1e7
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP1256
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP866 b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP866
new file mode 100644
index 0000000..cd214d2
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP866
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP874 b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP874
new file mode 100644
index 0000000..26a6fc8
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP874
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP932 b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP932
new file mode 100644
index 0000000..2c0c111
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Encodings/Microsoft/CP932
Binary files differ
diff --git a/riscos/!Boot/Resources/!Unicode/Files/Aliases b/riscos/!Boot/Resources/!Unicode/Files/Aliases
new file mode 100644
index 0000000..0fa6386
--- /dev/null
+++ b/riscos/!Boot/Resources/!Unicode/Files/Aliases
@@ -0,0 +1,302 @@
+# > Unicode:Files.Aliases
+# Mapping of character set encoding names to their canonical form
+#
+# Lines starting with a '#' are comments; blank lines are ignored.
+#
+# Based on http://www.iana.org/assignments/character-sets and
+# http://www.iana.org/assignments/ianacharset-mib
+#
+# Canonical Form MIBenum Aliases...
+#
+US-ASCII 3 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII
+ISO-10646-UTF-1 27 csISO10646UTF1
+ISO_646.basic:1983 28 ref csISO646basic1983
+INVARIANT 29 csINVARIANT
+ISO_646.irv:1983 30 iso-ir-2 irv csISO2IntlRefVersion
+BS_4730 20 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom
+NATS-SEFI 31 iso-ir-8-1 csNATSSEFI
+NATS-SEFI-ADD 32 iso-ir-8-2 csNATSSEFIADD
+NATS-DANO 33 iso-ir-9-1 csNATSDANO
+NATS-DANO-ADD 34 iso-ir-9-2 csNATSDANOADD
+SEN_850200_B 35 iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish
+SEN_850200_C 21 iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames
+KS_C_5601-1987 36 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987
+ISO-2022-KR 37 csISO2022KR
+EUC-KR 38 csEUCKR EUCKR
+ISO-2022-JP 39 csISO2022JP
+ISO-2022-JP-2 40 csISO2022JP2
+ISO-2022-CN 104
+ISO-2022-CN-EXT 105
+JIS_C6220-1969-jp 41 JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp
+JIS_C6220-1969-ro 42 iso-ir-14 jp ISO646-JP csISO14JISC6220ro
+IT 22 iso-ir-15 ISO646-IT csISO15Italian
+PT 43 iso-ir-16 ISO646-PT csISO16Portuguese
+ES 23 iso-ir-17 ISO646-ES csISO17Spanish
+greek7-old 44 iso-ir-18 csISO18Greek7Old
+latin-greek 45 iso-ir-19 csISO19LatinGreek
+DIN_66003 24 iso-ir-21 de ISO646-DE csISO21German
+NF_Z_62-010_(1973) 46 iso-ir-25 ISO646-FR1 csISO25French
+Latin-greek-1 47 iso-ir-27 csISO27LatinGreek1
+ISO_5427 48 iso-ir-37 csISO5427Cyrillic
+JIS_C6226-1978 49 iso-ir-42 csISO42JISC62261978
+BS_viewdata 50 iso-ir-47 csISO47BSViewdata
+INIS 51 iso-ir-49 csISO49INIS
+INIS-8 52 iso-ir-50 csISO50INIS8
+INIS-cyrillic 53 iso-ir-51 csISO51INISCyrillic
+ISO_5427:1981 54 iso-ir-54 ISO5427Cyrillic1981
+ISO_5428:1980 55 iso-ir-55 csISO5428Greek
+GB_1988-80 56 iso-ir-57 cn ISO646-CN csISO57GB1988
+GB_2312-80 57 iso-ir-58 chinese csISO58GB231280
+NS_4551-1 25 iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1
+NS_4551-2 58 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2
+NF_Z_62-010 26 iso-ir-69 ISO646-FR fr csISO69French
+videotex-suppl 59 iso-ir-70 csISO70VideotexSupp1
+PT2 60 iso-ir-84 ISO646-PT2 csISO84Portuguese2
+ES2 61 iso-ir-85 ISO646-ES2 csISO85Spanish2
+MSZ_7795.3 62 iso-ir-86 ISO646-HU hu csISO86Hungarian
+JIS_C6226-1983 63 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208
+greek7 64 iso-ir-88 csISO88Greek7
+ASMO_449 65 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449
+iso-ir-90 66 csISO90
+JIS_C6229-1984-a 67 iso-ir-91 jp-ocr-a csISO91JISC62291984a
+JIS_C6229-1984-b 68 iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b
+JIS_C6229-1984-b-add 69 iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd
+JIS_C6229-1984-hand 70 iso-ir-94 jp-ocr-hand csISO94JIS62291984hand
+JIS_C6229-1984-hand-add 71 iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd
+JIS_C6229-1984-kana 72 iso-ir-96 csISO96JISC62291984kana
+ISO_2033-1983 73 iso-ir-98 e13b csISO2033
+ANSI_X3.110-1983 74 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS
+ISO-8859-1 4 iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1
+ISO-8859-2 5 iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2
+T.61-7bit 75 iso-ir-102 csISO102T617bit
+T.61-8bit 76 T.61 iso-ir-103 csISO103T618bit
+ISO-8859-3 6 iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3
+ISO-8859-4 7 iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4
+ECMA-cyrillic 77 iso-ir-111 KOI8-E csISO111ECMACyrillic
+CSA_Z243.4-1985-1 78 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1
+CSA_Z243.4-1985-2 79 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2
+CSA_Z243.4-1985-gr 80 iso-ir-123 csISO123CSAZ24341985gr
+ISO-8859-6 9 iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic
+ISO-8859-6-E 81 csISO88596E ISO_8859-6-E
+ISO-8859-6-I 82 csISO88596I ISO_8859-6-I
+ISO-8859-7 10 iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7
+T.101-G2 83 iso-ir-128 csISO128T101G2
+ISO-8859-8 11 iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8
+ISO-8859-8-E 84 csISO88598E ISO_8859-8-E
+ISO-8859-8-I 85 csISO88598I ISO_8859-8-I
+CSN_369103 86 iso-ir-139 csISO139CSN369103
+JUS_I.B1.002 87 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002
+ISO_6937-2-add 14 iso-ir-142 csISOTextComm
+IEC_P27-1 88 iso-ir-143 csISO143IECP271
+ISO-8859-5 8 iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5
+JUS_I.B1.003-serb 89 iso-ir-146 serbian csISO146Serbian
+JUS_I.B1.003-mac 90 macedonian iso-ir-147 csISO147Macedonian
+ISO-8859-9 12 iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9
+greek-ccitt 91 iso-ir-150 csISO150 csISO150GreekCCITT
+NC_NC00-10:81 92 cuba iso-ir-151 ISO646-CU csISO151Cuba
+ISO_6937-2-25 93 iso-ir-152 csISO6937Add
+GOST_19768-74 94 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874
+ISO_8859-supp 95 iso-ir-154 latin1-2-5 csISO8859Supp
+ISO_10367-box 96 iso-ir-155 csISO10367Box
+ISO-8859-10 13 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10
+latin-lap 97 lap iso-ir-158 csISO158Lap
+JIS_X0212-1990 98 x0212 iso-ir-159 csISO159JISX02121990
+DS_2089 99 DS2089 ISO646-DK dk csISO646Danish
+us-dk 100 csUSDK
+dk-us 101 csDKUS
+JIS_X0201 15 X0201 csHalfWidthKatakana
+KSC5636 102 ISO646-KR csKSC5636
+ISO-10646-UCS-2 1000 csUnicode UCS-2 UCS2
+ISO-10646-UCS-4 1001 csUCS4 UCS-4 UCS4
+DEC-MCS 2008 dec csDECMCS
+hp-roman8 2004 roman8 r8 csHPRoman8
+macintosh 2027 mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN
+IBM037 2028 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037
+IBM038 2029 EBCDIC-INT cp038 csIBM038
+IBM273 2030 CP273 csIBM273
+IBM274 2031 EBCDIC-BE CP274 csIBM274
+IBM275 2032 EBCDIC-BR cp275 csIBM275
+IBM277 2033 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277
+IBM278 2034 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278
+IBM280 2035 CP280 ebcdic-cp-it csIBM280
+IBM281 2036 EBCDIC-JP-E cp281 csIBM281
+IBM284 2037 CP284 ebcdic-cp-es csIBM284
+IBM285 2038 CP285 ebcdic-cp-gb csIBM285
+IBM290 2039 cp290 EBCDIC-JP-kana csIBM290
+IBM297 2040 cp297 ebcdic-cp-fr csIBM297
+IBM420 2041 cp420 ebcdic-cp-ar1 csIBM420
+IBM423 2042 cp423 ebcdic-cp-gr csIBM423
+IBM424 2043 cp424 ebcdic-cp-he csIBM424
+IBM437 2011 cp437 437 csPC8CodePage437
+IBM500 2044 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500
+IBM775 2087 cp775 csPC775Baltic
+IBM850 2009 cp850 850 csPC850Multilingual
+IBM851 2045 cp851 851 csIBM851
+IBM852 2010 cp852 852 csPCp852
+IBM855 2046 cp855 855 csIBM855
+IBM857 2047 cp857 857 csIBM857
+IBM860 2048 cp860 860 csIBM860
+IBM861 2049 cp861 861 cp-is csIBM861
+IBM862 2013 cp862 862 csPC862LatinHebrew
+IBM863 2050 cp863 863 csIBM863
+IBM864 2051 cp864 csIBM864
+IBM865 2052 cp865 865 csIBM865
+IBM866 2086 cp866 866 csIBM866
+IBM868 2053 CP868 cp-ar csIBM868
+IBM869 2054 cp869 869 cp-gr csIBM869
+IBM870 2055 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870
+IBM871 2056 CP871 ebcdic-cp-is csIBM871
+IBM880 2057 cp880 EBCDIC-Cyrillic csIBM880
+IBM891 2058 cp891 csIBM891
+IBM903 2059 cp903 csIBM903
+IBM904 2060 cp904 904 csIBBM904
+IBM905 2061 CP905 ebcdic-cp-tr csIBM905
+IBM918 2062 CP918 ebcdic-cp-ar2 csIBM918
+IBM1026 2063 CP1026 csIBM1026
+EBCDIC-AT-DE 2064 csIBMEBCDICATDE
+EBCDIC-AT-DE-A 2065 csEBCDICATDEA
+EBCDIC-CA-FR 2066 csEBCDICCAFR
+EBCDIC-DK-NO 2067 csEBCDICDKNO
+EBCDIC-DK-NO-A 2068 csEBCDICDKNOA
+EBCDIC-FI-SE 2069 csEBCDICFISE
+EBCDIC-FI-SE-A 2070 csEBCDICFISEA
+EBCDIC-FR 2071 csEBCDICFR
+EBCDIC-IT 2072 csEBCDICIT
+EBCDIC-PT 2073 csEBCDICPT
+EBCDIC-ES 2074 csEBCDICES
+EBCDIC-ES-A 2075 csEBCDICESA
+EBCDIC-ES-S 2076 csEBCDICESS
+EBCDIC-UK 2077 csEBCDICUK
+EBCDIC-US 2078 csEBCDICUS
+UNKNOWN-8BIT 2079 csUnknown8BiT
+MNEMONIC 2080 csMnemonic
+MNEM 2081 csMnem
+VISCII 2082 csVISCII
+VIQR 2083 csVIQR
+KOI8-R 2084 csKOI8R
+KOI8-U 2088
+IBM00858 2089 CCSID00858 CP00858 PC-Multilingual-850+euro
+IBM00924 2090 CCSID00924 CP00924 ebcdic-Latin9--euro
+IBM01140 2091 CCSID01140 CP01140 ebcdic-us-37+euro
+IBM01141 2092 CCSID01141 CP01141 ebcdic-de-273+euro
+IBM01142 2093 CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro
+IBM01143 2094 CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro
+IBM01144 2095 CCSID01144 CP01144 ebcdic-it-280+euro
+IBM01145 2096 CCSID01145 CP01145 ebcdic-es-284+euro
+IBM01146 2097 CCSID01146 CP01146 ebcdic-gb-285+euro
+IBM01147 2098 CCSID01147 CP01147 ebcdic-fr-297+euro
+IBM01148 2099 CCSID01148 CP01148 ebcdic-international-500+euro
+IBM01149 2100 CCSID01149 CP01149 ebcdic-is-871+euro
+Big5-HKSCS 2101
+IBM1047 2102 IBM-1047
+PTCP154 2103 csPTCP154 PT154 CP154 Cyrillic-Asian
+Amiga-1251 2104 Ami1251 Amiga1251 Ami-1251
+KOI7-switched 2105
+UNICODE-1-1 1010 csUnicode11
+SCSU 1011
+UTF-7 1012
+UTF-16BE 1013
+UTF-16LE 1014
+UTF-16 1015
+CESU-8 1016 csCESU-8
+UTF-32 1017
+UTF-32BE 1018
+UTF-32LE 1019
+BOCU-1 1020 csBOCU-1
+UNICODE-1-1-UTF-7 103 csUnicode11UTF7
+UTF-8 106 UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8
+ISO-8859-13 109 8859_13 ISO8859-13
+ISO-8859-14 110 iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14
+ISO-8859-15 111 ISO_8859-15 Latin-9 8859_15 ISO8859-15
+ISO-8859-16 112 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10
+GBK 113 CP936 MS936 windows-936
+GB18030 114
+OSD_EBCDIC_DF04_15 115
+OSD_EBCDIC_DF03_IRV 116
+OSD_EBCDIC_DF04_1 117
+JIS_Encoding 16 csJISEncoding
+Shift_JIS 17 MS_Kanji csShiftJIS X-SJIS Shift-JIS
+EUC-JP 18 csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP
+Extended_UNIX_Code_Fixed_Width_for_Japanese 19 csEUCFixWidJapanese
+ISO-10646-UCS-Basic 1002 csUnicodeASCII
+ISO-10646-Unicode-Latin1 1003 csUnicodeLatin1 ISO-10646
+ISO-Unicode-IBM-1261 1005 csUnicodeIBM1261
+ISO-Unicode-IBM-1268 1006 csUnicodeIBM1268
+ISO-Unicode-IBM-1276 1007 csUnicodeIBM1276
+ISO-Unicode-IBM-1264 1008 csUnicodeIBM1264
+ISO-Unicode-IBM-1265 1009 csUnicodeIBM1265
+ISO-8859-1-Windows-3.0-Latin-1 2000 csWindows30Latin1
+ISO-8859-1-Windows-3.1-Latin-1 2001 csWindows31Latin1
+ISO-8859-2-Windows-Latin-2 2002 csWindows31Latin2
+ISO-8859-9-Windows-Latin-5 2003 csWindows31Latin5
+Adobe-Standard-Encoding 2005 csAdobeStandardEncoding
+Ventura-US 2006 csVenturaUS
+Ventura-International 2007 csVenturaInternational
+PC8-Danish-Norwegian 2012 csPC8DanishNorwegian
+PC8-Turkish 2014 csPC8Turkish
+IBM-Symbols 2015 csIBMSymbols
+IBM-Thai 2016 csIBMThai
+HP-Legal 2017 csHPLegal
+HP-Pi-font 2018 csHPPiFont
+HP-Math8 2019 csHPMath8
+Adobe-Symbol-Encoding 2020 csHPPSMath
+HP-DeskTop 2021 csHPDesktop
+Ventura-Math 2022 csVenturaMath
+Microsoft-Publishing 2023 csMicrosoftPublishing
+Windows-31J 2024 csWindows31J
+GB2312 2025 csGB2312 EUC-CN EUCCN CN-GB
+Big5 2026 csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE
+windows-1250 2250 CP1250 MS-EE
+windows-1251 2251 CP1251 MS-CYRL
+windows-1252 2252 CP1252 MS-ANSI
+windows-1253 2253 CP1253 MS-GREEK
+windows-1254 2254 CP1254 MS-TURK
+windows-1255 2255
+windows-1256 2256 CP1256 MS-ARAB
+windows-1257 2257 CP1257 WINBALTRIM
+windows-1258 2258
+TIS-620 2259
+HZ-GB-2312 2085
+
+# Additional encodings not defined by IANA
+
+# Arbitrary allocations
+#CP737 3001
+#CP853 3002
+#CP856 3003
+CP874 3004 WINDOWS-874
+#CP922 3005
+#CP1046 3006
+#CP1124 3007
+#CP1125 3008 WINDOWS-1125
+#CP1129 3009
+#CP1133 3010 IBM-CP1133
+#CP1161 3011 IBM-1161 IBM1161 CSIBM1161
+#CP1162 3012 IBM-1162 IBM1162 CSIBM1162
+#CP1163 3013 IBM-1163 IBM1163 CSIBM1163
+#GEORGIAN-ACADEMY 3014
+#GEORGIAN-PS 3015
+#KOI8-RU 3016
+#KOI8-T 3017
+#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC
+#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN
+#MACGREEK 3020 X-MAC-GREEK MAC-GREEK
+#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW
+#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND
+#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA
+#MACTHAI 3024 X-MAC-THAI MAC-THAI
+#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH
+#MULELAO-1 3026
+
+# From Unicode Lib
+ISO-IR-182 4000
+ISO-IR-197 4002
+ISO-2022-JP-1 4008
+MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC
+MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN
+MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN
+JOHAB 4012
+ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11
+X-CURRENT 4999 X-SYSTEM
+X-ACORN-LATIN1 5001
+X-ACORN-FUZZY 5002
diff --git a/riscos/ReadMe b/riscos/ReadMe
new file mode 100644
index 0000000..5419b3f
--- /dev/null
+++ b/riscos/ReadMe
@@ -0,0 +1,47 @@
+What is Iconv?
+==============
+
+Iconv is a module which provides character set conversion akin to that provided
+by the C iconv() function.
+
+Iconv Installation instructions
+===============================
+
+To install the Iconv module, simply use the System merge utility provided by
+Configure to merge the !System directory provided with the one on your system.
+
+Use the Boot merge facility in Configure to merge the provided !Boot directory
+with the one on your system. If there is no !Boot merge facility provided on
+your system, simply drag the !Boot directory over your existing boot structure.
+
+Module source can be found in the "src" directory.
+Further documentation can be found in the "doc" directory.
+
+Note for developers:
+~~~~~~~~~~~~~~~~~~~~
+The libiconv stubs provided are suitable for use with the SCL.
+UnixLib users should be aware that an interface for this module is provided by
+UnixLib itself.
+
+Licence
+=======
+
+Iconv is Copyright 2004-7 J-M Bell
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..f9d136b
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,49 @@
+# Child makefile fragment
+#
+# Toolchain is provided by top-level makefile
+#
+# Variables provided by top-level makefile
+#
+# COMPONENT The name of the component
+# EXPORT The location of the export directory
+# TOP The location of the source tree root
+# RELEASEDIR The place to put release objects
+# DEBUGDIR The place to put debug objects
+#
+# do_include Canned command sequence to include a child makefile
+#
+# Variables provided by parent makefile:
+#
+# DIR The name of the directory we're in, relative to $(TOP)
+#
+# Variables we can manipulate:
+#
+# ITEMS_CLEAN The list of items to remove for "make clean"
+# ITEMS_DISTCLEAN The list of items to remove for "make distclean"
+# TARGET_TESTS The list of target names to run for "make test"
+#
+# SOURCES The list of sources to build for $(COMPONENT)
+#
+# Plus anything from the toolchain
+
+# Push parent directory onto the directory stack
+sp := $(sp).x
+dirstack_$(sp) := $(d)
+d := $(DIR)
+
+# Manipulate include paths
+CFLAGS := $(CFLAGS) -I$(d)
+
+# Sources
+SRCS_$(d) := alias.c aliases.c eightbit.c iconv.c utils.c
+
+# Append to sources for component
+SOURCES += $(addprefix $(d), $(SRCS_$(d)))
+
+# Now include any children we may have
+MAKE_INCLUDES := $(wildcard $(d)*/Makefile)
+$(eval $(foreach INC, $(MAKE_INCLUDES), $(call do_include,$(INC))))
+
+# Finally, pop off the directory stack
+d := $(dirstack_$(sp))
+sp := $(basename $(sp))
diff --git a/src/alias.c b/src/alias.c
new file mode 100644
index 0000000..ebc1b78
--- /dev/null
+++ b/src/alias.c
@@ -0,0 +1,89 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "unicode/charsets.h"
+#include "unicode/encoding.h"
+
+#include "internal.h"
+
+/* One row of the special-case table consulted before the Aliases data. */
+struct table_entry {
+ const char *alias; /* '/'-delimited list of upper-case names, e.g. "/A/B/" */
+ const char *encname; /* name passed on to encoding_number_from_name() */
+};
+
+/* This table contains special cases to allow us to use UnicodeLib sensibly.
+ *
+ * Each alias list is wrapped in, and separated by, '/' so that a lookup key
+ * of the form "/NAME/" can be found with a plain substring search.
+ *
+ * Row order matters: iconv_encoding_name_from_number() picks rows 0..3 by
+ * position, so keep the two in step when adding or moving entries. */
+static const struct table_entry mapping_table[] = {
+ {"/UTF-7/UNICODE-1-1-UTF-7/UNICODE-2-0-UTF-7/", "UTF-7" },
+ {"/ISO-10646-UCS-4/UCS-4/UTF-32/", "ISO-10646-UCS-4" },
+ {"/UTF-16/UCS-2/ISO-10646-UCS-2/UNICODE-1-1/UNICODE-2-0/", "UTF-16" },
+ {"/ISO-2022/", "ISO-2022" },
+};
+
+/* Number of rows in mapping_table */
+#define TABLE_SIZE (sizeof(mapping_table) / sizeof(mapping_table[0]))
+
+/**
+ * Look up an encoding number, based on its name
+ *
+ * The special-case table is tried first: the name is upper-cased (ASCII
+ * letters only) and wrapped in '/' so that it can only match one complete
+ * entry of a '/'-delimited alias list. Anything else is resolved through the
+ * Aliases data and the canonical name is then handed to UnicodeLib.
+ *
+ * \param name The encoding name
+ * \return The encoding number, or 0 if not found.
+ */
+int iconv_encoding_number_from_name(const char *name)
+{
+	unsigned int i;
+	char buf[256];
+	struct canon *c;
+	int len;
+
+	if (!name)
+		return 0;
+
+	len = snprintf(buf, sizeof buf, "/%s/", name);
+
+	/* Only search the table when the delimited key fitted in the buffer
+	 * and the name itself contains no '/'. An embedded '/' would let a
+	 * name such as "UCS-4/UTF-32" match across two neighbouring aliases,
+	 * and a truncated key has lost its closing delimiter. */
+	if (len > 0 && (size_t) len < sizeof buf &&
+			strchr(name, '/') == NULL) {
+		/* convert to upper case */
+		for (i = 0; buf[i] != '\0'; i++) {
+			if (buf[i] >= 'a' && buf[i] <= 'z')
+				buf[i] = buf[i] - 32;
+		}
+
+		for (i = 0; i != TABLE_SIZE; i++)
+			if (strstr(mapping_table[i].alias, buf) != NULL)
+				return encoding_number_from_name(
+						mapping_table[i].encname);
+	}
+
+	/* Not a special case: go via the canonical name from the Aliases data */
+	c = alias_canonicalise(name);
+	if (!c)
+		return 0;
+
+	return encoding_number_from_name(c->name);
+}
+
+/**
+ * Look up an encoding name, based on its MIB number
+ *
+ * For the four encodings special-cased in mapping_table the value returned
+ * is that row's '/'-delimited alias list; every other number is resolved
+ * through the Aliases data.
+ *
+ * \param number The encoding MIB number
+ * \return Pointer to encoding name, or NULL if not found
+ */
+const char *iconv_encoding_name_from_number(int number)
+{
+	/* UnicodeLib offers no call for this, so the special cases are
+	 * answered straight from our own table (rows are picked by index). */
+	switch (number) {
+	case csUnicode11UTF7:
+		return mapping_table[0].alias;
+	case csUCS4:
+		return mapping_table[1].alias;
+	case csUnicode11:
+		return mapping_table[2].alias;
+	case csVenturaMath:
+		return mapping_table[3].alias;
+	default:
+		break;
+	}
+
+	return mibenum_to_name(number);
+}
diff --git a/src/aliases.c b/src/aliases.c
new file mode 100644
index 0000000..1292685
--- /dev/null
+++ b/src/aliases.c
@@ -0,0 +1,364 @@
+#include <ctype.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "internal.h"
+
+/* An alias name, chained into a hash bucket and pointing at its canonical
+ * form. Allocated with extra space so that name holds the whole string. */
+struct alias {
+ struct alias *next; /* next entry in the same bucket */
+ struct canon *canon; /* canonical form this alias stands for */
+ unsigned short name_len; /* strlen(name) */
+ char name[1]; /* alias text, allocated past the end of the struct */
+};
+
+/* Number of buckets in each table; hash_val() reduces modulo this */
+#define HASH_SIZE (43)
+static struct canon *canon_tab[HASH_SIZE]; /* canonical names, by hash of name */
+static struct alias *alias_tab[HASH_SIZE]; /* alias names, by hash of name */
+
+static bool create_alias(const char *alias, struct canon *c);
+static struct canon *create_canon(const char *canon, short mibenum);
+static int hash_val(const char *alias);
+
+#ifdef TEST
+static void dump_alias_data(void);
+
+/* Stand-alone smoke test for the alias tables; only built when TEST is
+ * defined. Loads the Aliases file, dumps it and performs a few lookups. */
+int main (void)
+{
+	struct canon *c;
+
+	if (!create_alias_data("Unicode:Files.Aliases")) {
+		printf("failed to read Unicode:Files.Aliases\n");
+		return 1;
+	}
+
+	dump_alias_data();
+
+	/* a name that should not exist */
+	c = alias_canonicalise("moose");
+	if (c)
+		printf("!!!\n");
+
+	c = alias_canonicalise("csinvariant");
+	if (c)
+		printf("%s %d\n", c->name, c->mib_enum);
+
+	c = alias_canonicalise("nats-sefi-add");
+	if (c) {
+		printf("%s %d\n", c->name, c->mib_enum);
+
+		/* round-trip checks; only meaningful (and only safe) when
+		 * the lookup above actually found something */
+		printf("%d\n", mibenum_from_name(c->name));
+
+		printf("%s\n", mibenum_to_name(c->mib_enum));
+	}
+
+	free_alias_data();
+
+	return 0;
+}
+#endif
+
+/**
+ * Create an alias
+ *
+ * The new entry is pushed onto the front of its bucket in alias_tab.
+ *
+ * \param alias The alias name
+ * \param c The canonical form
+ * \return true on success, false otherwise
+ */
+bool create_alias(const char *alias, struct canon *c)
+{
+	struct alias *entry;
+	size_t len;
+	int bucket;
+
+	if (alias == NULL || c == NULL)
+		return false;
+
+	len = strlen(alias);
+	bucket = hash_val(alias);
+
+	entry = malloc(sizeof(struct alias) + len + 1);
+	if (entry == NULL)
+		return false;
+
+	entry->canon = c;
+	entry->name_len = len;
+	memcpy(entry->name, alias, len + 1);
+	/* terminate at the stored length, as the lookup code compares it */
+	entry->name[entry->name_len] = '\0';
+
+	/* link in at the head of the chain */
+	entry->next = alias_tab[bucket];
+	alias_tab[bucket] = entry;
+
+	return true;
+}
+
+/**
+ * Create a canonical form
+ *
+ * The new entry is pushed onto the front of its bucket in canon_tab.
+ *
+ * \param canon The canonical name
+ * \param mibenum The MIB enum value
+ * \return Pointer to struct canon or NULL on error
+ */
+struct canon *create_canon(const char *canon, short mibenum)
+{
+	struct canon *entry;
+	int bucket, len;
+
+	if (canon == NULL)
+		return NULL;
+
+	len = strlen(canon);
+	bucket = hash_val(canon);
+
+	entry = malloc(sizeof(struct canon) + len + 1);
+	if (entry == NULL)
+		return NULL;
+
+	entry->mib_enum = mibenum;
+	entry->name_len = len;
+	/* copies the terminator along with the name */
+	memcpy(entry->name, canon, (size_t) len + 1);
+
+	/* link in at the head of the chain */
+	entry->next = canon_tab[bucket];
+	canon_tab[bucket] = entry;
+
+	return entry;
+}
+
+/**
+ * Hash function
+ *
+ * Bit 0x20 is masked out of every character before mixing, so names that
+ * differ only in letter case land in the same bucket.
+ *
+ * \param alias String to hash
+ * \return The hashed value, in the range [0, HASH_SIZE)
+ */
+int hash_val(const char *alias)
+{
+	unsigned int h = 5381;
+	const char *s;
+
+	if (alias == NULL)
+		return 0;
+
+	for (s = alias; *s != '\0'; s++)
+		h = (h * 33) ^ (*s & ~0x20); /* case insensitive */
+
+	return h % HASH_SIZE;
+}
+
+/**
+ * Free all alias data
+ *
+ * Empties every bucket of both hash tables, leaving them ready for reuse.
+ */
+void free_alias_data(void)
+{
+	int bucket;
+
+	for (bucket = 0; bucket != HASH_SIZE; bucket++) {
+		/* pop and free canonical names until the chain is empty */
+		while (canon_tab[bucket] != NULL) {
+			struct canon *dead = canon_tab[bucket];
+
+			canon_tab[bucket] = dead->next;
+			free(dead);
+		}
+
+		/* likewise for the aliases */
+		while (alias_tab[bucket] != NULL) {
+			struct alias *dead = alias_tab[bucket];
+
+			alias_tab[bucket] = dead->next;
+			free(dead);
+		}
+	}
+}
+
+#ifdef TEST
+/**
+ * Dump all alias data to stdout
+ *
+ * Prints "<bucket> <name>" for every canonical name and alias, followed by
+ * a running size total.
+ */
+void dump_alias_data(void)
+{
+	struct canon *c;
+	struct alias *a;
+	int i;
+	size_t size = 0;
+
+	for (i = 0; i != HASH_SIZE; i++) {
+		for (c = canon_tab[i]; c; c = c->next) {
+			printf("%d %s\n", i, c->name);
+			size += offsetof(struct canon, name) + c->name_len;
+		}
+
+		for (a = alias_tab[i]; a; a = a->next) {
+			printf("%d %s\n", i, a->name);
+			size += offsetof(struct alias, name) + a->name_len;
+		}
+	}
+
+	/* NOTE(review): these add the number of buckets, not the size of the
+	 * tables in bytes - confirm which was intended */
+	size += (sizeof(canon_tab) / sizeof(canon_tab[0]));
+	size += (sizeof(alias_tab) / sizeof(alias_tab[0]));
+
+	/* "%d" does not match size_t; print through unsigned long instead */
+	printf("%lu\n", (unsigned long) size);
+}
+#endif
+
+/**
+ * Create alias data from Aliases file
+ *
+ * Each useful line has the form
+ *     <canonical name> <MIB enum> [<alias> ...]
+ * with the fields separated by whitespace. Lines starting with '#' and
+ * lines with fewer than two fields are ignored. Lines are read into a
+ * 300 byte buffer, so anything longer is split and the remainder is parsed
+ * as if it were a line of its own.
+ *
+ * \param filename The path to the Aliases file
+ * \return 1 on success, 0 on failure.
+ */
+int create_alias_data(const char *filename)
+{
+	char buf[300];
+	FILE *fp;
+
+	if (!filename)
+		return 0;
+
+	fp = fopen(filename, "r");
+	if (!fp)
+		return 0;
+
+	while (fgets(buf, sizeof buf, fp)) {
+		char *p, *aliases = 0, *mib, *end;
+		struct canon *cf;
+		size_t linelen;
+
+		if (buf[0] == 0 || buf[0] == '#')
+			/* skip blank lines or comments */
+			continue;
+
+		/* Lose the terminating newline, but only if there is one:
+		 * the last line of the file may end without it, and chopping
+		 * blindly would eat the final character of its last field.
+		 * (linelen is at least 1 here as buf[0] is not NUL.) */
+		linelen = strlen(buf);
+		if (buf[linelen - 1] == '\n')
+			buf[--linelen] = 0;
+		end = buf + linelen;
+
+		/* The ctype functions need an unsigned char value, hence the
+		 * casts on every call below. */
+
+		/* find end of canonical form */
+		for (p = buf; *p && !isspace((unsigned char) *p) &&
+				!iscntrl((unsigned char) *p); p++)
+			; /* do nothing */
+		if (p >= end)
+			continue;
+		*p++ = '\0'; /* terminate canonical form */
+
+		/* skip whitespace */
+		for (; *p && isspace((unsigned char) *p); p++)
+			; /* do nothing */
+		if (p >= end)
+			continue;
+		mib = p;
+
+		/* find end of mibenum */
+		for (; *p && !isspace((unsigned char) *p) &&
+				!iscntrl((unsigned char) *p); p++)
+			; /* do nothing */
+		if (p < end)
+			*p++ = '\0'; /* terminate mibenum */
+
+		cf = create_canon(buf, atoi(mib));
+		if (!cf)
+			continue;
+
+		/* skip whitespace */
+		for (; p < end && *p && isspace((unsigned char) *p); p++)
+			; /* do nothing */
+		if (p >= end)
+			continue;
+		aliases = p;
+
+		while (p < end) {
+			/* find end of alias */
+			for (; *p && !isspace((unsigned char) *p) &&
+					!iscntrl((unsigned char) *p); p++)
+				; /* do nothing */
+			if (p > end)
+				/* stop if we've gone past the end */
+				break;
+			/* terminate current alias */
+			*p++ = '\0';
+
+			if (!create_alias(aliases, cf))
+				break;
+
+			/* in terminating, we may have advanced
+			 * past the end - check this here */
+			if (p >= end)
+				break;
+
+			/* skip whitespace */
+			for (; *p && isspace((unsigned char) *p); p++)
+				; /* do nothing */
+
+			if (p >= end)
+				/* gone past end => stop */
+				break;
+
+			/* update pointer to current alias */
+			aliases = p;
+		}
+	}
+
+	fclose(fp);
+
+	return 1;
+}
+
+/**
+ * Retrieve the canonical form of an alias name
+ *
+ * Canonical names are searched before aliases; both comparisons ignore
+ * letter case.
+ *
+ * \param alias The alias name
+ * \return Pointer to struct canon or NULL if not found
+ */
+struct canon *alias_canonicalise(const char *alias)
+{
+	struct canon *c;
+	struct alias *a;
+	int bucket, len;
+
+	if (alias == NULL)
+		return NULL;
+
+	bucket = hash_val(alias);
+	len = strlen(alias);
+
+	/* is it already a canonical name? */
+	for (c = canon_tab[bucket]; c != NULL; c = c->next) {
+		if (c->name_len == len && strcasecmp(c->name, alias) == 0)
+			return c;
+	}
+
+	/* otherwise it may be a known alias */
+	for (a = alias_tab[bucket]; a != NULL; a = a->next) {
+		if (a->name_len == len && strcasecmp(a->name, alias) == 0)
+			return a->canon;
+	}
+
+	return NULL;
+}
+
+/**
+ * Retrieve the MIB enum value assigned to an encoding name
+ *
+ * \param alias The alias (or canonical name) to look up
+ * \return The MIB enum value, or 0 if not found
+ */
+short mibenum_from_name(const char *alias)
+{
+	struct canon *found;
+
+	found = (alias != NULL) ? alias_canonicalise(alias) : NULL;
+
+	return (found != NULL) ? found->mib_enum : 0;
+}
+
+/**
+ * Retrieve the canonical name of an encoding from the MIB enum
+ *
+ * This walks every bucket of the canonical name table, as that table is
+ * keyed on name rather than on MIB enum.
+ *
+ * \param mibenum The MIB enum value
+ * \return Pointer to canonical name, or NULL if not found
+ */
+const char *mibenum_to_name(short mibenum)
+{
+	int bucket = 0;
+
+	while (bucket != HASH_SIZE) {
+		struct canon *entry = canon_tab[bucket++];
+
+		for (; entry != NULL; entry = entry->next) {
+			if (entry->mib_enum == mibenum)
+				return entry->name;
+		}
+	}
+
+	return NULL;
+}
diff --git a/src/eightbit.c b/src/eightbit.c
new file mode 100644
index 0000000..3ff3470
--- /dev/null
+++ b/src/eightbit.c
@@ -0,0 +1,280 @@
+/* stateless 8bit encoding support => no support for CP1255, 1258 or TCVN
+ * functions in this file have an identical API to the encoding functions
+ * in UnicodeLib. see unicode/encoding.h for documentation. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "internal.h"
+
+/* One supported 8bit encoding */
+struct table_entry {
+ const char *canon; /* canonical encoding name, compared case-insensitively */
+ const char *filename; /* table file beneath Unicode:Encodings., or 0 for none */
+};
+
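+/* Maps each canonical encoding name to the table file that
+ * iconv_eightbit_new() loads from beneath Unicode:Encodings; a null filename
+ * means there is no table to load. Table should be ordered by enc_num */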
+static const struct table_entry mapping_table[] = {
+ { "US-ASCII", 0 },
+ { "HP-ROMAN8", "HPR8" },
+ { "MACINTOSH", "Apple.Roman"},
+ { "IBM437", "Microsoft.CP437" },
+ { "IBM775", "Microsoft.CP775" },
+ { "IBM850", "Microsoft.CP850" },
+ { "IBM852", "Microsoft.CP852" },
+ { "IBM855", "Microsoft.CP855" },
+ { "IBM857", "Microsoft.CP857" },
+ { "IBM860", "Microsoft.CP860" },
+ { "IBM861", "Microsoft.CP861" },
+ { "IBM862", "Microsoft.CP862" },
+ { "IBM863", "Microsoft.CP863" },
+ { "IBM864", "Microsoft.CP864" },
+ { "IBM865", "Microsoft.CP865" },
+ { "IBM866", "Microsoft.CP866" },
+ { "IBM869", "Microsoft.CP869" },
+ { "KOI8-R", "KOI8-R" },
+ { "KOI8-U", "KOI8-U" },
+ { "IBM00858", "Microsoft.CP858" },
+ { "WINDOWS-1250", "Microsoft.CP1250" },
+ { "WINDOWS-1251", "Microsoft.CP1251" },
+ { "WINDOWS-1252", "Microsoft.CP1252" },
+ { "WINDOWS-1253", "Microsoft.CP1253" },
+ { "WINDOWS-1254", "Microsoft.CP1254" },
+ { "WINDOWS-1256", "Microsoft.CP1256" },
+ { "WINDOWS-1257", "Microsoft.CP1257" },
+ { "CP737", "Microsoft.CP737" },
+ { "CP853", "Microsoft.CP853" },
+ { "CP856", "Microsoft.CP856" },
+ { "CP874", "Microsoft.CP874" },
+ { "CP922", "Microsoft.CP922" },
+ { "CP1046", "Microsoft.CP1046" },
+ { "CP1124", "Microsoft.CP1124" },
+ { "CP1125", "Microsoft.CP1125" },
+ { "CP1129", "Microsoft.CP1129" },
+ { "CP1133", "Microsoft.CP1133" },
+ { "CP1161", "Microsoft.CP1161" },
+ { "CP1162", "Microsoft.CP1162" },
+ { "CP1163", "Microsoft.CP1163" },
+ { "GEORGIAN-ACADEMY", "GeorgA" },
+ { "GEORGIAN-PS", "GeorgPS" },
+ { "KOI8-RU", "KOI8-RU" },
+ { "KOI8-T", "KOI8-T" },
+ { "MACARABIC", "Apple.Arabic" },
+ { "MACCROATIAN", "Apple.Croatian" },
+ { "MACGREEK", "Apple.Greek" },
+ { "MACHEBREW", "Apple.Hebrew" },
+ { "MACICELAND", "Apple.Iceland" },
+ { "MACROMANIA", "Apple.Romania" },
+ { "MACTHAI", "Apple.Thai" },
+ { "MACTURKISH", "Apple.Turkish" },
+ { "MULELAO-1", "Mulelao" },
+ { "MACCYRILLIC", "Apple.Cyrillic" },
+ { "MACUKRAINE", "Apple.Ukrainian" },
+ { "MACCENTRALEUROPE", "Apple.CentEuro" },
+};
+
+#define TABLE_SIZE (sizeof(mapping_table) / sizeof(mapping_table[0]))
+
+/**
+ * Look up an encoding number, based on its name
+ *
+ * The number returned is the encoding's MIB enum with bit 30 set, which is
+ * how callers tell our 8bit encodings apart from other encoding numbers.
+ *
+ * \param name The encoding name
+ * \return The encoding number, or 0 if not found
+ */
+int iconv_eightbit_number_from_name(const char *name)
+{
+	struct canon *c;
+	int i;
+
+	c = (name != NULL) ? alias_canonicalise(name) : NULL;
+	if (c == NULL)
+		return 0;
+
+	LOG(("searching for: %s", name));
+
+	/* only names listed in our table count as 8bit encodings */
+	for (i = 0; i != TABLE_SIZE; i++) {
+		if (strcasecmp(mapping_table[i].canon, c->name) != 0)
+			continue;
+
+		LOG(("found: %d", c->mib_enum | (1<<30)));
+		return c->mib_enum | (1<<30);
+	}
+
+	return 0;
+}
+
+/**
+ * Read an 8bit encoded string
+ *
+ * Bytes below 0x80 are passed through as ASCII; bytes from 0x80 upwards are
+ * translated through e->intab, where 0xffff marks an undefined character.
+ * Reading stops early if the callback returns non-zero or if a byte has no
+ * mapping (either it is undefined in the table, or there is no table at
+ * all).
+ *
+ * \param e The encoding context
+ * \param callback Callback function to handle generated UCS characters
+ * \param s The input string
+ * \param n The length (in bytes) of the input
+ * \param handle Callback private data pointer
+ * \return The number of characters processed
+ */
+unsigned iconv_eightbit_read(struct encoding_context *e,
+		int (*callback)(void *handle, UCS4 c), const char *s,
+		unsigned int n, void *handle)
+{
+	UCS4 c;
+	unsigned int pos;
+
+	if (!e || !callback || !s)
+		return 0;
+
+	for (pos = 0; pos != n; pos++) {
+
+		/* Read the byte as unsigned: where plain char is signed, a
+		 * byte of 0x80 or above would otherwise turn into a huge
+		 * value and never reach the table lookup. */
+		c = (unsigned char) s[pos];
+
+		LOG(("read: %d (%d)", c, pos));
+
+		if (c < 0x80) {
+			/* ASCII */
+			if (callback(handle, c))
+				break;
+		}
+		else if (e->intab) {
+			LOG(("maps to: %x", e->intab[c - 0x80]));
+			/* Look up in mapping table */
+			if (e->intab[c - 0x80] == 0xffff)
+				/* character not defined in this encoding */
+				return pos;
+
+			if (callback(handle, e->intab[c - 0x80]))
+				break;
+		}
+		else {
+			/* no table => nothing above 0x7f is defined, so
+			 * report it rather than silently dropping the byte */
+			return pos;
+		}
+	}
+
+	return pos;
+}
+
+/**
+ * Write a UCS character in an 8bit encoding
+ *
+ * Note that when the buffer is full, *bufsize has already been decremented
+ * and is left below zero on return.
+ *
+ * \param e The encoding context
+ * \param c The UCS4 character
+ * \param buf Indirect pointer to output buffer
+ * \param bufsize Pointer to size of output buffer
+ * \return 1 on success, 0 if bufsize is too small, -1 if unrepresentable.
+ */
+int iconv_eightbit_write(struct encoding_context *e, UCS4 c,
+		char **buf, int *bufsize)
+{
+	int i;
+
+	/* sanity check input */
+	if (!e || !bufsize || !buf || !*buf)
+		return 0;
+
+	/* buffer full */
+	if (--*bufsize < 0)
+		return 0;
+
+	if (c < 0x0080) {
+		/* ASCII */
+		*(*buf)++ = (char)c;
+	} else {
+		/* Perform reverse table lookup. 0xffff is the table's marker
+		 * for an undefined byte, so it must never be treated as a
+		 * match for the character U+FFFF. */
+		i = 0x80;
+		if (e->outtab && c != 0xffff) {
+			for (i = 0; i != 0x80; i++) {
+				if (e->outtab[i] == c)
+					break;
+			}
+		}
+
+		if (i == 0x80) {
+			/* Nothing was written => fixup bufsize */
+			++*bufsize;
+			return -1;
+		}
+
+		*(*buf)++ = (char)(i+0x80);
+	}
+
+	LOG(("written: %d", *(*buf-1)));
+
+	return 1;
+}
+
+/**
+ * Load an 8bit encoding
+ *
+ * The table file must be exactly 256 bytes long; it is read as 128 shorts
+ * giving the UCS value for each of the bytes 0x80..0xff.
+ * NOTE(review): the values are read in host byte order and a 2 byte short
+ * is assumed - confirm against the table file format.
+ *
+ * \param enc_num The encoding number to load
+ * \return Pointer to lookup table for encoding, or NULL on error. The
+ *         table is allocated with calloc; the caller owns it.
+ */
+unsigned short *iconv_eightbit_new(int enc_num)
+{
+	char filename[64];
+	const char *name;
+	FILE *fp;
+	unsigned int len;
+	int i;
+	unsigned short *ret;
+
+	name = mibenum_to_name(enc_num);
+	if (!name)
+		return NULL;
+
+	/* Lookup filename in table */
+	for (i = 0; i != TABLE_SIZE; i++)
+		if (strcasecmp(mapping_table[i].canon, name) == 0)
+			break;
+
+	/* Not one of ours, or an encoding with no table (ASCII). Bailing out
+	 * here also ensures filename is never used uninitialised. */
+	if (i == TABLE_SIZE || mapping_table[i].filename == 0)
+		return NULL;
+
+	snprintf(filename, sizeof filename, "Unicode:Encodings.%s",
+			mapping_table[i].filename);
+
+	LOG(("opening: %s", filename));
+
+	/* Open */
+	fp = fopen(filename, "rb");
+	if (!fp) {
+		return NULL;
+	}
+
+	/* Get extent */
+	fseek(fp, 0, SEEK_END);
+	len = (unsigned int)ftell(fp);
+	fseek(fp, 0, SEEK_SET);
+
+	/* Unexpected length => give up */
+	if (len != 256) {
+		fclose(fp);
+		return NULL;
+	}
+
+	/* Create buffer */
+	ret = calloc(128, sizeof(short));
+	if (!ret) {
+		fclose(fp);
+		return NULL;
+	}
+
+	/* A short read would leave zeroed entries behind, which would then
+	 * be taken for valid mappings to U+0000 */
+	if (fread(ret, sizeof(short), 128, fp) != 128) {
+		free(ret);
+		fclose(fp);
+		return NULL;
+	}
+
+	fclose(fp);
+
+	return ret;
+}
+
+/**
+ * Delete any 8bit encodings used by a context
+ *
+ * Frees the input and output tables; the context itself is left alone.
+ *
+ * \param e The encoding context
+ */
+void iconv_eightbit_delete(struct encoding_context *e)
+{
+	if (e == NULL)
+		return;
+
+	/* free() ignores null pointers, so no need to test each table */
+	free(e->intab);
+	free(e->outtab);
+}
diff --git a/src/iconv.c b/src/iconv.c
new file mode 100644
index 0000000..aa18fa5
--- /dev/null
+++ b/src/iconv.c
@@ -0,0 +1,457 @@
+/* iconv implementation - see iconv.h for docs */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/errno.h>
+
+#include <unicode/charsets.h>
+#include <unicode/encoding.h>
+
+#include <iconv/iconv.h>
+
+#include "internal.h"
+
+static struct encoding_context *context_list;
+
+static int character_callback(void *handle, UCS4 c);
+static void parse_parameters(struct encoding_context *e, const char *params,
+ bool destination);
+static void parse_parameter(struct encoding_context *e, const char *param,
+ int length, bool destination);
+
+/**
+ * Initialise the library: load the alias data, then start UnicodeLib
+ *
+ * \param aliases_file Path to the Aliases file
+ * \return true on success, false if no path was given or the file could
+ *         not be read
+ */
+int iconv_initialise(const char *aliases_file)
+{
+	if (aliases_file != NULL &&
+			create_alias_data(aliases_file) != false) {
+		encoding_initialise();
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * Shut the library down: destroy every context that is still open, free
+ * the alias data and finalise UnicodeLib.
+ */
+void iconv_finalise(void)
+{
+	struct encoding_context *a, *b;
+
+	/* clients may quit / die without cleaning up. */
+	for (a = context_list; a; a = b) {
+		b = a->next;
+		if (a->in)
+			encoding_delete(a->in);
+		if (a->out)
+			encoding_delete(a->out);
+		iconv_eightbit_delete(a);
+		free(a);
+	}
+
+	/* Everything on the list has just been freed; forget it, so that a
+	 * later iconv() or iconv_close() cannot walk released memory. */
+	context_list = NULL;
+
+	free_alias_data();
+
+	/* finalise the unicode library */
+	encoding_tidyup();
+}
+
+/**
+ * Create a conversion descriptor
+ *
+ * Each code name may be followed by parameters, introduced by "//"; the
+ * name itself is everything up to the first '/'. Names are tried as one of
+ * our own 8bit encodings first, then through UnicodeLib, and finally as an
+ * endian-specific UTF-16/UTF-32 variant.
+ *
+ * \param tocode Name of the destination encoding
+ * \param fromcode Name of the source encoding
+ * \return The new descriptor, or (iconv_t)(-1) with errno set to EINVAL
+ *         (missing or unrecognised name) or ENOMEM
+ */
+iconv_t iconv_open(const char *tocode, const char *fromcode)
+{
+ int to = 0, from = 0;
+ struct encoding_context *e;
+ struct canon *c;
+ bool to_force_le = false, from_force_le = false;
+ char totemp[128], fromtemp[128];
+ const char *slash;
+ unsigned int len;
+
+ /* can't do anything without these */
+ if (!tocode || !fromcode) {
+ errno = EINVAL;
+ return (iconv_t)(-1);
+ }
+
+ e = calloc(1, sizeof(*e));
+ if (!e) {
+ LOG(("malloc failed"));
+ errno = ENOMEM;
+ return (iconv_t)(-1);
+ }
+
+ /* strip any parameters off the end of the tocode string */
+ slash = strchr(tocode, '/');
+ len = slash ? (unsigned) (slash - tocode) : strlen(tocode);
+ snprintf(totemp, sizeof totemp, "%.*s", len, tocode);
+
+ /* parse parameters */
+ if (slash && *(slash + 1) == '/' && *(slash + 2) != '\0')
+ parse_parameters(e, slash + 2, true);
+
+ /* strip any parameters off the end of the fromcode string */
+ slash = strchr(fromcode, '/');
+ len = slash ? (unsigned) (slash - fromcode) : strlen(fromcode);
+ snprintf(fromtemp, sizeof fromtemp, "%.*s", len, fromcode);
+
+ /* parse parameters */
+ if (slash && *(slash + 1) == '/' && *(slash + 2) != '\0')
+ parse_parameters(e, slash + 2, false);
+
+ /* try our own 8bit charset code first */
+ to = iconv_eightbit_number_from_name(totemp);
+ from = iconv_eightbit_number_from_name(fromtemp);
+
+ /* if that failed, try the UnicodeLib functionality */
+ if (!to)
+ to = iconv_encoding_number_from_name(totemp);
+
+ if (!from)
+ from = iconv_encoding_number_from_name(fromtemp);
+
+ /* if that failed, perhaps it was an endian-specific variant of
+ * something UnicodeLib can handle? */
+ if (!to) {
+ c = alias_canonicalise(totemp);
+ if (c) {
+ switch(c->mib_enum) {
+ case 1013: /* UTF-16BE */
+ to = csUnicode11;
+ break;
+ case 1014: /* UTF-16LE */
+ to = csUnicode11;
+ to_force_le = true;
+ break;
+ case 1018: /* UTF-32BE */
+ to = csUCS4;
+ break;
+ case 1019: /* UTF-32LE */
+ to = csUCS4;
+ to_force_le = true;
+ break;
+ }
+ }
+ }
+
+ if (!from) {
+ c = alias_canonicalise(fromtemp);
+ if (c) {
+ switch(c->mib_enum) {
+ case 1013: /* UTF-16BE */
+ from = csUnicode11;
+ break;
+ case 1014: /* UTF-16LE */
+ from = csUnicode11;
+ from_force_le = true;
+ break;
+ case 1018: /* UTF-32BE */
+ from = csUCS4;
+ break;
+ case 1019: /* UTF-32LE */
+ from = csUCS4;
+ from_force_le = true;
+ break;
+ }
+ }
+ }
+
+ LOG(("to: %d(%s) from: %d(%s)", to, totemp, from, fromtemp));
+
+ /* ensure both encodings are recognised */
+ if (to == 0 || from == 0) {
+ free(e);
+ errno = EINVAL;
+ return (iconv_t)(-1);
+ }
+
+ /* bit 30 set indicates that this is an 8bit encoding */
+ if (from & (1<<30))
+ e->intab = iconv_eightbit_new(from & ~(1<<30));
+ else {
+ e->in = encoding_new(from, encoding_READ);
+ if (e->in) {
+ /* Set encoding flags */
+ unsigned int flags = 0;
+ if (from_force_le)
+ flags |= encoding_FLAG_LITTLE_ENDIAN;
+
+ c = alias_canonicalise(fromtemp);
+ if (c && (c->mib_enum == csUCS4 ||
+ c->mib_enum == csUnicode))
+ flags |= encoding_FLAG_NO_HEADER;
+
+ encoding_set_flags(e->in, flags, flags);
+ }
+ }
+
+ /* neither created => memory error or somesuch. assume ENOMEM */
+ /* no table is ever generated for ASCII */
+ if (!e->in && !e->intab && (from & ~(1<<30)) != csASCII) {
+ free(e);
+ errno = ENOMEM;
+ return (iconv_t)(-1);
+ }
+
+ if (to & (1<<30))
+ e->outtab = iconv_eightbit_new(to & ~(1<<30));
+ else {
+ e->out = encoding_new(to, encoding_WRITE_STRICT);
+ if (e->out) {
+ /* Set encoding flags */
+ unsigned int flags = 0;
+ if (to_force_le)
+ flags |= encoding_FLAG_LITTLE_ENDIAN;
+
+ c = alias_canonicalise(totemp);
+ if (c && (c->mib_enum == csUCS4 ||
+ c->mib_enum == csUnicode))
+ flags |= encoding_FLAG_NO_HEADER;
+
+ encoding_set_flags(e->out, flags, flags);
+ }
+ }
+
+ /* neither created => ENOMEM */
+ /* the input side was set up above, so it has to be torn down here */
+ if (!e->out && !e->outtab && (to & ~(1<<30)) != csASCII) {
+ if (e->in)
+ encoding_delete(e->in);
+ iconv_eightbit_delete(e);
+ free(e);
+ errno = ENOMEM;
+ return (iconv_t)(-1);
+ }
+
+ /* add to list */
+ /* (at the head; iconv() and iconv_close() validate descriptors by
+ * searching this list) */
+ e->prev = 0;
+ e->next = context_list;
+ if (context_list)
+ context_list->prev = e;
+ context_list = e;
+
+ return (iconv_t)e;
+}
+
+/**
+ * Convert a run of input through a conversion descriptor
+ *
+ * A NULL inbuf (or *inbuf) resets the input encoding and returns 0.
+ *
+ * \param cd Descriptor returned by iconv_open
+ * \param inbuf Indirect pointer to input; advanced past what was read
+ * \param inbytesleft Pointer to input length; reduced by what was read
+ * \param outbuf Indirect pointer to output buffer; advanced as written
+ * \param outbytesleft Pointer to free space in output; reduced as written
+ * \return 0 if all the input was consumed, otherwise (size_t)(-1) with
+ *         errno set to EINVAL (bad descriptor or arguments), E2BIG (output
+ *         full) or EILSEQ (input could not be converted)
+ */
+size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
+		size_t *outbytesleft)
+{
+	struct encoding_context *e;
+	unsigned read;
+
+	/* search for cd in list */
+	for (e = context_list; e; e = e->next)
+		if (e == (struct encoding_context *)cd)
+			break;
+
+	/* not found => invalid */
+	if (!e) {
+		errno = EINVAL;
+		return (size_t)(-1);
+	}
+
+	if (inbuf == NULL || *inbuf == NULL) {
+		if (e->in)
+			encoding_reset(e->in);
+		return 0;
+	}
+
+	/* Is there any point doing anything? inbytesleft is read and written
+	 * below, so it has to be valid as well. */
+	if (!inbytesleft || !outbuf || !(*outbuf) || !outbytesleft) {
+		errno = EINVAL;
+		return (size_t)(-1);
+	}
+
+	/* character_callback writes through these */
+	e->outbuf = outbuf;
+	e->outbytesleft = outbytesleft;
+
+	LOG(("reading"));
+
+	if (e->in)
+		read = encoding_read(e->in, character_callback, *inbuf,
+				*inbytesleft, e);
+	else
+		read = iconv_eightbit_read(e, character_callback, *inbuf,
+				*inbytesleft, e);
+
+	LOG(("done"));
+
+	LOG(("read: %d, ibl: %d, obl: %d", read, *inbytesleft, *outbytesleft));
+
+	/* 2 */
+	if (read == *inbytesleft) {
+		*inbuf += read;
+		*inbytesleft = 0;
+		return 0;
+	}
+	/* 4 */
+	/* the writers leave the free space count below zero when they run
+	 * out of room, which is what is being detected here */
+	else if ((int)*outbytesleft < 0) {
+		LOG(("e2big"));
+		*outbytesleft = 0;
+		*inbuf += read - 1;
+		*inbytesleft -= read - 1;
+		errno = E2BIG;
+	}
+	/** \todo find a mechanism for distinguishing between 1 & 3 */
+	/* 1 */
+	else if (read != *inbytesleft) {
+		*inbuf += read;
+		*inbytesleft -= read;
+		LOG(("eilseq"));
+		errno = EILSEQ;
+	}
+	/* 3 */
+	/* never reached at present: the test for case 1 is always true by
+	 * the time it is evaluated (see the todo above) */
+	else if ((int)*outbytesleft >= 0) {
+		*inbuf += read;
+		*inbytesleft -= read;
+		LOG(("einval"));
+		errno = EINVAL;
+	}
+
+	LOG(("errno: %d", errno));
+
+	return (size_t)(-1);
+}
+
+/**
+ * Destroy a conversion descriptor
+ *
+ * \param cd Descriptor returned by iconv_open
+ * \return 0, whether or not the descriptor was recognised
+ */
+int iconv_close(iconv_t cd)
+{
+	struct encoding_context *e = context_list;
+
+	/* only descriptors still on our list are valid */
+	while (e != NULL && e != (struct encoding_context *)cd)
+		e = e->next;
+
+	if (e == NULL)
+		return 0;
+
+	/* release the encodings and any 8bit tables */
+	if (e->in)
+		encoding_delete(e->in);
+	if (e->out)
+		encoding_delete(e->out);
+	iconv_eightbit_delete(e);
+
+	/* unlink: fix up the predecessor (or the list head), then the
+	 * successor */
+	if (e->prev)
+		e->prev->next = e->next;
+	else
+		context_list = e->next;
+	if (e->next)
+		e->next->prev = e->prev;
+
+	free(e);
+
+	/* reduce our memory usage somewhat */
+	encoding_table_remove_unused(8 /* recommended value */);
+
+	return 0;
+}
+
+/* this is called for each converted character
+ *
+ * handle is the struct encoding_context for the conversion; c is written to
+ * the output recorded in it, through UnicodeLib if e->out is set and through
+ * our 8bit writer otherwise.
+ *
+ * Returns non-zero to stop the read (the writer returned 0, i.e. no room)
+ * and zero to carry on. A character that cannot be represented is either
+ * replaced by '?' (when transliteration was requested) or dropped; neither
+ * case stops the read unless the output is full. */
+int character_callback(void *handle, UCS4 c)
+{
+ struct encoding_context *e;
+ int ret;
+
+ e = (struct encoding_context*)handle;
+
+ LOG(("outbuf: %p, free: %d", *e->outbuf, *e->outbytesleft));
+ LOG(("writing: %d", c));
+
+ /* NOTE(review): outbytesleft is a size_t* passed on as int* below -
+ * presumably fine where both are 32 bits wide; confirm for other targets */
+ if (e->out) {
+ char *prev_outbuf = *e->outbuf;
+ size_t prev_outbytesleft = *e->outbytesleft;
+
+ ret = encoding_write(e->out, c, e->outbuf,
+ (int*)e->outbytesleft);
+
+ LOG(("ret: %d", ret));
+
+ /* Why the need for this nonsense? UnicodeLib appears to
+ * decrease the count of free space in the buffer even
+ * if it doesn't write into it. This is a bug, as the
+ * documentation says that the buffer pointer AND free
+ * space count are left unmodified if nothing is written.
+ * Therefore, we have this hack until UnicodeLib gets fixed.
+ */
+ if (ret == -1) {
+ *e->outbytesleft = prev_outbytesleft -
+ (*e->outbuf - prev_outbuf);
+ }
+ } else {
+ ret = iconv_eightbit_write(e, c, e->outbuf,
+ (int*)e->outbytesleft);
+ }
+
+ if (ret == -1) {
+ /* Transliterate, if we've been asked to.
+ * Assumes that output is 8bit/8bit multibyte with ASCII G0.
+ * This should be fine as the only <>8bit encodings are
+ * UCS{2,4}, UTF-{16,32}, neither of which return -1.
+ * Also, afaiaa, all supported multibyte encodings are ASCII
+ * compatible. */
+ /** \todo Actually perform some kind of transliteration */
+ if (e->transliterate && (int)*e->outbytesleft > 0) {
+ if (e->out) {
+ /* Reset encoding write state */
+ /** \todo this is a bit dodgy, as we only
+ * really need to ensure that the ASCII set
+ * is mapped into G0 in ISO2022 encodings.
+ * This will reset G1->G3, too, which may
+ * break things. If so, we may have to
+ * perform some dirty hackery which relies
+ * upon knowledge of UnicodeLib's internals
+ */
+ encoding_write(e->out, NULL_UCS4, e->outbuf,
+ (int*)e->outbytesleft);
+ }
+
+ /* the reset above may itself have used up space */
+ if ((int)*e->outbytesleft > 0) {
+ *(*e->outbuf)++ = '?';
+ --*e->outbytesleft;
+
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+ } else {
+ /* not transliterating (or no room): skip the character */
+ ret = 1;
+ }
+ }
+
+ /* writers return 0 when out of space; that is our "stop" signal */
+ return (!ret);
+}
+
+/**
+ * Split a parameter string into its components and apply each of them
+ *
+ * The string is what follows the first "//" of a code name; further
+ * parameters are separated in the same way ("A//B"). Empty components are
+ * ignored.
+ *
+ * \param e The encoding context to configure
+ * \param params The parameter string
+ * \param destination true if these belong to the destination encoding
+ */
+void parse_parameters(struct encoding_context *e, const char *params,
+		bool destination)
+{
+	const char *cur = params;
+
+	if (!e || !params)
+		return;
+
+	/* Walk forward with a cursor. Each search has to start from the
+	 * cursor: restarting from the beginning of the string keeps finding
+	 * the first separator and never terminates. */
+	while (*cur != '\0') {
+		const char *slash = strchr(cur, '/');
+		size_t length = slash ? (size_t) (slash - cur) : strlen(cur);
+
+		if (length > 0)
+			parse_parameter(e, cur, (int) length, destination);
+
+		if (!slash)
+			/* that was the last parameter */
+			break;
+
+		/* step over the separator, however many '/' it is made of */
+		cur = slash;
+		while (*cur == '/')
+			cur++;
+	}
+}
+
+/**
+ * Apply a single parameter to an encoding context
+ *
+ * Only "TRANSLIT" (any letter case) on the destination is recognised;
+ * everything else is ignored.
+ *
+ * \param e The encoding context to configure
+ * \param param Start of the parameter (need not be terminated)
+ * \param length Length of the parameter, in bytes
+ * \param destination true if this belongs to the destination encoding
+ */
+void parse_parameter(struct encoding_context *e, const char *param,
+		int length, bool destination)
+{
+	/* nothing is defined for the source side yet */
+	if (!destination)
+		return;
+
+	if (length == 8 && strncasecmp(param, "TRANSLIT", 8) == 0)
+		e->transliterate = 1;
+}
+
diff --git a/src/internal.h b/src/internal.h
new file mode 100644
index 0000000..d19bd09
--- /dev/null
+++ b/src/internal.h
@@ -0,0 +1,58 @@
+#ifndef _ICONV_INTERNAL_H_
+#define _ICONV_INTERNAL_H_
+
+#ifndef unicode_encoding_h
+#include <unicode/encoding.h>
+#endif
+
+#ifndef DEBUG
+#define LOG(x)
+#else
+#define LOG(x) (printf(__FILE__ " %s %i: ", __func__, __LINE__), printf x, fputc('\n', stdout))
+#endif
+
+#define UNUSED(x) ((x) = (x))
+
+/* State for one conversion descriptor; an iconv_t is a pointer to one of
+ * these. */
+struct encoding_context {
+ Encoding *in; /* UnicodeLib reader; NULL if the source is one of our 8bit encodings */
+ Encoding *out; /* UnicodeLib writer; NULL if the destination is one of ours */
+ unsigned short *intab, *outtab; /* tables from iconv_eightbit_new(), or NULL */
+ char **outbuf; /* caller's output cursor, recorded by each iconv() call */
+ size_t *outbytesleft; /* caller's free space count, recorded likewise */
+ char transliterate; /* non-zero if TRANSLIT was requested for the destination */
+ struct encoding_context *prev, *next; /* links in the list of open contexts */
+};
+
+/* in eightbit.c */
+int iconv_eightbit_number_from_name(const char *name);
+unsigned iconv_eightbit_read(struct encoding_context *e,
+ int (*callback)(void *handle, UCS4 c), const char *s,
+ unsigned int n, void *handle);
+int iconv_eightbit_write(struct encoding_context *e, UCS4 c,
+ char **buf, int *bufsize);
+unsigned short *iconv_eightbit_new(int enc_num);
+void iconv_eightbit_delete(struct encoding_context *e);
+
+/* in alias.c */
+int iconv_encoding_number_from_name(const char *name);
+const char *iconv_encoding_name_from_number(int number);
+
+/* A canonical encoding name, chained into a hash bucket. Allocated with
+ * extra space so that name holds the whole string. */
+struct canon {
+ struct canon *next; /* next entry in the same bucket */
+ short mib_enum; /* MIB enum value read from the Aliases file */
+ unsigned short name_len; /* strlen(name) */
+ char name[1]; /* name text, allocated past the end of the struct */
+};
+
+/* in aliases.c */
+int create_alias_data(const char *filename);
+void free_alias_data(void);
+struct canon *alias_canonicalise(const char *alias);
+short mibenum_from_name(const char *alias);
+const char *mibenum_to_name(short mibenum);
+
+/* in utils.c */
+int strcasecmp(const char *s1, const char *s2);
+int strncasecmp(const char *s1, const char *s2, size_t len);
+
+#endif
diff --git a/src/utils.c b/src/utils.c
new file mode 100644
index 0000000..5403816
--- /dev/null
+++ b/src/utils.c
@@ -0,0 +1,53 @@
+#include <ctype.h>
+
+#include "internal.h"
+
+/**
+ * Case insensitive string comparison
+ *
+ * \param s1 Pointer to string
+ * \param s2 Pointer to string
+ * \return 0 if strings match, <> 0 if no match (the difference between the
+ *         first pair of characters that differ, compared as lower case
+ *         unsigned chars). If either pointer is NULL the result is 1.
+ */
+int strcasecmp(const char *s1, const char *s2)
+{
+	int c1, c2;
+
+	if (!s1 || !s2)
+		return 1; /* this is arbitrary */
+
+	if (s1 == s2)
+		return 0;
+
+	/* tolower() is only defined for unsigned char values (and EOF), so
+	 * convert first: plain char may be signed */
+	do {
+		c1 = tolower((unsigned char) *s1++);
+		c2 = tolower((unsigned char) *s2++);
+	} while (c1 != '\0' && c1 == c2);
+
+	return c1 - c2;
+}
+
+/**
+ * Length-limited case insensitive string comparison
+ *
+ * \param s1 Pointer to string
+ * \param s2 Pointer to string
+ * \param len Length to compare
+ * \return 0 if the first len characters match (or both strings end before
+ *         then), <> 0 if no match. If either pointer is NULL the result
+ *         is 1.
+ */
+int strncasecmp(const char *s1, const char *s2, size_t len)
+{
+	if (!s1 || !s2)
+		return 1; /* this is arbitrary */
+
+	if (s1 == s2)
+		return 0;
+
+	/* Decide inside the loop, so that nothing beyond the first len
+	 * characters is ever looked at: comparing after the loop would test
+	 * character len + 1 whenever the prefixes are equal. */
+	for (; len > 0; len--, s1++, s2++) {
+		/* tolower() needs unsigned char values */
+		int c1 = tolower((unsigned char) *s1);
+		int c2 = tolower((unsigned char) *s2);
+
+		if (c1 != c2)
+			return c1 - c2;
+
+		if (c1 == '\0')
+			/* both strings ended together */
+			break;
+	}
+
+	return 0;
+}
diff --git a/test/INDEX b/test/INDEX
new file mode 100644
index 0000000..522f1cd
--- /dev/null
+++ b/test/INDEX
@@ -0,0 +1,5 @@
+# Index for testcases
+#
+# Test Description DataDir
+
+# Regression tests
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..c935707
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,103 @@
+# Child makefile fragment
+#
+# Toolchain is provided by top-level makefile
+#
+# Variables provided by top-level makefile
+#
+# COMPONENT The name of the component
+# EXPORT The location of the export directory
+# TOP The location of the source tree root
+# RELEASEDIR The place to put release objects
+# DEBUGDIR The place to put debug objects
+#
+# do_include Canned command sequence to include a child makefile
+#
+# Variables provided by parent makefile:
+#
+# DIR The name of the directory we're in, relative to $(TOP)
+#
+# Variables we can manipulate:
+#
+# ITEMS_CLEAN The list of items to remove for "make clean"
+# ITEMS_DISTCLEAN The list of items to remove for "make distclean"
+# TARGET_TESTS The list of target names to run for "make test"
+#
+# SOURCES The list of sources to build for $(COMPONENT)
+#
+# Plus anything from the toolchain
+
+# Push parent directory onto the directory stack
+# (sp grows by one ".x" suffix per nesting level; the parent's $(d) is
+# saved under that key and restored at the end of this fragment)
+sp := $(sp).x
+dirstack_$(sp) := $(d)
+d := $(DIR)
+
+# Extend toolchain settings
+CFLAGS := $(CFLAGS) -I$(TOP)/src/ -I$(d)
+
+# Tests
+# Names of the test programs in this directory, without the .c suffix or
+# $(EXEEXT) (both are appended where needed below). The list is currently
+# empty; the second assignment re-assigns the list to itself (presumably
+# a template line for appending test names -- confirm).
+TESTS_$(d) :=
+TESTS_$(d) := $(TESTS_$(d))
+
+# Items for top-level makefile to use
+# (per test: the executable, the .gcda/.gcno files and the .d file)
+ITEMS_CLEAN := $(ITEMS_CLEAN) \
+ $(addprefix $(d), $(addsuffix $(EXEEXT), $(TESTS_$(d)))) \
+ $(addprefix $(d), $(addsuffix .gcda, $(TESTS_$(d)))) \
+ $(addprefix $(d), $(addsuffix .gcno, $(TESTS_$(d)))) \
+ $(addprefix $(d), $(addsuffix .d, $(TESTS_$(d))))
+ITEMS_DISTCLEAN := $(ITEMS_DISTCLEAN) $(d)log
+
+# Targets for top-level makefile to run
+TARGET_TESTS := $(TARGET_TESTS) test_$(d)
+
+# Now we get to hack around so that we know what directory we're in.
+# $(d) no longer exists when running the commands for a target, so we can't
+# simply use it verbatim. Assigning to a variable doesn't really help, as
+# there's no guarantee that someone else hasn't overridden that variable.
+# So, what we do is make the target depend on $(d), then pick it out of the
+# dependency list when running commands. This isn't pretty, but is effective.
+# ($< is the first prerequisite, i.e. $(d); the runner is given the test
+# directory and the executable extension.)
+test_$(d): $(d) $(addprefix $(d), $(TESTS_$(d)))
+ @$(PERL) $(TOP)/$<testrunner.pl $(TOP)/$< $(EXEEXT)
+
+# List of dependency files generated for the tests in this directory
+DEP_$(d) :=
+
+# Canned rule that generates a dependency file for one test.
+#   $(1)  test source file (.c)
+#   $(2)  dependency file to produce (.d); also appended to DEP_$(d)
+#   $(3)  test target name
+# The compiler's -MM output is written to $(2), with both $(2) and $(3)
+# named as targets via -MT.
+define dep_test
+DEP_$(d) += $(2)
+$(2): $(1)
+ @$$(RM) $$(RMFLAGS) $(2)
+ @$$(CC) $$(DEBUGCFLAGS) -MM -MT '$(2) $(3)' -MF $(2) $(1)
+
+endef
+
+# Build rules for each test binary. They all depend on the debug library,
+# except when building on RISC OS, because there make fails to detect that
+# the debug library exists.
+#   $(1)  test source file (.c)
+#   $(2)  test binary to build
+# The source is compiled to a temporary $@.o, linked against
+# -liconv-debug and -lunicode, and the object file is removed afterwards.
+define compile_test
+ifeq ($(HOST),riscos)
+$(2): $(1)
+else
+$(2): $$(TOP)/$$(COMPONENT)-debug.a $(1)
+endif
+ @$$(ECHO) $$(ECHOFLAGS) "==> $(1)"
+ @$$(CC) -c -g $$(DEBUGCFLAGS) -o $$@.o $(1)
+ @$$(LD) -g -o $$@ $$@.o $$(LDFLAGS) -liconv-debug -lunicode
+ @$$(RM) $$(RMFLAGS) $$@.o
+
+endef
+
+# Instantiate a dependency-file rule (dep_test) for every test
+$(eval $(foreach TEST,$(addprefix $(d), $(TESTS_$(d))), \
+ $(call dep_test,$(addsuffix .c, $(TEST)),$(addsuffix .d, $(TEST)),$(TEST))))
+
+# Pull in the generated dependency files, unless "clean" appears in the
+# requested goals
+ifneq ($(findstring clean,$(MAKECMDGOALS)),clean)
+-include $(sort $(DEP_$(d)))
+endif
+
+# Instantiate a build rule (compile_test) for every test
+$(eval $(foreach TEST,$(addprefix $(d), $(TESTS_$(d))), \
+ $(call compile_test,$(addsuffix .c, $(TEST)),$(TEST))))
+
+# Now include any children we may have
+MAKE_INCLUDES := $(wildcard $(d)*/Makefile)
+$(eval $(foreach INC, $(MAKE_INCLUDES), $(call do_include,$(INC))))
+
+# Finally, pop off the directory stack
+d := $(dirstack_$(sp))
+sp := $(basename $(sp))
diff --git a/test/README b/test/README
new file mode 100644
index 0000000..6493de9
--- /dev/null
+++ b/test/README
@@ -0,0 +1,84 @@
+Iconv testcases
+===============
+
+Testcases for Iconv are self-contained binaries which test various parts
+of the charset library. These may make use of external data files to drive
+the testing.
+
+Testcase command lines
+----------------------
+
+Testcase command lines are in a unified format, thus:
+
+ <aliases_file> [ <data_file> ]
+
+The aliases file parameter will always be specified (as it is required for
+the library to work at all).
+
+The data file parameter is optional and may be provided on a test-by-test
+basis.
+
+Testcase output
+---------------
+
+Testcases may output anything at all to stdout. The final line of the
+output must begin with either PASS or FAIL (case sensitive), indicating
+the success status of the test.
+
+Test Index
+----------
+
+The test sources directory contains a file, named INDEX, which provides an
+index of all available test binaries. Any new test applications should be
+added to this index as they are created.
+
+The test index file format is as follows:
+
+ file = *line
+
+ line = ( entry / comment / blank ) LF
+
+ entry = testname 1*HTAB description [ 1*HTAB datadir ]
+ comment = "#" *non-newline
+ blank = 0<OCTET>
+
+ testname = 1*non-reserved
+ description = 1*non-reserved
+ datadir = 1*non-reserved
+
+ non-newline = VCHAR / WSP
+ non-reserved = VCHAR / SP
+
+Each entry contains a mandatory binary name and description followed by
+an optional data directory specifier. The data directory specifier
+names the directory containing the data files for that test. This
+directory is looked up within the "data" directory of the test sources
+directory.
+
+If a data directory is specified, the test binary will be invoked for
+each data file listed within the data directory INDEX, passing the
+filename as the second parameter (<data_file>, above).
+
+Data Index
+----------
+
+Each test data directory contains a file, named INDEX, which provides an
+index of all available test data files.
+
+The data index file format is as follows:
+
+ file = *line
+
+ line = ( entry / comment / blank ) LF
+
+ entry = dataname 1*HTAB description
+ comment = "#" *non-newline
+ blank = 0<OCTET>
+
+ dataname = 1*non-reserved
+ description = 1*non-reserved
+
+ non-newline = VCHAR / WSP
+ non-reserved = VCHAR / SP
+
+Each entry contains a mandatory data file name and description.
diff --git a/test/data/Aliases b/test/data/Aliases
new file mode 100644
index 0000000..db61ff1
--- /dev/null
+++ b/test/data/Aliases
@@ -0,0 +1,302 @@
+# > Unicode:Files.Aliases
+# Mapping of character set encoding names to their canonical form
+#
+# Lines starting with a '#' are comments, blank lines are ignored.
+#
+# Based on http://www.iana.org/assignments/character-sets and
+# http://www.iana.org/assignments/ianacharset-mib
+#
+# Canonical Form MIBenum Aliases...
+#
+US-ASCII 3 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII
+ISO-10646-UTF-1 27 csISO10646UTF1
+ISO_646.basic:1983 28 ref csISO646basic1983
+INVARIANT 29 csINVARIANT
+ISO_646.irv:1983 30 iso-ir-2 irv csISO2IntlRefVersion
+BS_4730 20 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom
+NATS-SEFI 31 iso-ir-8-1 csNATSSEFI
+NATS-SEFI-ADD 32 iso-ir-8-2 csNATSSEFIADD
+NATS-DANO 33 iso-ir-9-1 csNATSDANO
+NATS-DANO-ADD 34 iso-ir-9-2 csNATSDANOADD
+SEN_850200_B 35 iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish
+SEN_850200_C 21 iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames
+KS_C_5601-1987 36 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987
+ISO-2022-KR 37 csISO2022KR
+EUC-KR 38 csEUCKR EUCKR
+ISO-2022-JP 39 csISO2022JP
+ISO-2022-JP-2 40 csISO2022JP2
+ISO-2022-CN 104
+ISO-2022-CN-EXT 105
+JIS_C6220-1969-jp 41 JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp
+JIS_C6220-1969-ro 42 iso-ir-14 jp ISO646-JP csISO14JISC6220ro
+IT 22 iso-ir-15 ISO646-IT csISO15Italian
+PT 43 iso-ir-16 ISO646-PT csISO16Portuguese
+ES 23 iso-ir-17 ISO646-ES csISO17Spanish
+greek7-old 44 iso-ir-18 csISO18Greek7Old
+latin-greek 45 iso-ir-19 csISO19LatinGreek
+DIN_66003 24 iso-ir-21 de ISO646-DE csISO21German
+NF_Z_62-010_(1973) 46 iso-ir-25 ISO646-FR1 csISO25French
+Latin-greek-1 47 iso-ir-27 csISO27LatinGreek1
+ISO_5427 48 iso-ir-37 csISO5427Cyrillic
+JIS_C6226-1978 49 iso-ir-42 csISO42JISC62261978
+BS_viewdata 50 iso-ir-47 csISO47BSViewdata
+INIS 51 iso-ir-49 csISO49INIS
+INIS-8 52 iso-ir-50 csISO50INIS8
+INIS-cyrillic 53 iso-ir-51 csISO51INISCyrillic
+ISO_5427:1981 54 iso-ir-54 ISO5427Cyrillic1981
+ISO_5428:1980 55 iso-ir-55 csISO5428Greek
+GB_1988-80 56 iso-ir-57 cn ISO646-CN csISO57GB1988
+GB_2312-80 57 iso-ir-58 chinese csISO58GB231280
+NS_4551-1 25 iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1
+NS_4551-2 58 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2
+NF_Z_62-010 26 iso-ir-69 ISO646-FR fr csISO69French
+videotex-suppl 59 iso-ir-70 csISO70VideotexSupp1
+PT2 60 iso-ir-84 ISO646-PT2 csISO84Portuguese2
+ES2 61 iso-ir-85 ISO646-ES2 csISO85Spanish2
+MSZ_7795.3 62 iso-ir-86 ISO646-HU hu csISO86Hungarian
+JIS_C6226-1983 63 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208
+greek7 64 iso-ir-88 csISO88Greek7
+ASMO_449 65 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449
+iso-ir-90 66 csISO90
+JIS_C6229-1984-a 67 iso-ir-91 jp-ocr-a csISO91JISC62291984a
+JIS_C6229-1984-b 68 iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b
+JIS_C6229-1984-b-add 69 iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd
+JIS_C6229-1984-hand 70 iso-ir-94 jp-ocr-hand csISO94JIS62291984hand
+JIS_C6229-1984-hand-add 71 iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd
+JIS_C6229-1984-kana 72 iso-ir-96 csISO96JISC62291984kana
+ISO_2033-1983 73 iso-ir-98 e13b csISO2033
+ANSI_X3.110-1983 74 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS
+ISO-8859-1 4 iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1
+ISO-8859-2 5 iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2
+T.61-7bit 75 iso-ir-102 csISO102T617bit
+T.61-8bit 76 T.61 iso-ir-103 csISO103T618bit
+ISO-8859-3 6 iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3
+ISO-8859-4 7 iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4
+ECMA-cyrillic 77 iso-ir-111 KOI8-E csISO111ECMACyrillic
+CSA_Z243.4-1985-1 78 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1
+CSA_Z243.4-1985-2 79 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2
+CSA_Z243.4-1985-gr 80 iso-ir-123 csISO123CSAZ24341985gr
+ISO-8859-6 9 iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic
+ISO-8859-6-E 81 csISO88596E ISO_8859-6-E
+ISO-8859-6-I 82 csISO88596I ISO_8859-6-I
+ISO-8859-7 10 iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7
+T.101-G2 83 iso-ir-128 csISO128T101G2
+ISO-8859-8 11 iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8
+ISO-8859-8-E 84 csISO88598E ISO_8859-8-E
+ISO-8859-8-I 85 csISO88598I ISO_8859-8-I
+CSN_369103 86 iso-ir-139 csISO139CSN369103
+JUS_I.B1.002 87 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002
+ISO_6937-2-add 14 iso-ir-142 csISOTextComm
+IEC_P27-1 88 iso-ir-143 csISO143IECP271
+ISO-8859-5 8 iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5
+JUS_I.B1.003-serb 89 iso-ir-146 serbian csISO146Serbian
+JUS_I.B1.003-mac 90 macedonian iso-ir-147 csISO147Macedonian
+ISO-8859-9 12 iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9
+greek-ccitt 91 iso-ir-150 csISO150 csISO150GreekCCITT
+NC_NC00-10:81 92 cuba iso-ir-151 ISO646-CU csISO151Cuba
+ISO_6937-2-25 93 iso-ir-152 csISO6937Add
+GOST_19768-74 94 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874
+ISO_8859-supp 95 iso-ir-154 latin1-2-5 csISO8859Supp
+ISO_10367-box 96 iso-ir-155 csISO10367Box
+ISO-8859-10 13 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10
+latin-lap 97 lap iso-ir-158 csISO158Lap
+JIS_X0212-1990 98 x0212 iso-ir-159 csISO159JISX02121990
+DS_2089 99 DS2089 ISO646-DK dk csISO646Danish
+us-dk 100 csUSDK
+dk-us 101 csDKUS
+JIS_X0201 15 X0201 csHalfWidthKatakana
+KSC5636 102 ISO646-KR csKSC5636
+ISO-10646-UCS-2 1000 csUnicode UCS-2 UCS2
+ISO-10646-UCS-4 1001 csUCS4 UCS-4 UCS4
+DEC-MCS 2008 dec csDECMCS
+hp-roman8 2004 roman8 r8 csHPRoman8
+macintosh 2027 mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN
+IBM037 2028 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037
+IBM038 2029 EBCDIC-INT cp038 csIBM038
+IBM273 2030 CP273 csIBM273
+IBM274 2031 EBCDIC-BE CP274 csIBM274
+IBM275 2032 EBCDIC-BR cp275 csIBM275
+IBM277 2033 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277
+IBM278 2034 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278
+IBM280 2035 CP280 ebcdic-cp-it csIBM280
+IBM281 2036 EBCDIC-JP-E cp281 csIBM281
+IBM284 2037 CP284 ebcdic-cp-es csIBM284
+IBM285 2038 CP285 ebcdic-cp-gb csIBM285
+IBM290 2039 cp290 EBCDIC-JP-kana csIBM290
+IBM297 2040 cp297 ebcdic-cp-fr csIBM297
+IBM420 2041 cp420 ebcdic-cp-ar1 csIBM420
+IBM423 2042 cp423 ebcdic-cp-gr csIBM423
+IBM424 2043 cp424 ebcdic-cp-he csIBM424
+IBM437 2011 cp437 437 csPC8CodePage437
+IBM500 2044 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500
+IBM775 2087 cp775 csPC775Baltic
+IBM850 2009 cp850 850 csPC850Multilingual
+IBM851 2045 cp851 851 csIBM851
+IBM852 2010 cp852 852 csPCp852
+IBM855 2046 cp855 855 csIBM855
+IBM857 2047 cp857 857 csIBM857
+IBM860 2048 cp860 860 csIBM860
+IBM861 2049 cp861 861 cp-is csIBM861
+IBM862 2013 cp862 862 csPC862LatinHebrew
+IBM863 2050 cp863 863 csIBM863
+IBM864 2051 cp864 csIBM864
+IBM865 2052 cp865 865 csIBM865
+IBM866 2086 cp866 866 csIBM866
+IBM868 2053 CP868 cp-ar csIBM868
+IBM869 2054 cp869 869 cp-gr csIBM869
+IBM870 2055 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870
+IBM871 2056 CP871 ebcdic-cp-is csIBM871
+IBM880 2057 cp880 EBCDIC-Cyrillic csIBM880
+IBM891 2058 cp891 csIBM891
+IBM903 2059 cp903 csIBM903
+IBM904 2060 cp904 904 csIBBM904
+IBM905 2061 CP905 ebcdic-cp-tr csIBM905
+IBM918 2062 CP918 ebcdic-cp-ar2 csIBM918
+IBM1026 2063 CP1026 csIBM1026
+EBCDIC-AT-DE 2064 csIBMEBCDICATDE
+EBCDIC-AT-DE-A 2065 csEBCDICATDEA
+EBCDIC-CA-FR 2066 csEBCDICCAFR
+EBCDIC-DK-NO 2067 csEBCDICDKNO
+EBCDIC-DK-NO-A 2068 csEBCDICDKNOA
+EBCDIC-FI-SE 2069 csEBCDICFISE
+EBCDIC-FI-SE-A 2070 csEBCDICFISEA
+EBCDIC-FR 2071 csEBCDICFR
+EBCDIC-IT 2072 csEBCDICIT
+EBCDIC-PT 2073 csEBCDICPT
+EBCDIC-ES 2074 csEBCDICES
+EBCDIC-ES-A 2075 csEBCDICESA
+EBCDIC-ES-S 2076 csEBCDICESS
+EBCDIC-UK 2077 csEBCDICUK
+EBCDIC-US 2078 csEBCDICUS
+UNKNOWN-8BIT 2079 csUnknown8BiT
+MNEMONIC 2080 csMnemonic
+MNEM 2081 csMnem
+VISCII 2082 csVISCII
+VIQR 2083 csVIQR
+KOI8-R 2084 csKOI8R
+KOI8-U 2088
+IBM00858 2089 CCSID00858 CP00858 PC-Multilingual-850+euro
+IBM00924 2090 CCSID00924 CP00924 ebcdic-Latin9--euro
+IBM01140 2091 CCSID01140 CP01140 ebcdic-us-37+euro
+IBM01141 2092 CCSID01141 CP01141 ebcdic-de-273+euro
+IBM01142 2093 CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro
+IBM01143 2094 CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro
+IBM01144 2095 CCSID01144 CP01144 ebcdic-it-280+euro
+IBM01145 2096 CCSID01145 CP01145 ebcdic-es-284+euro
+IBM01146 2097 CCSID01146 CP01146 ebcdic-gb-285+euro
+IBM01147 2098 CCSID01147 CP01147 ebcdic-fr-297+euro
+IBM01148 2099 CCSID01148 CP01148 ebcdic-international-500+euro
+IBM01149 2100 CCSID01149 CP01149 ebcdic-is-871+euro
+Big5-HKSCS 2101
+IBM1047 2102 IBM-1047
+PTCP154 2103 csPTCP154 PT154 CP154 Cyrillic-Asian
+Amiga-1251 2104 Ami1251 Amiga1251 Ami-1251
+KOI7-switched 2105
+UNICODE-1-1 1010 csUnicode11
+SCSU 1011
+UTF-7 1012
+UTF-16BE 1013
+UTF-16LE 1014
+UTF-16 1015
+CESU-8 1016 csCESU-8
+UTF-32 1017
+UTF-32BE 1018
+UTF-32LE 1019
+BOCU-1 1020 csBOCU-1
+UNICODE-1-1-UTF-7 103 csUnicode11UTF7
+UTF-8 106 UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8
+ISO-8859-13 109 8859_13 ISO8859-13
+ISO-8859-14 110 iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14
+ISO-8859-15 111 ISO_8859-15 Latin-9 8859_15 ISO8859-15
+ISO-8859-16 112 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10
+GBK 113 CP936 MS936 windows-936
+GB18030 114
+OSD_EBCDIC_DF04_15 115
+OSD_EBCDIC_DF03_IRV 116
+OSD_EBCDIC_DF04_1 117
+JIS_Encoding 16 csJISEncoding
+Shift_JIS 17 MS_Kanji csShiftJIS X-SJIS Shift-JIS
+EUC-JP 18 csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP
+Extended_UNIX_Code_Fixed_Width_for_Japanese 19 csEUCFixWidJapanese
+ISO-10646-UCS-Basic 1002 csUnicodeASCII
+ISO-10646-Unicode-Latin1 1003 csUnicodeLatin1 ISO-10646
+ISO-Unicode-IBM-1261 1005 csUnicodeIBM1261
+ISO-Unicode-IBM-1268 1006 csUnicodeIBM1268
+ISO-Unicode-IBM-1276 1007 csUnicodeIBM1276
+ISO-Unicode-IBM-1264 1008 csUnicodeIBM1264
+ISO-Unicode-IBM-1265 1009 csUnicodeIBM1265
+ISO-8859-1-Windows-3.0-Latin-1 2000 csWindows30Latin1
+ISO-8859-1-Windows-3.1-Latin-1 2001 csWindows31Latin1
+ISO-8859-2-Windows-Latin-2 2002 csWindows31Latin2
+ISO-8859-9-Windows-Latin-5 2003 csWindows31Latin5
+Adobe-Standard-Encoding 2005 csAdobeStandardEncoding
+Ventura-US 2006 csVenturaUS
+Ventura-International 2007 csVenturaInternational
+PC8-Danish-Norwegian 2012 csPC8DanishNorwegian
+PC8-Turkish 2014 csPC8Turkish
+IBM-Symbols 2015 csIBMSymbols
+IBM-Thai 2016 csIBMThai
+HP-Legal 2017 csHPLegal
+HP-Pi-font 2018 csHPPiFont
+HP-Math8 2019 csHPMath8
+Adobe-Symbol-Encoding 2020 csHPPSMath
+HP-DeskTop 2021 csHPDesktop
+Ventura-Math 2022 csVenturaMath
+Microsoft-Publishing 2023 csMicrosoftPublishing
+Windows-31J 2024 csWindows31J
+GB2312 2025 csGB2312 EUC-CN EUCCN CN-GB
+Big5 2026 csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE
+windows-1250 2250 CP1250 MS-EE
+windows-1251 2251 CP1251 MS-CYRL
+windows-1252 2252 CP1252 MS-ANSI
+windows-1253 2253 CP1253 MS-GREEK
+windows-1254 2254 CP1254 MS-TURK
+windows-1255 2255
+windows-1256 2256 CP1256 MS-ARAB
+windows-1257 2257 CP1257 WINBALTRIM
+windows-1258 2258
+TIS-620 2259
+HZ-GB-2312 2085
+
+# Additional encodings not defined by IANA
+
+# Arbitrary allocations
+#CP737 3001
+#CP853 3002
+#CP856 3003
+CP874 3004 WINDOWS-874
+#CP922 3005
+#CP1046 3006
+#CP1124 3007
+#CP1125 3008 WINDOWS-1125
+#CP1129 3009
+#CP1133 3010 IBM-CP1133
+#CP1161 3011 IBM-1161 IBM1161 CSIBM1161
+#CP1162 3012 IBM-1162 IBM1162 CSIBM1162
+#CP1163 3013 IBM-1163 IBM1163 CSIBM1163
+#GEORGIAN-ACADEMY 3014
+#GEORGIAN-PS 3015
+#KOI8-RU 3016
+#KOI8-T 3017
+#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC
+#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN
+#MACGREEK 3020 X-MAC-GREEK MAC-GREEK
+#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW
+#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND
+#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA
+#MACTHAI 3024 X-MAC-THAI MAC-THAI
+#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH
+#MULELAO-1 3026
+
+# From Unicode Lib
+ISO-IR-182 4000
+ISO-IR-197 4002
+ISO-2022-JP-1 4008
+MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC
+MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN
+MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN
+JOHAB 4012
+ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11
+X-CURRENT 4999 X-SYSTEM
+X-ACORN-LATIN1 5001
+X-ACORN-FUZZY 5002
diff --git a/test/testrunner.pl b/test/testrunner.pl
new file mode 100644
index 0000000..1c6c66d
--- /dev/null
+++ b/test/testrunner.pl
@@ -0,0 +1,167 @@
+#!/bin/perl
+#
+# Testcase runner
+#
+# Usage: testrunner <directory> [<executable extension>]
+#
+# Operates upon INDEX files described in the README.
+# Locates and executes testcases, feeding data files to programs
+# as appropriate.
+# Logs testcase output to file.
+# Aborts test sequence on detection of error.
+#
+
+use warnings;
+use strict;
+use File::Spec;
+use IPC::Open3;
+
+if (@ARGV < 1) {
+	print "Usage: testrunner.pl <directory> [<exeext>]\n";
+	# A missing argument is an error; make sure the caller sees it
+	exit(1);
+}
+
+# Get directory
+my $directory = shift @ARGV;
+
+# Get EXE extension (if any)
+my $exeext = "";
+$exeext = shift @ARGV if (@ARGV > 0);
+
+# Open log file and /dev/null
+open(LOG, ">$directory/log") or die "Failed opening test log";
+open(NULL, "+<", File::Spec->devnull) or die "Failed opening /dev/null";
+
+# Open testcase index
+open(TINDEX, "<$directory/INDEX") or die "Failed opening test INDEX";
+
+# Parse testcase index, looking for testcases
+while (my $line = <TINDEX>) {
+	# Skip comments and blank lines
+	next if ($line =~ /^(#.*)?$/);
+
+	# Found one; decompose (the data directory field is optional)
+	my ($test, $desc, $data) = split /\t+/, $line;
+
+	# Strip whitespace
+	$test =~ s/^\s+|\s+$//g;
+	$desc =~ s/^\s+|\s+$//g;
+	$data =~ s/^\s+|\s+$//g if ($data);
+
+	# Append EXE extension to binary name
+	$test = $test . $exeext;
+
+	print "Test: $desc\n";
+
+	if ($data) {
+		# Testcase has external data files
+
+		# Open datafile index
+		open(DINDEX, "<$directory/data/$data/INDEX") or
+			die "Failed opening $directory/data/$data/INDEX";
+
+		# Parse datafile index, running the test once per datafile
+		while (my $dentry = <DINDEX>) {
+			next if ($dentry =~ /^(#.*)?$/);
+
+			# Found one; only the file name is needed here
+			my ($dtest) = split /\t+/, $dentry;
+
+			# Strip whitespace
+			$dtest =~ s/^\s+|\s+$//g;
+
+			run_testcase("$test [$data/$dtest]",
+					"$directory/$test",
+					"$directory/data/Aliases",
+					"$directory/data/$data/$dtest");
+		}
+
+		close(DINDEX);
+	} else {
+		# Testcase has no external data files
+		run_testcase($test,
+				"$directory/$test",
+				"$directory/data/Aliases");
+	}
+
+	print "\n";
+}
+
+# Clean up
+close(TINDEX);
+
+close(NULL);
+close(LOG);
+
+# Run a single testcase invocation.
+#
+# The command line is recorded in the log, the testcase's stdout is copied
+# to the log, and the first four characters of its final output line are
+# shown on the terminal. If that final line starts with FAIL (or the
+# testcase produced no output at all), stderr is copied to the log as well
+# and the whole run is aborted with exit status 1.
+#
+#   $label    Text shown on the terminal after "==>"
+#   @command  Program to run, followed by its arguments
+sub run_testcase {
+	my ($label, @command) = @_;
+
+	print LOG "Running " . join(" ", @command) . "\n";
+
+	# Make message fit on an 80 column terminal
+	my $msg = " ==> $label";
+	$msg = $msg . "." x (80 - length($msg) - 8);
+
+	print $msg;
+
+	# Run testcase
+	# NOTE(review): IPC::Open3 documents the dup prefix as "<&"; the
+	# original "&<NULL" specifier is kept unchanged -- confirm intent.
+	my $pid = open3("&<NULL", \*OUT, \*ERR, @command);
+
+	# Assume failure until the testcase says otherwise
+	my $last = "FAIL";
+
+	# Marshal testcase output to log file
+	while (my $output = <OUT>) {
+		print LOG " $output";
+		$last = $output;
+	}
+
+	# Wait for child to finish
+	waitpid($pid, 0);
+
+	print substr($last, 0, 4) . "\n";
+
+	return if (substr($last, 0, 4) ne "FAIL");
+
+	# Bail, noisily, on failure
+
+	# Write any stderr output to the log
+	while (my $errors = <ERR>) {
+		print LOG " $errors";
+	}
+
+	print "\n\nFailure detected: " .
+			"consult log file\n\n\n";
+
+	exit(1);
+}
diff --git a/test/testutils.h b/test/testutils.h
new file mode 100644
index 0000000..77cbd6a
--- /dev/null
+++ b/test/testutils.h
@@ -0,0 +1,123 @@
+#ifndef test_testutils_h_
+#define test_testutils_h_
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef UNUSED
+/* Mark a variable or parameter as deliberately unused. A cast to void
+ * (rather than a self-assignment) also works for const-qualified
+ * objects and does not trigger self-assignment warnings. */
+#define UNUSED(x) ((void) (x))
+#endif
+
+/* Redefine assert, so we can simply use the standard assert mechanism
+ * within testcases and exit with the right output for the testrunner
+ * to do the right thing. */
+void __assert2(const char *expr, const char *function,
+		const char *file, int line);
+
+/**
+ * Report a failed assertion and terminate the testcase
+ *
+ * Prints a line beginning with FAIL to stdout and exits with
+ * EXIT_FAILURE.
+ *
+ * \param expr      Text of the expression that evaluated to false
+ * \param function  Name of the enclosing function (currently unused)
+ * \param file      Name of the source file (currently unused)
+ * \param line      Line number of the failed assertion
+ */
+void __assert2(const char *expr, const char *function,
+		const char *file, int line)
+{
+	(void) function;
+	(void) file;
+
+	printf("FAIL - %s at line %d\n", expr, line);
+
+	exit(EXIT_FAILURE);
+}
+
+/* Drop any previous definition (e.g. from <assert.h> included earlier)
+ * so that the replacement below is not a conflicting redefinition. */
+#ifdef assert
+#undef assert
+#endif
+
+#define assert(expr) \
+	((void) ((expr) || (__assert2 (#expr, __func__, __FILE__, __LINE__), 0)))
+
+
+/* Callback invoked by parse_testfile() for each line read that does not
+ * start with a newline: data is the line buffer, datalen the length
+ * reported by parse_strlen(), pw the caller's private pointer.
+ * Returning false stops parsing. */
+typedef bool (*line_func)(const char *data, size_t datalen, void *pw);
+
+static size_t parse_strlen(const char *str, size_t limit);
+bool parse_testfile(const char *filename, line_func callback, void *pw);
+size_t parse_filesize(const char *filename);
+
+/**
+ * Drive a callback over the lines of a testcase data file
+ *
+ * The file is read in chunks of at most 299 characters per fgets() call.
+ * Chunks that start with a newline are skipped; every other chunk is
+ * handed to the callback together with its measured length.
+ *
+ * \param filename  Name of file to parse
+ * \param callback  Function called for each line of input data
+ * \param pw        Client-specific private data, passed to the callback
+ * \return true if the whole file was processed, false if the file could
+ *         not be opened or the callback returned false.
+ */
+bool parse_testfile(const char *filename, line_func callback, void *pw)
+{
+	char line[300];
+	bool ok = true;
+	FILE *stream = fopen(filename, "rb");
+
+	if (stream == NULL) {
+		printf("Failed opening %s\n", filename);
+		return false;
+	}
+
+	/* Stop reading as soon as the callback reports failure */
+	while (ok && fgets(line, sizeof line, stream) != NULL) {
+		if (line[0] != '\n') {
+			ok = callback(line,
+					parse_strlen(line, sizeof line - 1),
+					pw);
+		}
+	}
+
+	fclose(stream);
+
+	return ok;
+}
+
+/**
+ * Utility string length measurer for lines read with fgets()
+ *
+ * Counts characters up to and including the first '\n'. Measuring also
+ * stops at a NUL terminator (a final line without a trailing newline),
+ * in which case the NUL is not counted, and never exceeds \a limit.
+ *
+ * \param str    String to measure length of
+ * \param limit  Upper bound on string length
+ * \return String length, including the terminating '\n' if present;
+ *         0 if \a str is NULL or \a limit is 0
+ */
+size_t parse_strlen(const char *str, size_t limit)
+{
+	size_t len = 0;
+
+	if (str == NULL || limit == 0)
+		return 0;
+
+	while (len < limit && str[len] != '\0') {
+		/* The newline itself is part of the measured length */
+		if (str[len++] == '\n')
+			break;
+	}
+
+	return len;
+}
+
+/**
+ * Read the size of a file
+ *
+ * The size is determined by seeking to the end of the file and reading
+ * the resulting file position.
+ *
+ * \param filename  Name of file to read size of
+ * \return File size (in bytes), or 0 on error (the file could not be
+ *         opened, or seeking/reading the position failed)
+ */
+size_t parse_filesize(const char *filename)
+{
+	FILE *fp;
+	long pos = -1;
+
+	fp = fopen(filename, "rb");
+	if (fp == NULL) {
+		printf("Failed opening %s\n", filename);
+		return 0;
+	}
+
+	/* ftell() returns -1L on failure; never let that reach size_t */
+	if (fseek(fp, 0, SEEK_END) == 0)
+		pos = ftell(fp);
+
+	fclose(fp);
+
+	if (pos < 0) {
+		printf("Failed reading size of %s\n", filename);
+		return 0;
+	}
+
+	return (size_t) pos;
+}
+
+
+#endif
diff --git a/unicode/ReadMe b/unicode/ReadMe
new file mode 100644
index 0000000..527bdd8
--- /dev/null
+++ b/unicode/ReadMe
@@ -0,0 +1,23 @@
+Unicode library
+---------------
+
+The contents of this directory must be obtained from RISC OS Open.
+
+The include directory should contain the exported headers:
+
+ + autojp.h
+ + charsets.h
+ + combine.h
+ + encoding.h
+ + iso10646.h
+ + iso3166.h
+ + languages.h
+ + unictype.h
+ + utf8.h
+ + utf16.h
+
+The lib directory should contain the compiled library:
+
+ + libunicode.a (renamed from ucodelib.a)
+ + libunicode-m.a (renamed from ucodelibm.a)
+