#!/usr/bin/perl
# Complete check of a stateless encoding.
# Usage: check-stateless BUILDDIR SRCDIR CHARSET
#
# Runs the table-from / table-to test programs found in BUILDDIR for CHARSET
# and compares their output against the reference tables in SRCDIR.  Every
# shell command is echoed before it runs; the script dies on the first
# command that fails.

use warnings;
use strict;

if (@ARGV < 3) {
    # A mis-invocation must not look like a passing check: report on STDERR
    # and exit non-zero.
    print STDERR "Usage: check-stateless BUILDDIR SRCDIR CHARSET\n";
    exit 1;
}

my $builddir = shift @ARGV;
my $srcdir   = shift @ARGV;
my $charset  = shift @ARGV;

my $prefix = $builddir . "/test_GNU_";

# charset, modified for use in filenames.
my $charsetf = $charset;
$charsetf =~ s/:/-/g;

# Reference tables shipped with the sources.
my $charmap      = "$srcdir/$charsetf.TXT";
my $irreversible = "$srcdir/$charsetf.IRREVERSIBLE.TXT";

# Scratch files created (and finally removed) in the build directory.
my $forward          = "$builddir/tmp-$charsetf.TXT";
my $inverse          = "$builddir/tmp-$charsetf.INVERSE.TXT";
my $noprecomposed    = "$builddir/tmp-noprecomposed-$charsetf.TXT";
my $expected_inverse = "$builddir/tmp-orig-$charsetf.INVERSE.TXT";

# iconv in one direction.
command("${prefix}table-from $charset > $forward");

# iconv in the other direction.
command("${prefix}table-to $charset | sort > $inverse");

# Check 1: charmap and iconv forward should be identical.
command("cmp $charmap $forward 2> /dev/null");

# Check 2: the difference between the charmap and iconv backward.
# NOTE(review): the whitespace inside the sed pattern is reproduced exactly
# as found in this file -- confirm it matches the separators used in the
# .TXT tables.
command("sed -e '/ .* 0x/d' < $charmap > $noprecomposed");
if (-f $irreversible) {
    # Lines that appear in both inputs are dropped by "uniq -u", i.e. the
    # entries listed as irreversible are removed from the expected table.
    command("cat $noprecomposed $irreversible | sort | uniq -u > $expected_inverse");
}
else {
    command("cp $noprecomposed $expected_inverse");
}
command("cmp $expected_inverse $inverse 2> /dev/null");

# Only reached when every check above succeeded; on failure the scratch
# files are left behind.
command("rm -f $forward $inverse $noprecomposed $expected_inverse");

# command($cmd)
#
# Echo $cmd prefixed with "> ", run it through the shell with "2>&1"
# appended, and echo each captured output line prefixed with "| ".
#
# Returns the list of captured output lines.
# Dies if the command cannot be started, is terminated by a signal, or
# exits with a non-zero status.
#
# NOTE(review): "2>&1" is appended textually, so for commands that already
# contain their own redirections it is applied after them -- confirm that
# is the intended destination for stderr.
sub command {
    my ($cmd) = @_;

    print "> $cmd\n";
    my @output = `$cmd 2>&1`;

    # Capture the wait status and errno right away, before any other
    # operation has a chance to overwrite them.
    my $wait_status = $?;
    my $os_error    = $!;

    print "| $_" for @output;

    # $? is a 16-bit wait status, not a plain exit code: -1 means the
    # command could not be run, the low 7 bits hold a terminating signal,
    # and the exit code lives in the high byte.
    if ($wait_status == -1) {
        die "$cmd:\nfailed to execute: $os_error\n";
    }
    if (my $signal = $wait_status & 127) {
        die "$cmd:\nkilled by signal $signal\n";
    }

    my $status = $wait_status >> 8;
    die "$cmd:\nexit status $status\n" if $status;

    return @output;
}

# For a new encoding:
# You can create the "$charsetf".TXT like this:
#   ./table-from "$charset" > "$charsetf".TXT
# You can create the "$charsetf".IRREVERSIBLE.TXT like this:
#   ./table-to "$charset" | sort > "$charsetf".INVERSE.TXT
#   diff "$charsetf".TXT "$charsetf".INVERSE.TXT | grep '^[<>]' | sed -e 's,^. ,,' > "$charsetf".IRREVERSIBLE.TXT