#!/usr/bin/perl
# Complete check of a stateless encoding.
# Usage: check-stateless TOP SRCDIR CHARSET
#
# Dumps the conversion table in both directions with table-from / table-to
# and compares the dumps against the reference tables stored in SRCDIR:
#   Check 1: CHARSET.TXT must be identical to the forward table.
#   Check 2: CHARSET.TXT, minus the lines removed by the sed filter below and
#            minus the entries listed in CHARSET.IRREVERSIBLE.TXT (if that
#            file exists), must be identical to the sorted backward table.
# Every command is echoed together with its output; the script dies on the
# first command that fails.

use warnings;
use strict;

if (@ARGV < 3) {
    print "Usage: check-stateless TOP SRCDIR CHARSET\n";
    exit 1;
}

# NOTE(review): the argument handling and the first table-from command were
# missing from the damaged source and have been reconstructed from the usage
# line and from the commands that follow -- confirm against the upstream
# script.  TOP is accepted to keep the documented interface, but nothing in
# the visible script uses it.
my ($top, $srcdir, $charset) = @ARGV;

# Charset name as used in file names.
# NOTE(review): presumably CHARSET with ':' replaced by '-' -- confirm.
(my $charsetf = $charset) =~ s/:/-/g;

# iconv in one direction.
command("$srcdir/table-from $charset > $srcdir/tmp-$charsetf.TXT");

# iconv in the other direction.
command("$srcdir/table-to $charset | sort > $srcdir/tmp-$charsetf.INVERSE.TXT");

# Check 1: charmap and iconv forward should be identical.
command("cmp $srcdir/$charsetf.TXT $srcdir/tmp-$charsetf.TXT 2> /dev/null");

# Check 2: the difference between the charmap and iconv backward.
# NOTE(review): the sed pattern starts with a space here; if the source's
# whitespace was collapsed, this may originally have been a TAB -- confirm.
command("sed -e '/ .* 0x/d' < $srcdir/$charsetf.TXT > $srcdir/tmp-noprecomposed-$charsetf.TXT");
if (-f "$srcdir/$charsetf.IRREVERSIBLE.TXT") {
    # "sort | uniq -u" keeps only lines that occur once, i.e. it drops every
    # entry that appears both in the filtered charmap and in the
    # IRREVERSIBLE list.
    command("cat $srcdir/tmp-noprecomposed-$charsetf.TXT $srcdir/$charsetf.IRREVERSIBLE.TXT | sort | uniq -u > $srcdir/tmp-orig-$charsetf.INVERSE.TXT");
}
else {
    command("cp $srcdir/tmp-noprecomposed-$charsetf.TXT $srcdir/tmp-orig-$charsetf.INVERSE.TXT");
}
command("cmp $srcdir/tmp-orig-$charsetf.INVERSE.TXT $srcdir/tmp-$charsetf.INVERSE.TXT 2> /dev/null");

# Only reached when every check passed; after a failure the temporary files
# are left in place.
command("rm -f $srcdir/tmp-$charsetf.TXT $srcdir/tmp-$charsetf.INVERSE.TXT $srcdir/tmp-noprecomposed-$charsetf.TXT $srcdir/tmp-orig-$charsetf.INVERSE.TXT");

# command($cmd)
#
# Echoes $cmd prefixed with "> ", runs it through the shell with stderr
# merged into stdout, and echoes each captured output line prefixed with "| ".
# Returns the captured output lines.
# Dies if the command could not be started, was terminated by a signal, or
# exited with a non-zero status.
sub command {
    my ($cmd) = @_;

    print "> $cmd\n";
    my @output = `$cmd 2>&1`;

    # Save the wait status and errno right away, before any later call can
    # overwrite them.
    my $wait_status = $?;
    my $os_error    = $!;

    foreach my $line (@output) {
        print "| $line";
    }

    # $? is -1 when the command could not be launched at all.
    die "$cmd:\nfailed to execute: $os_error\n" if $wait_status == -1;

    # The low seven bits of $? hold the terminating signal, the high byte
    # holds the exit status.  Dividing by 256 would turn a signal death into
    # a fractional "exit status".
    my $signal = $wait_status & 127;
    die "$cmd:\nkilled by signal $signal\n" if $signal;

    my $status = $wait_status >> 8;
    die "$cmd:\nexit status $status\n" if $status;

    return @output;
}

# For a new encoding:
# You can create the "$charsetf".TXT like this:
#   ./table-from "$charset" > "$charsetf".TXT
# You can create the "$charsetf".IRREVERSIBLE.TXT like this:
#   ./table-to "$charset" | sort > "$charsetf".INVERSE.TXT
#   diff "$charsetf".TXT "$charsetf".INVERSE.TXT | grep '^[<>]' | sed -e 's,^. ,,' > "$charsetf".IRREVERSIBLE.TXT