#!/usr/bin/perl # # Copyright 2013 Vivek Dasmohapatra # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # * The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. =head1 Filter the NetSurf combined messages (i10n) file according to language and platform and generate output in a selection of formats for use both internally within netsurf and externally for translation services. =cut use strict; use Getopt::Long (); use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY ); use IO::Compress::Gzip; use constant GETOPT_OPTS => qw( auto_abbrev no_getopt_compat bundling ); use constant GETOPT_SPEC => qw( output|o=s input|i=s lang|l=s dlang|d=s plat|platform|p=s format|fmt|f=s warning|W=s gzip|z help|h|? ); # default option values: my %opt = qw( dlang en plat any format messages warning none ); sub input_stream (); sub output_stream (); sub formatter (); sub static_section($); sub usage (); sub main () { my $input; my $output; my $format; my $header; my $footer; my $opt_ok; # option parsing: Getopt::Long::Configure( GETOPT_OPTS ); $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC ); # allow input and output to be specified as non-option arguments: if( @ARGV ) { $opt{input } ||= shift( @ARGV ) } if( @ARGV ) { $opt{output} ||= shift( @ARGV ) } # open the appropriate streams and get the formatter and headers: if( $opt_ok ) { $input = input_stream(); $output = output_stream(); $format = formatter(); $header = static_section('header'); $footer = static_section('footer'); } # double check the options are sane (and we weren't asked for the help) if( !$opt_ok || $opt{help} || $opt{lang} !~ /^[a-z]{2}$/ || $opt{dlang} !~ /^[a-z]{2}$/ ) { usage(); } # we are good to go: print( $output $header ); my $cur_key; my $dlang_key; my $dlang_val; my $tran_out = 1; my $tran_val; my $tran_key; while (<$input>) { # skip comment and empty lines /^#/ && next; /^\s*$/ && next; # only parsing things that look like message lines: if( /^([a-z]{2}).([^.]+).([^:]+):(.*)/ ) { my( $lang, $plat, $key, $val ) = ( $1, $2, $3, $4 ); # skip the line if it is not for our target platform if( $opt{plat} ne 'any' && $opt{plat} ne $plat && 'all' ne $plat ) { next; } # On key change ensure a translation has been generated if ($cur_key ne $key) { if ($tran_out == 0) { # No translaton for previous key if ($cur_key eq $dlang_key) { print( $output $format->( $dlang_key, $dlang_val ) ); if( $opt{warning} eq "fb" ) { warn( "warning: $dlang_key missing translation in $opt{lang} using $opt{dlang} instead" ); } } else { # No translation and nothing in default language warn( "warning: $dlang_key missing translation in $opt{lang} and no fallback in $opt{dlang}" ); } } else { if (($opt{dlang} ne $opt{lang} ) && ($tran_key eq $dlang_key) && ($tran_val eq $dlang_val)) { if( $opt{warning} eq "dup" ) { warn( "warning: $tran_key value in $opt{lang} is same as in default $opt{dlang}" ); } } } $cur_key = $key; $tran_out = 0; } # capture the key/value in the default language if( $lang eq $opt{dlang} ) { $dlang_key = $key; $dlang_val = $val; } # output if its the target language if( $lang eq $opt{lang} ) { print( $output $format->( $key, $val ) ); $tran_out = 1; $tran_val = $val; $tran_key = $key; } } else { warn( "Malformed entry: $_" ); } } print( $output $footer ); } main(); sub usage () { my @fmt = map { s/::$//; $_ } keys(%{$::{'msgfmt::'}}); print(STDERR < 1, -Level => 9 ); } return $ofh; } sub formatter () { my $name = $opt{format}; my $func = "msgfmt::$name"->UNIVERSAL::can("format"); return $func || die( "No handler found for format '$name'\n" ); } sub static_section ($) { my $name = $opt{format}; my $sect = shift(); my $func = "msgfmt::$name"->UNIVERSAL::can( $sect ); return $func ? $func->() : ""; } # format implementations: { package msgfmt::java; sub escape { $_[0] =~ s/([:'\\])/\\$1/g; $_[0] } sub format { return join(' = ', $_[0], escape( $_[1] ) ) . "\n" } sub header { "# autogenerated from " . ($opt{input} || '-stdin-') . "\n" } } { package msgfmt::messages; # native netsurf format sub format { return join( ":", @_ ) . "\n" } sub header { my $in = $opt{input} || '-stdin-'; return <&"' ); qq| $escaped\n|; } }