#!/usr/pkg/bin/perl
#
# make_encmap
#
# Copyright (C) 1998 Clark Cooper.  All rights reserved.
# Copyright (C) 2008-2009, 2014 Steve Hay.  All rights reserved.
#
# This script is free software; you can redistribute it and/or modify it under
# the same terms as Perl itself, i.e. under the terms of either the GNU General
# Public License or the Artistic License, as specified in the LICENCE file.
#

use 5.008001;

use strict;
use warnings;

my $name = shift;
my $file = shift;

my $except_str = '$@\^`{}~';
my %Exceptions;

foreach (unpack('c*', $except_str)) {
  $Exceptions{$_} = 1;
}

if (not defined($name) or not defined($file)) {
  $! = 2;
  die "Usage is:\n\tmake_encmap name file\n";
}

my $map;
open($map, '<', $file) or do {
  $! = 3;
  die "Couldn't open $file";
};

my @byte1;
my $minpos = 256;

while (<$map>) {
  next if /^\#/;

  next unless /0x([\da-f]{2,4})\s+0x([\da-f]{4})\s*\#\s*(.*)\s*$/i;

  my ($from, $to, $name) = ($1, $2, $3);

  my $flen = length($from);
  if ($flen == 3) {
    $! = 4;
    die "Bad line at $., from must be either 2 or 4 digits:\n$_";
  }

  my $toval = hex($to);
  my $f1 = substr($from, 0, 2);
  my $f1val = hex($f1);

  if ($flen == 2) {
    if ($f1val < 128) {
        next if $f1val == $toval;

        warn "The byte '0x$f1' mapped to 0x$to\n"
          unless defined($Exceptions{$f1val});
      }

    if (defined($byte1[$f1val])) {
      if ($byte1[$f1val] != $toval) {
        $! = 5;
        die "Multiple mappings for 0x$f1val: $to & " . sprintf("0x%x",
                                                             $byte1[$f1val]);
      }
    }
    else {
      $byte1[$f1val] = $toval;
      $minpos = $f1val
        if $f1val < $minpos;
    }
  }
  else {
    my $b1 = $byte1[$f1val];

    if (defined($b1)) {
      if (not ref($b1)) {
        $! = 6;
        die "The 1st byte of '$from' overlaps a single byte definition.";
      }
    }
    else {
      $b1 = $byte1[$f1val] = [];
      $minpos = $f1val
        if $f1val < $minpos;
    }

    my $f2 = substr($from, 2, 2);
    my $f2val = hex($f2);

    $b1->[$f2val] = $toval;
  }
}

close($map);

if (not $minpos < 256) {
  $! = 7;
  die "Minpos never set";
}

print "<encmap name='$name'>\n";

process_byte(2, $minpos, \@byte1);

print "</encmap>\n";

exit 0;

####
## End main
####

sub emit {
  my ($pre, $start, $lim, $val) = @_;

  my $len = $lim - $start;

  if ($len == 1) {
    printf("$pre<ch byte='x%02x' uni='x%04x'/>\n", $start, $val);
  }
  else {
    printf("$pre<range byte='x%02x' len='%d' uni='x%04x'/>\n",
           $start, $len, $val);
  }
}  # End emit

sub process_byte {
  my ($lead, $minpos, $aref) = @_;

  my $rngstrt;
  my $rngval;
  my $i;

  my $prefix = ' ' x $lead;

  for ($i = $minpos; $i <= $#{$aref}; $i++) {
    my $v = $ {$aref}[$i];
  
    if (defined($v)) {
      if (ref($v)) {
        emit($prefix, $rngstrt, $i, $rngval)
          if defined($rngstrt);

        $rngstrt = undef;
        printf "$prefix<prefix byte='x%02x'>\n", $i;
        process_byte($lead + 2, 0, $v);
        print "$prefix</prefix>\n";
      }
      else {
        next if (defined($rngstrt) and ($v - $rngval == $i - $rngstrt));

        emit($prefix, $rngstrt, $i, $rngval)
          if defined($rngstrt);

        $rngstrt = $i;
        $rngval = $v;
      }
    }
    else {
      emit($prefix, $rngstrt, $i, $rngval)
        if defined($rngstrt);

      $rngstrt = undef;
    }
  }

  emit($prefix, $rngstrt, $i, $rngval)
    if defined($rngstrt);

}  # End process_byte

__END__

=head1 NAME

make_encmap - create an XML representation from an Unicode mapping file

=head1 SYNOPSIS

    make_encmap <name> <file>

=head1 DESCRIPTION

B<make_encmap> creates a XML encmap file with a given name from an Unicode
mapping file, received e.g. from L<ftp://ftp.unicode.org>.  The result by
default is output to F<stdout>.

=head1 ARGUMENTS

=over 4

=item E<lt>nameE<gt>

The name to set in the XML encmap file created.

=item E<lt>fileE<gt>

The Unicode mapping file to create the XML encmap file from.

=back

=head1 OPTIONS

I<None>.

=head1 EXAMPLES

The following example shows the usage of B<make_encmap> for the ISO/IEC 8859-15
table.

    wget ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT
    make_encmap 8859-15 8859-15.TXT E<gt> 8859-15.encmap

=head1 EXIT STATUS

    0   The script exited normally.

    2   Invalid command-line arguments.

    >2  An error occurred.

=head1 KNOWN BUGS

I<None>.

=head1 SEE ALSO

L<compile_encoding(1)>,
L<XML::Encoding(3pm)>.

=head1 AUTHOR

Clark Cooper E<lt>L<coopercc@netheaven.com|mailto:coopercc@netheaven.com>E<gt>.

Steve Hay E<lt>L<shay@cpan.org|mailto:shay@cpan.org>E<gt> is now maintaining
XML::Encoding as of version 2.00.

This manual page was written by Daniel Leidert
E<lt>L<daniel.leidert@wgdd.de|mailto:daniel.leidert@wgdd.de>E<gt> for the Debian
project (but may be used by others).

=head1 COPYRIGHT

Copyright (C) 1998 Clark Cooper.  All rights reserved.
Copyright (C) 2008-2009, 2014 Steve Hay.  All rights reserved.

=head1 LICENCE

This script is free software; you can redistribute it and/or modify it under the
same terms as Perl itself, i.e. under the terms of either the GNU General Public
License or the Artistic License, as specified in the F<LICENCE> file.

=head1 VERSION

Version 2.11

=head1 DATE

08 Dec 2020

=head1 HISTORY

See the F<Changes> file.

=cut
