#!/usr/pkg/bin/perl -w

=pod

=head1 NAME

tv_merge - Merge (combine) two XMLTV files.

=head1 SYNOPSIS

tv_merge [-dDhqt] -i FILE -m FILE [-o FILE]

=head1 DESCRIPTION

Read XMLTV listings from two files and merge them together.
Unlike tv_cat (which just joins files) this will update (add/replace/delete)
the original (master) XMLTV file with the channels and programmes contained
in the second (merge) file.

It works with multiple channels, and will insert any new programmes
and delete any overlapping programmes.

B<IMPORTANT>
The input files must be pre-sorted into datetime within channel order
by using the "--by-channel" option to tv_sort
  e.g. tv_sort --by-channel --output FILE  FILE

All programmes must have start and stop times.

(Note: Programmes in the merged-in file I<replace> any in the master file,
i.e. data are not updated I<within> programmes)

This program uses XML::TreePP, which doesn't write a DOCTYPE declaration to the
output file. If you need a DOCTYPE declaration in the output then use the -t parameter.
  e.g. tv_merge -i FILE -m FILE -o FILE -t

=head1 EXAMPLE

Use C<tv_merge -i master.xml -m newadditions.xml -o newmaster.xml> to
merge all channels/programmes in F<newadditions.xml>.

=head1 SEE ALSO

L<xmltv(5)>

=head1 AUTHOR

Copyright Geoff Westcott, February 2013.

This code is distributed under the GNU General Public License v2 (GPLv2).

=cut


#
#  Inspired by xmltvmerger.py ( http://niels.dybdahl.dk/xmltvdk/index.php/Xmltvmerger.py ) with bug fixes and enhancements.
#
# IMPORTANT: the input files must be pre-sorted into datetime within channel order by using the "--by-channel" option to tv_sort
#  e.g. tv_sort --by-channel --output FILE  FILE
#
# Help:  tv_merge -h
#

my $_version 	= '$Id: tv_merge,v 1.2 2015/07/12 00:46:37 knowledgejunkie Exp $';

use strict;
use XML::TreePP;		# http://search.cpan.org/~kawasaki/XML-TreePP-0.41/lib/XML/TreePP.pm
use Date::Parse;
use POSIX qw(strftime);
use Getopt::Std;
use Data::Dumper;

# Process command line arguments
my %opts = ();                          # hash to store input args
getopts("dDhi:o:m:qt",\%opts);          # load the args into %opts

# The two input files may be named with -i / -m or, failing that, taken from the
# first / second argument left in @ARGV once getopts has consumed the switches.
my $input_file     = ($opts{'i'}) ? $opts{'i'} : $ARGV[0];     # main file
my $merge_file     = ($opts{'m'}) ? $opts{'m'} : $ARGV[1];     # file to merge in
my $outfile        = ($opts{'o'}) ? $opts{'o'} : "merged.xml";
my $debug          = ($opts{'d'}) ? $opts{'d'} : "";    # print out debugging when set to true (1)
my $debugmore      = ($opts{'D'}) ? $opts{'D'} : "";    # print out even more debugging when set to true (1)
my $quiet_mode     = ($opts{'q'}) ? $opts{'q'} : "";    # suppress the progress message on STDOUT
my $doctype        = ($opts{'t'}) ? $opts{'t'} : "";    # add a DOCTYPE to the output file

# Print the usage message (and exit) when help is requested, when either input
# file name is missing, or when no switches were given and the first argument
# is not a readable file.  The 'defined' tests come first so that -r is never
# applied to an undefined value.
if ($opts{'h'}
    || !defined $input_file
    || !defined $merge_file
    || (!%opts && ! -r $ARGV[0])) {
    usage();
}

# Parse the XMLTV file
my $tpp = XML::TreePP->new();
$tpp->set( force_array => [ 'channel', 'programme' ] );  # force array ref for some fields
$tpp->set( indent => 2 );
# element names listed here are written ahead of any others in the output
$tpp->set( first_out => [ 'channel', 'programme', 'title', 'sub-title', 'desc', 'credits', 'date', 'category', 'language', 'orig-language', 'length', 'icon', 'url', 'country', 'episode-num', 'video', 'audio', 'previously-shown', 'premiere', 'last-chance', 'new', 'subtitles', 'rating', 'star-rating', 'review' ] );

my $xmltv = $tpp->parsefile( $input_file );
if ($debugmore) {  print Dumper($xmltv); }

my $xmltv_merge = $tpp->parsefile( $merge_file );
if ($debugmore) {  print Dumper($xmltv_merge); }


# Results of the merge, filled in by the subs below and read by write_newxml()
my %xmltv_new_channels;         # ids of channels found only in the merge file
my @xmltv_merged_channels;      # <channel> elements to output
my @xmltv_merged_progs;         # <programme> elements to output

# Merge the channels (must run first: merge_programmes reads %xmltv_new_channels)
merge_channels();

# Merge the programmes
merge_programmes();

# format and output the data
write_newxml();



###############################################################################
###############################################################################

sub merge_programmes {
    # Merge the programmes of the two parsed files into @xmltv_merged_progs.
    #
    # Walks the 'old' (master) programme list in order while keeping a cursor ($i)
    # into the 'new' (merge) programme list.  On each pass the current old programme
    # is either copied, dropped, or preceded by one or more new programmes.
    #
    # Reads  : $xmltv, $xmltv_merge, %xmltv_new_channels, $debug, $debugmore
    # Writes : @xmltv_merged_progs
    #
    # Assumes all data has start and stop times (note: this isn't a requirement of XMLTV DTD)
    #
    # If the files contain >1 channel then they must be pre-sorted into datetime within channel
    #   e.g. tv_sort --by-channel --output FILE  FILE
    #
    # Rules:
    # 1. always use the new programme details (todo: be a bit more clever with merging)
    #
    # $p2_stop doubles as a state flag:
    #    0   nothing pending - do *not* skip the next old record
    #   -1   the new file has moved on to a channel that does not exist in the old file
    #   else stop time of the new programme most recently looked at
    #

    # A file without any <programme> elements has no 'programme' key at all;
    # fall back to an empty list so that the array dereferences below cannot die.
    my $prog1   = $xmltv->{tv}->{programme}       || [];
    my $prog2   = $xmltv_merge->{tv}->{programme} || [];
    if ($debugmore) { print Dumper("Incoming programmes :", $prog1 ); }
    if ($debugmore) { print Dumper("Merging programmes :", $prog2 ); }

    my ($p1_channel, $p1_start, $p1_stop) = ('', 0 , 0);
    my ($p2_channel, $p2_start, $p2_stop) = ('', 0 , 0);

    my $i = 0;                                      # index of the next unprocessed new programme
    my ($thischannel, $lastchannel) = ('', '');
    my $p2count = scalar (@{ $prog2 });

    for my $p (@{ $prog1 }) {
        $thischannel = $p->{-channel} if $thischannel eq '';    # first pass only
        $p1_channel = $p->{-channel};
        $p1_start   = to_timestamp( $p->{-start} );
        $p1_stop    = to_timestamp( $p->{-stop} );

        if ($p2_stop == -1) {
            # 'new' file channel has changed
            # copy any remaining 'old' records for this channel, then insert the new channel's programmes
            if ($p1_channel eq $lastchannel) {
                push @xmltv_merged_progs, $p;   # copy the old record
                next;
            }
            # old channel has changed = so we have finished copying old records and can now insert new channel
            my $insertchannel = $p2_channel;
            while ($p2_channel eq $insertchannel) {
                push @xmltv_merged_progs, $prog2->[$i]; $i++;
                if ($i < $p2count) {
                    $p2_channel = $prog2->[$i]->{-channel};
                } else {
                    $p2_channel = '';   # no more new records, end the loop
                }
            }
            $p2_stop = 0;
        }

        if ($p2_stop != 0 && $p1_start < $p2_stop && $p1_channel eq $p2_channel) {
            # skip old recs until start time >= last new rec's stop time
            if ($debug) { print "skippy $p1_start $p2_stop\n"; }
            next;
        }

        if ($i < $p2count) {
            # look at (but do not yet consume) the next new programme
            $p2_channel = $prog2->[$i]->{-channel};
            $p2_start   = to_timestamp( $prog2->[$i]->{-start} );
            $p2_stop    = to_timestamp( $prog2->[$i]->{-stop} );

            if ($debug) { print "$p1_start $p2_start || $p1_stop $p2_stop \n"; }

            if ($p1_channel eq $p2_channel) {
                $lastchannel = $thischannel; $thischannel = $p1_channel;     # remember the 'current' channel

                if ($p1_start == $p2_start) {
                    # either
                    # (a) progs identical - replace old with new
                    # (b) new prog is longer - replace old with new & delete all old progs until new stop time
                    # (c) new prog is shorter - insert new prog
                    #
                    push @xmltv_merged_progs, $prog2->[$i]; $i++;
                    next;

                } elsif ($p1_start < $p2_start) {
                    # get here either when new schedule hasn't commenced yet, or when there is a gap in the new schedule
                    # keep current old prog
                    # unless this would cause an overlap
                    if ($p1_stop > $p2_start) {
                        # uh oh overlap - we could possibly set the stop time to equal new prog's start, but that
                        # is just making up schedules!  On balance I think a hole is better than an overlap:
                        # skip this old prog
                        next;
                    }
                    $p2_stop = 0;   # ensure we *don't* skip the next old record

                } elsif ($p1_start > $p2_start) {
                    # get here when additional prog in new schedule that's not in the old
                    # insert new prog & keep current old prog
                    push @xmltv_merged_progs, $prog2->[$i]; $i++;
                    redo;   # re-run the loop body for the same old prog against the next new prog
                }

            } else {
                # channel has changed on one (or both) of the files
                if ($p1_channel ne $thischannel) {
                # 'old' file channel has changed
                    # copy any remaining 'new' records for this channel
                    while ($p2_channel eq $thischannel) {
                        push @xmltv_merged_progs, $prog2->[$i]; $i++;
                        if ($i < $p2count) {
                            $p2_channel = $prog2->[$i]->{-channel};
                        } else {
                            $p2_channel = '';   # no more new records, end the loop
                        }
                    }
                    $lastchannel = $thischannel; $thischannel = $p1_channel;     # remember the new 'current' channel
                    redo;   # re-evaluate the same old prog now that the current channel has moved on
                } elsif ($p2_channel ne $thischannel) {
                    # 'new' file channel has changed
                    # copy any remaining 'old' records for this channel
                    # then, if this is a totally new channel, insert the new channel's programmes
                    if (defined $xmltv_new_channels{$p2_channel}) {
                        $p2_stop = -1;  # handled by the '-1' branch at the top of the loop
                        redo;
                    } else {
                        $p2_stop = 0;
                    }
                }

            }
        }
        # copy the old record
        push @xmltv_merged_progs, $p;

    }

    # no more old progs
    # append any remaining new progs
    while ($i < $p2count) {
        push @xmltv_merged_progs, $prog2->[$i];
        $i++;
    }


    if ($debugmore) { print Dumper("Merged programmes :", @xmltv_merged_progs); }
}


sub merge_channels {
    # Merge the channels in the two input files into @xmltv_merged_channels
    #
    #   - always use the newer channel details
    #   - channels are output in the order they are first seen (master file first,
    #     then channels that exist only in the merge file) so that the output is
    #     the same from run to run (iterating over a hash gives no stable order)
    #
    # Also records in %xmltv_new_channels the id of every channel that is in the
    # merge file but not in the master file (merge_programmes relies on this).
    #
    # Reads  : $xmltv, $xmltv_merge, $debugmore
    # Writes : @xmltv_merged_channels, %xmltv_new_channels
    #

    my $chan1   = $xmltv->{tv}->{channel}       || [];
    my $chan2   = $xmltv_merge->{tv}->{channel} || [];
    if ($debugmore) { print Dumper("Incoming channels :", $chan1 ); }
    if ($debugmore) { print Dumper("Merging channels :", $chan2 ); }

    my %channels;       # channel id => channel element to output
    my @channel_ids;    # channel ids in first-seen order

    for my $c1 (@{ $chan1 }) {
        my $c1_id = $c1->{-id};
        push @channel_ids, $c1_id  if !exists $channels{$c1_id};
        $channels{$c1_id} = $c1;
    }

    for my $c2 (@{ $chan2 }) {
        my $c2_id = $c2->{-id};
        if (!exists $channels{$c2_id}) {
            $xmltv_new_channels{$c2_id} = 1;    # new channel not in current file
            push @channel_ids, $c2_id;
        }
        $channels{$c2_id} = $c2;                # merge file details replace the master's
    }

    push @xmltv_merged_channels, map { $channels{$_} } @channel_ids;

    if ($debugmore) { print Dumper("Merged channels :", @xmltv_merged_channels); }

}


{
    # Parser shared by every call to to_timestamp().  It is built on first use
    # and then kept: to_timestamp() runs several times for every programme, so
    # constructing a new parser on each call is needless work.
    my $format;

    sub to_timestamp {
        # Convert an XMLTV timestamp into epoch seconds.
        #
        #   $ts     : timestamp string in the form "20130320060000 +0000"
        #   returns : the corresponding epoch time
        #
        # The parser is created with on_error => 'croak', so a string that
        # does not match the pattern is fatal.
        #
        my ($ts) = @_;
        use DateTime::Format::Strptime;

        $format ||= DateTime::Format::Strptime->new(
            pattern   => '%Y%m%d%H%M%S %z',       # "20130320060000 +0000"
            time_zone => 'UTC',         # (better than 'local' - e.g. handles DST)
            on_error  => 'croak',
        );

        return ($format->parse_datetime($ts))->epoch();
    }
}


sub write_newxml {
    # Write the output xml file ($outfile)
    #
    # The generator/source attributes of <tv> are carried over from the master
    # file; the <channel> and <programme> lists are @xmltv_merged_channels and
    # @xmltv_merged_progs.
    #
    # Reads : $xmltv, $tpp, $outfile, $doctype, $quiet_mode, $debugmore
    # Dies if the output file cannot be opened, written or closed (-t mode).
    #

    # create an xml container
    my %xml = ();

    # copy across those <tv> attributes which have a (true) value in the master file
    for my $attr ('-generator-info-name', '-generator-info-url', '-source-info-name', '-source-info-url') {
        $xml{'tv'}{$attr} = $xmltv->{tv}->{$attr}  if $xmltv->{tv}->{$attr};
    }

    # add the <channel> elements
    $xml{'tv'}{'channel'} = \@xmltv_merged_channels;

    # add the <programme> elements
    $xml{'tv'}{'programme'} = \@xmltv_merged_progs;

    #
    my $ccount = scalar @{ $xml{'tv'}{'channel'} };
    my $pcount = scalar @{ $xml{'tv'}{'programme'} };
    if (!$quiet_mode) { print "Writing : $ccount channels $pcount programmes \n"; }

    # write the output xml file
    if ($debugmore) {  print Dumper(\%xml); }
    if ($doctype) {
        # TreePP doesn't write  a DOCTYPE - add one if the user requests it
        my $xmlout = $tpp->write( \%xml, 'UTF-8' );
        $xmlout =~ s/^(<\?xml.*>)/$1\n<!DOCTYPE tv SYSTEM "xmltv.dtd">\n/;

        open my $out, '>', $outfile  or die "Failed to open $outfile for writing: $!";
        # print, not printf: the document must never be treated as a format
        # string, otherwise any '%' in the listings data corrupts the output
        print {$out} $xmlout         or die "Failed to write to $outfile: $!";
        close $out                   or die "Failed to close $outfile: $!";
    } else {
        $tpp->writefile( $outfile, \%xml, 'UTF-8' );
    }
}

sub usage {
    #
    # print Usage message on STDERR, then exit the program
    #
    my $filename = (split(/\//,$0))[-1];    # script name without its directory
    print STDERR << "EOF";

Merge (combine) two XMLTV files

Files should be sorted into datetime within channel order
  e.g. tv_sort --by-channel --output FILE  FILE

Assumes all data has start and stop times

Usage: $filename [-dDhqt] -i input_xmltv_file -m merge_xmltv_file [-o output_file]

-h        : this (help) message
-d        : print debugging messages
-D        : print even more debugging messages
-q        : quiet mode (no STDOUT messages)
-i file   : input XMLTV file (or filename as first arg to script)
-m file   : merge XMLTV file (or filename as second arg to script)
-o file   : output file (default: merged.xml)
-t        : add DOCTYPE to output file

example: $filename -i xmltv.xml -m xmltv_add.xml -o xmltv_new.xml

EOF
    exit;
}

