diff -Nru xmltv-0.6.3/Changes xmltv-1.0.0/Changes --- xmltv-0.6.3/Changes 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/Changes 2021-02-09 10:49:46.000000000 +0000 @@ -1,3 +1,21 @@ +1.0.0 2021-02-07 + + - tv_grab_ch_search: re-enable fixed grabber + - tv_grab_eu_xmltvse: fetch listings over SSL + - tv_grab_fi: many improvements to listings parsers + - tv_grab_na_dtv: fetch listings over SSL + - tv_grab_pt_vodafone: more reliable SSL conections using recent + OpenSSL versions + - tv_grab_uk_tvguide: improvements to XMLTV ID compliance + + - tv_grab_dk_dr: disable grabber (source site gone) + - tv_grab_uk_bleb: disable grabber (source site gone) + + - tv_grep: allow regex filtering on channel ID + - tv_imdb: significant reductions in memory consumption + + - Windows build: migrate to PAR::Packer + 0.6.3 2020-08-22 - tv_grab_ch_search: disable deprecated grabber diff -Nru xmltv-0.6.3/debian/changelog xmltv-1.0.0/debian/changelog --- xmltv-0.6.3/debian/changelog 2020-11-08 11:07:44.000000000 +0000 +++ xmltv-1.0.0/debian/changelog 2021-09-06 21:30:42.000000000 +0000 @@ -1,8 +1,24 @@ -xmltv (0.6.3-1~ppa18.04+1) bionic; urgency=medium +xmltv (1.0.0-1~ppa18.04+1) bionic; urgency=medium * Backport from Debian Unstable. 
- -- Nicolas Derive Sun, 08 Nov 2020 12:07:44 +0100 + -- Nicolas Derive Mon, 06 Sep 2021 23:30:42 +0200 + +xmltv (1.0.0-1) unstable; urgency=medium + + * New upstream version 1.0.0 + - tv_grab_dk_dr: grabber removed + - tv_grab_uk_bleb: grabber removed + * d/control: + - Declare compliance with Debian Policy 4.5.1 + * d/copyright: + - Refresh years of Debian copyright + * d/patches: + - Drop patches applied upstream + * d/xmltv-util.install: + - Refresh list of available grabbers + + -- Nick Morrott Tue, 09 Feb 2021 11:26:09 +0000 xmltv (0.6.3-1) unstable; urgency=medium diff -Nru xmltv-0.6.3/debian/compat xmltv-1.0.0/debian/compat --- xmltv-0.6.3/debian/compat 2020-11-08 11:07:42.000000000 +0000 +++ xmltv-1.0.0/debian/compat 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -9 diff -Nru xmltv-0.6.3/debian/control xmltv-1.0.0/debian/control --- xmltv-0.6.3/debian/control 2020-11-08 11:07:38.000000000 +0000 +++ xmltv-1.0.0/debian/control 2021-09-06 21:30:39.000000000 +0000 @@ -2,7 +2,7 @@ Section: interpreters Priority: optional Maintainer: Nick Morrott -Build-Depends: debhelper (>= 9) +Build-Depends: debhelper-compat (= 12) Build-Depends-Indep: libarchive-zip-perl, libcgi-pm-perl, @@ -53,7 +53,7 @@ libxml-writer-perl, perl, perl-tk, -Standards-Version: 4.5.0 +Standards-Version: 4.5.1 Testsuite: autopkgtest-pkg-perl Vcs-Browser: https://salsa.debian.org/nickm/xmltv Vcs-Git: https://salsa.debian.org/nickm/xmltv.git diff -Nru xmltv-0.6.3/debian/copyright xmltv-1.0.0/debian/copyright --- xmltv-0.6.3/debian/copyright 2020-09-09 23:38:01.000000000 +0000 +++ xmltv-1.0.0/debian/copyright 2021-02-09 11:26:09.000000000 +0000 @@ -67,7 +67,7 @@ Files: debian/* Copyright: 2002-2006 Kenneth Pronovici 2006-2012 Chris Butler - 2015-2020 Nick Morrott + 2015-2021 Nick Morrott License: GPL-2 License: GPL-2 diff -Nru xmltv-0.6.3/debian/patches/ch_search_reenable xmltv-1.0.0/debian/patches/ch_search_reenable --- xmltv-0.6.3/debian/patches/ch_search_reenable 2020-09-09 
23:38:01.000000000 +0000 +++ xmltv-1.0.0/debian/patches/ch_search_reenable 1970-01-01 00:00:00.000000000 +0000 @@ -1,53 +0,0 @@ -Description: Re-enable tv_grab_ch_search after upstream patch -Author: Patric Mueller -Origin: upstream -Bug: https://github.com/XMLTV/xmltv/issues/109 -Applied-Upstream: cb029fc6ea0b7ec688d7b881806e699119353458 -Last-Update: 2020-09-07 ---- ---- a/Makefile.PL -+++ b/Makefile.PL -@@ -280,19 +280,19 @@ - 'HTTP::Cookies' => 0, }, - }, - -- # { name => 'tv_grab_ch_search', -- # blurb => 'Grabber for Switzerland', -- # exes => [ 'grab/ch_search/tv_grab_ch_search' ], -- # deps => [ 'grab/ch_search/tv_grab_ch_search' => [ 'grab/ch_search/tv_grab_ch_search.in' ] ], -- # pl_files => { 'grab/ch_search/tv_grab_ch_search.PL' => 'grab/ch_search/tv_grab_ch_search' }, -- # to_clean => [ 'grab/ch_search/tv_grab_ch_search' ], -- # grab_need_share => [ 'ch_search' ], -- # prereqs => { 'HTML::Entities' => 1.27, -- # 'HTML::TreeBuilder' => 0, -- # 'HTTP::Cookies' => 0, -- # 'URI::Escape' => 0, -- # 'URI::URL' => 0, }, -- # }, -+ { name => 'tv_grab_ch_search', -+ blurb => 'Grabber for Switzerland', -+ exes => [ 'grab/ch_search/tv_grab_ch_search' ], -+ deps => [ 'grab/ch_search/tv_grab_ch_search' => [ 'grab/ch_search/tv_grab_ch_search.in' ] ], -+ pl_files => { 'grab/ch_search/tv_grab_ch_search.PL' => 'grab/ch_search/tv_grab_ch_search' }, -+ to_clean => [ 'grab/ch_search/tv_grab_ch_search' ], -+ grab_need_share => [ 'ch_search' ], -+ prereqs => { 'HTML::Entities' => 1.27, -+ 'HTML::TreeBuilder' => 0, -+ 'HTTP::Cookies' => 0, -+ 'URI::Escape' => 0, -+ 'URI::URL' => 0, }, -+ }, - - { name => 'tv_grab_dk_dr', - blurb => 'Grabber for Denmark (dr.dk)', ---- a/grab/ch_search/tv_grab_ch_search.in -+++ b/grab/ch_search/tv_grab_ch_search.in -@@ -374,7 +374,7 @@ - foreach my $tv_channel ( $tb->look_down('class' => 'sl-card tv-index-channel') ) { - my $channel_id = substr($tv_channel->attr('id'), 3); # tv-sf1 -> sf1 - if ( defined($channel_id) ) { -- foreach my $tv_show 
( $tv_channel ->look_down('class' => 'tv-tooltip') ) { -+ foreach my $tv_show ( $tv_channel ->look_down('class', qr/(^| )tv-tooltip( |$)/) ) { - my %show; - $show{channel} = channel_id($channel_id); - diff -Nru xmltv-0.6.3/debian/patches/series xmltv-1.0.0/debian/patches/series --- xmltv-0.6.3/debian/patches/series 2020-09-09 23:38:01.000000000 +0000 +++ xmltv-1.0.0/debian/patches/series 2021-02-09 11:26:09.000000000 +0000 @@ -1,5 +1,3 @@ it_dvb_linux_warning 11_makefile_pl_debian_changes.diff autopkgtest.patch -ch_search_reenable -typo-in-manual-page diff -Nru xmltv-0.6.3/debian/patches/typo-in-manual-page xmltv-1.0.0/debian/patches/typo-in-manual-page --- xmltv-0.6.3/debian/patches/typo-in-manual-page 2020-09-09 23:38:01.000000000 +0000 +++ xmltv-1.0.0/debian/patches/typo-in-manual-page 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -Description: Fix some typos detected by lintian -Author: Nick Morrott -Forwarded: not-needed -Applied-Upstream: ea01fcb293b1d95ce89fe055bef38b54253de26e -Last-Update: 2020-09-09 ---- ---- a/filter/tv_imdb -+++ b/filter/tv_imdb -@@ -32,9 +32,9 @@ - - B<--output FILE> write to FILE rather than standard output. - --B<--with-keywords> include IDMb keywords in the output file. -+B<--with-keywords> include IMDb keywords in the output file. - --B<--with-plot> include IDMb plot summary in the output file. -+B<--with-plot> include IMDb plot summary in the output file. - - B<--actors NUMBER> number of actors from IMDb to add (default=3). - -@@ -115,7 +115,7 @@ - hand. See for the download sites. - Then once you have the files rerun without '--download'. - --Note: '--prepStage' sucks a bit of memeory, but you can run each -+Note: '--prepStage' sucks a bit of memory, but you can run each - prepStage separately by running --prepStage with each of the stages - (see --help for details). 
- ---- a/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite -+++ b/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite -@@ -6129,7 +6129,7 @@ - - B<--force-download> Deletes most existing local database data and - forces a download of the data. If there is a suspicion that the --data is currupt (and not being automatically corrected), forcing -+data is corrupt (and not being automatically corrected), forcing - a new download might be necessary. - - B<--days N> When grabbing, grab N days rather than all available days. diff -Nru xmltv-0.6.3/debian/xmltv-util.install xmltv-1.0.0/debian/xmltv-util.install --- xmltv-0.6.3/debian/xmltv-util.install 2020-09-09 23:38:01.000000000 +0000 +++ xmltv-1.0.0/debian/xmltv-util.install 2021-02-09 11:26:09.000000000 +0000 @@ -8,7 +8,6 @@ debian/tmp/usr/bin/tv_grab_ar usr/bin debian/tmp/usr/bin/tv_grab_ch_search usr/bin debian/tmp/usr/bin/tv_grab_combiner usr/bin -debian/tmp/usr/bin/tv_grab_dk_dr usr/bin debian/tmp/usr/bin/tv_grab_eu_epgdata usr/bin debian/tmp/usr/bin/tv_grab_eu_xmltvse usr/bin debian/tmp/usr/bin/tv_grab_fi usr/bin @@ -23,7 +22,6 @@ debian/tmp/usr/bin/tv_grab_na_tvmedia usr/bin debian/tmp/usr/bin/tv_grab_pt_vodafone usr/bin debian/tmp/usr/bin/tv_grab_tr usr/bin -debian/tmp/usr/bin/tv_grab_uk_bleb usr/bin debian/tmp/usr/bin/tv_grab_uk_tvguide usr/bin debian/tmp/usr/bin/tv_grab_zz_sdjson usr/bin debian/tmp/usr/bin/tv_grab_zz_sdjson_sqlite usr/bin @@ -48,7 +46,6 @@ debian/tmp/usr/share/man/man1/tv_grab_ar.1p usr/share/man/man1 debian/tmp/usr/share/man/man1/tv_grab_ch_search.1p usr/share/man/man1 debian/tmp/usr/share/man/man1/tv_grab_combiner.1p usr/share/man/man1 -debian/tmp/usr/share/man/man1/tv_grab_dk_dr.1p usr/share/man/man1 debian/tmp/usr/share/man/man1/tv_grab_eu_epgdata.1p usr/share/man/man1 debian/tmp/usr/share/man/man1/tv_grab_eu_xmltvse.1p usr/share/man/man1 debian/tmp/usr/share/man/man1/tv_grab_fi.1p usr/share/man/man1 @@ -63,7 +60,6 @@ debian/tmp/usr/share/man/man1/tv_grab_na_tvmedia.1p usr/share/man/man1 
debian/tmp/usr/share/man/man1/tv_grab_pt_vodafone.1p usr/share/man/man1 debian/tmp/usr/share/man/man1/tv_grab_tr.1p usr/share/man/man1 -debian/tmp/usr/share/man/man1/tv_grab_uk_bleb.1p usr/share/man/man1 debian/tmp/usr/share/man/man1/tv_grab_uk_tvguide.1p usr/share/man/man1 debian/tmp/usr/share/man/man1/tv_grab_zz_sdjson.1p usr/share/man/man1 debian/tmp/usr/share/man/man1/tv_grab_zz_sdjson_sqlite.1p usr/share/man/man1 diff -Nru xmltv-0.6.3/doc/README.win32 xmltv-1.0.0/doc/README.win32 --- xmltv-0.6.3/doc/README.win32 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/doc/README.win32 2021-02-09 10:49:46.000000000 +0000 @@ -1,4 +1,4 @@ -XMLTV 0.6.3, Windows binary release +XMLTV 1.0.0, Windows binary release Gather television listings, process them and organize your viewing. XMLTV is a file format for storing TV listings, defined in xmltv.dtd. @@ -18,34 +18,27 @@ of those you'll need to install Perl and the necessary modules and use the full distribution. -* Major Changes in this release (0.6.3) +* Major Changes in this release (1.0.0) -tv_grab_ch_search: disable broken grabber +tv_grab_ch_search: re-enable fixed grabber +tv_grab_eu_xmltvse: fetch listings over SSL +tv_grab_fi: many improvements to listings parsers +tv_grab_na_dtv: fetch listings over SSL +tv_grab_pt_vodafone: more reliable SSL conections using recent + OpenSSL versions +tv_grab_uk_tvguide: improvements to XMLTV ID compliance + +tv_grab_dk_dr: disable grabber (source site gone) +tv_grab_uk_bleb: disable grabber (source site gone) -* Major Changes in previous release (0.6.2) +tv_grep: allow regex filtering on channel ID +tv_imdb: significant reductions in memory consumption -tv_grab_dotmedia: disable deprecated grabber -tv_grab_se_tvzon: disable deprecated grabber +Windows build: migrate to PAR::Packer -tv_grab_dtv_la: disable broken grabber -tv_grab_il: disable broken grabber -tv_grab_pt_meo: disable broken grabber -tv_grab_se_swedb: disable broken grabber - -XMLTV.pm: update handling of reading 
from STDIN due to - XML::Parser adopting 3-arg open -tv_grab_ch_search: handle upstream cookies -tv_grab_eu_epgdata: various fixes and improvements -tv_grab_fi: various fixes and improvements -tv_grab_fr: update grabber due to upstream changes -tv_grab_huro: use https source site URLs -tv_grab_it: fix overlapping/duplicate programmes -tv_grab_na_dd: use https source site URLs -tv_grab_na_dtv: various fixes and improvements -tv_grab_pt_vodafone: various fixes and improvements -tv_grab_uk_tvguide: various fixes and improvements -tv_grab_zz_sdjson_sqlite: - many fixes and improvements +* Major Changes in this release (0.6.3) + +tv_grab_ch_search: disable broken grabber And many other changes (see the git log for details) @@ -111,4 +104,4 @@ xmltv-users - how to use XMLTV xmltv-devel - detailed discussions among developers --- Nick Morrott, knowledgejunkie@gmail.com, 2020-08-22 +-- Nick Morrott, knowledgejunkie@gmail.com, 2021-02-07 diff -Nru xmltv-0.6.3/filter/tv_grep.in xmltv-1.0.0/filter/tv_grep.in --- xmltv-0.6.3/filter/tv_grep.in 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/filter/tv_grep.in 2021-02-09 10:49:46.000000000 +0000 @@ -76,7 +76,7 @@ =head2 CHANNEL TESTS -There are two tests for channels. These filter both and +There are three tests for channels. These filter both and elements: if a channel is filtered out then all programmes on that channel are too. @@ -84,6 +84,8 @@ B<--channel-id CHANNEL_ID> True if the channelE<39>s XMLTV id is exactly equal to CHANNEL_ID. +B<--channel-id-exp REGEXP> True if the channel has a whose content matches REGEXP. + =head2 TIME TESTS Normally you donE<39>t want to test time strings with a regular @@ -229,12 +231,14 @@ # Hash mapping regexp -> channel id -> true/undef (see later) my %ch_name; my @ch_regexps; # regexps to populate %ch_name with +my @chid_regexps; # regexps for matching with channel id + # Prepare an OptionAbbrev object with all the long options we expect # to find. 
# my $oa = new OptionAbbrev(qw(--ignore-case --help --output - --channel-id --channel-name + --channel-id --channel-name --channel-id-exp --on-after --on-before --eval --and --or --not)); @@ -413,6 +417,19 @@ next; } + if (defined $lo and $lo eq '--channel-id-exp') { + my $regexp = shift @ARGV; + die "--channel-id-exp requires an argument, a Perl regular expression\n" + if not defined $regexp; + # reuses some --channel-name processing + # + $add_to_prog_conj->(sub { $ch_name{$regexp}->{$_->{channel}} }); + $add_to_chan_conj->(sub { $ch_name{$regexp}->{$_->{id}} }); + $not = 0; + push @chid_regexps, $regexp; + next; + } + if (defined $lo and $lo eq '--channel-name') { my $regexp = shift @ARGV; die "--channel name requires an argument, a Perl regular expression\n" @@ -550,6 +567,25 @@ } } +# Prepare the channel id lookup. +my %seen_chid_id; +foreach my $ch_id (keys %$ch) { + $seen_chid_id{$ch_id}++ && die "duplicate channel id $ch_id\n"; + my $ch = $ch->{$ch_id}; die if not defined $ch; + my %seen_re; + foreach my $re (@chid_regexps) { + next if $seen_re{$re}++; + my $matched = 0; + if ($re eq '' + or ($ignore_case ? $ch_id =~ /$re/i : $ch_id =~ /$re/)) { + $matched = 1; + } + if ($matched) { + $ch_name{$re}->{$ch_id}++ && die; + } + } +} + # Filter channels. This has an effect only for the --channel-id and # --channel-name predicates; we do not drop channels simply because no # programmes remained on them after filtering. 
@@ -644,6 +680,7 @@ (channel matches) --channel-name REGEXP --channel-id CHANNEL_ID + --channel-id-exp REGEXP (special tests) --on-after DATE --on-before DATE diff -Nru xmltv-0.6.3/filter/tv_imdb xmltv-1.0.0/filter/tv_imdb --- xmltv-0.6.3/filter/tv_imdb 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/filter/tv_imdb 2021-02-09 10:49:46.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/perl -w +#!/usr/bin/perl =pod @@ -9,6 +9,7 @@ =head1 SYNOPSIS tv_imdb --imdbdir [--help] [--quiet] [--download] + [--movies-only] [--filesort] [--nosystemsort] [--prepStage (1-9,all)] tv_imdb --imdbdir [--help] [--quiet] @@ -25,16 +26,15 @@ =head1 DESCRIPTION -Very similar to tv_cat in semantics (see tv_cat), +tv_imdb is very similar to tv_cat in semantics (see tv_cat), except whenever a programme appears with "date" entry the -title and date are used to look up extra data by using the -XMLTV::IMDB package. +title and date are used to look up extra data using XMLTV::IMDB. B<--output FILE> write to FILE rather than standard output. -B<--with-keywords> include IDMb keywords in the output file. +B<--with-keywords> include IMDb keywords in the output file. -B<--with-plot> include IDMb plot summary in the output file. +B<--with-plot> include IMDb plot summary in the output file. B<--actors NUMBER> number of actors from IMDb to add (default=3). @@ -115,30 +115,34 @@ hand. See for the download sites. Then once you have the files rerun without '--download'. -Note: '--prepStage' sucks a bit of memeory, but you can run each -prepStage separately by running --prepStage with each of the stages -(see --help for details). +Note: '--prepStage' requires up to 520MB of memory. This can be reduced a little +by running each prepStage separately, using --prepStage with each of the stages +individually (see --help for details). +Memory use can be reduced further by using --filesort option when building the +database. 
This will try to use the operating system to sort the interim data files +rather than sorting in memory. If this system sort does not work for you then you +can use the File::Sort package if it is installed on your system, by also adding the +option --nosystemsort (however this method of sorting is very slow). If you specify +neither option then Perl will sort the files in memory. + +If you are only interested in movies, you can reduce the memory required and the +size of the database by passing the --movies-only option to the database build, +which will exclude tv-series from the database. B<3.> Once you have the database loaded try E<39>cat tv.xml | tv_imdb --imdbdir > tv1.xmlE<39>. -Feel free to report any problems with these steps to xmltv-devel@lists.sf.net. +Feel free to report any problems with these steps at https://github.com/XMLTV/xmltv/issues. =head1 TESTING The --validate-title and --validate-year flags can be used to validate the information in the tv_imdb database. For exmple: - tv_imdb --imdbdir . --validate-title 'Army of Darness' --validate-year 1994 + tv_imdb --imdbdir . --validate-title 'Army of Darkness' --validate-year 1994 =head1 BUGS -The '--prepStage' needs a lot of memory to run at a reasonable speed, -over 250 megabytes with the current imdb data files. For there to be -250 megabytes free for tv_imdb, the system will need at least 512 megabytes -of RAM. Running with less can take hours (or days!) - although fortunately -this stage needs to be run only once after downloading the data files. - Could use a --configure step just like the grabbers so you do not have to specify the --imdbdir on the command line every time. Also this could step you through the prep stages with more description of what is being @@ -158,8 +162,8 @@ same title with a date out by 1 year or 2 years considered a match (currently weE<39>re using 2). 
-Nice to haves include: verification/addition of programe MPAA/VCHIP ratings, -addition of imdb.com user ratings (by votes) to programes. Potenially we +Nice to haves include: verification/addition of programme MPAA/VCHIP ratings, +addition of imdb.com user ratings (by votes) to programmes. Potentially we could expand to include "country of origin", "description", "writer" and "producer" credits, maybe even "commentator". @@ -177,6 +181,7 @@ =cut use strict; +use warnings; use XMLTV; use XMLTV::Version "$XMLTV::VERSION"; use Data::Dumper; @@ -185,7 +190,7 @@ use XMLTV::Data::Recursive::Encode; use XMLTV::Usage < [--help] [--quiet] [--download] [--prepStage (1-9,all)] +$0 --imdbdir [--help] [--quiet] [--download] [--filesort] [--prepStage (1-9,all)] $0 --imdbdir [--help] [--quiet] [--download] [--with-keywords] [--with-plot] [--movies-only] [--actors NUMBER] [--stats] [--debug] [--output FILE] [FILE...] END @@ -193,168 +198,196 @@ use XMLTV::IMDB; my ($opt_help, - $opt_output, - $opt_prepStage, - $opt_imdbDir, - $opt_quiet, - $opt_download, - $opt_stats, - $opt_debug, - $opt_movies_only, - $opt_with_keywords, - $opt_with_plot, - $opt_num_actors, - $opt_validate_title, - $opt_validate_year, - ); - -GetOptions('help' => \$opt_help, - 'output=s' => \$opt_output, - 'prepStage=s' => \$opt_prepStage, - 'imdbdir=s' => \$opt_imdbDir, - 'with-keywords' => \$opt_with_keywords, - 'with-plot' => \$opt_with_plot, - 'movies-only' => \$opt_movies_only, - 'actors=s' => \$opt_num_actors, - 'quiet' => \$opt_quiet, - 'download' => \$opt_download, - 'stats' => \$opt_stats, - 'debug+' => \$opt_debug, - 'validate-title=s' => \$opt_validate_title, - 'validate-year=s' => \$opt_validate_year, - ) or usage(0); + $opt_output, + $opt_prepStage, + $opt_imdbDir, + $opt_quiet, + $opt_download, + $opt_stats, + $opt_debug, + $opt_movies_only, + $opt_with_keywords, + $opt_with_plot, + $opt_num_actors, + $opt_validate_title, + $opt_validate_year, + $opt_sample, + $opt_filesort, + $opt_systemsort, + 
); + +GetOptions('help' => \$opt_help, + 'output=s' => \$opt_output, + 'prepStage=s' => \$opt_prepStage, + 'imdbdir=s' => \$opt_imdbDir, + 'with-keywords' => \$opt_with_keywords, + 'with-plot' => \$opt_with_plot, + 'movies-only' => \$opt_movies_only, + 'actors=s' => \$opt_num_actors, + 'quiet' => \$opt_quiet, + 'download' => \$opt_download, + 'stats' => \$opt_stats, + 'debug+' => \$opt_debug, + 'validate-title=s' => \$opt_validate_title, + 'validate-year=s' => \$opt_validate_year, + 'sample=s' => \$opt_sample, + 'filesort!' => \$opt_filesort, + 'systemsort!' => \$opt_systemsort, + ) or usage(0); usage(1) if $opt_help; usage(1) if ( not defined($opt_imdbDir) ); -$opt_with_keywords=0 if ( !defined($opt_with_keywords) ); -$opt_with_plot=0 if ( !defined($opt_with_plot) ); -$opt_num_actors=3 if ( !defined($opt_num_actors) ); -$opt_movies_only=0 if ( !defined($opt_movies_only) ); -$opt_debug=0 if ( !defined($opt_debug) ); +$opt_with_keywords=0 if ( !defined($opt_with_keywords) ); +$opt_with_plot=0 if ( !defined($opt_with_plot) ); +$opt_num_actors=3 if ( !defined($opt_num_actors) ); +$opt_movies_only=0 if ( !defined($opt_movies_only) ); +$opt_debug=0 if ( !defined($opt_debug) ); +$opt_sample=0 if ( !defined($opt_sample) ); +$opt_filesort=0 if ( !defined($opt_filesort) ); +$opt_systemsort=1 if ( !defined($opt_systemsort) ); $opt_quiet=(defined($opt_quiet)); if ( !defined($opt_stats) ) { - $opt_stats=!$opt_quiet; + $opt_stats=!$opt_quiet; } else { - $opt_stats=(defined($opt_stats)); + $opt_stats=(defined($opt_stats)); } $opt_debug=0 if $opt_quiet; if ( defined($opt_prepStage) ) { - print STDERR < $opt_imdbDir, - 'verbose' => !$opt_quiet, - 'showProgressBar' => !$opt_quiet, - 'stageToRun' => $opt_prepStage, - 'downloadMissingFiles' => $opt_download, - ); - - if ( $opt_prepStage eq "all" ) { - for (my $stage=1 ; $stage <= 9 ; $stage++ ) { - my $n=new XMLTV::IMDB::Crunch(%options); - if ( !$n ) { - exit(1); - } - my $ret=$n->crunchStage($stage); - if ( $ret != 0 ) { - 
exit($ret); - } +END + if ($opt_prepStage eq 'all') { + print STDERR <; # ask for user input + chomp($yn); + exit(1) if (lc($yn) ne "y"); + } } - print STDERR "database load complete, let the games begin !\n" if ( !$opt_quiet); - exit(0); - } - else { - my $n=new XMLTV::IMDB::Crunch(%options); - if ( !$n ) { - exit(1); + + my %options = + ('imdbDir' => $opt_imdbDir, + 'verbose' => !$opt_quiet, + 'showProgressBar' => !$opt_quiet, + 'stageToRun' => $opt_prepStage, + 'downloadMissingFiles' => $opt_download, + 'sample' => $opt_sample, + 'filesort' => $opt_filesort, + 'systemsort' => $opt_systemsort, + 'moviesonly' => $opt_movies_only, + ); + + if ( $opt_prepStage eq "all" ) { + my $n=new XMLTV::IMDB::Crunch(%options); + if ( !$n ) { + exit(1); + } + for (my $stage=1 ; $stage <= 9 ; $stage++ ) { + my $ret=$n->crunchStage($stage); + if ( $ret != 0 ) { + exit($ret); + } + } + print STDERR "database load complete, let the games begin !\n" if ( !$opt_quiet); + exit(0); } - my $ret=$n->crunchStage(int($opt_prepStage)); - if ( $ret == 0 && int($opt_prepStage) == 9 ) { - print STDERR "database load complete, let the games begin !\n" if ( !$opt_quiet); + else { + my $n=new XMLTV::IMDB::Crunch(%options); + if ( !$n ) { + exit(1); + } + my $ret=$n->crunchStage(int($opt_prepStage)); + if ( $ret == 0 && int($opt_prepStage) == 9 ) { + print STDERR "database load complete, let the games begin !\n" if ( !$opt_quiet); + } + exit($ret); } - exit($ret); - } } elsif ( $opt_download ) { - my %options = - ('imdbDir' => $opt_imdbDir, - 'verbose' => !$opt_quiet, - 'showProgressBar' => !$opt_quiet, - 'stageToRun' => 'all', - 'downloadMissingFiles' => $opt_download, - ); + my %options = + ('imdbDir' => $opt_imdbDir, + 'verbose' => !$opt_quiet, + 'showProgressBar' => !$opt_quiet, + 'stageToRun' => 'all', + 'downloadMissingFiles' => $opt_download, + ); - my $n=new XMLTV::IMDB::Crunch(%options); - if ( !$n ) { - exit(1); - } - exit(0); + my $n=new XMLTV::IMDB::Crunch(%options); + if ( !$n ) { + 
exit(1); + } + exit(0); } -my $imdb=new XMLTV::IMDB('imdbDir' => $opt_imdbDir, - 'verbose' => $opt_debug, - 'cacheLookups' => 1, - 'cacheLookupSize' => 1000, - 'updateKeywords' => $opt_with_keywords, - 'updatePlot' => $opt_with_plot, - 'numActors' => $opt_num_actors, - ); +my $imdb=new XMLTV::IMDB('imdbDir' => $opt_imdbDir, + 'verbose' => $opt_debug, + 'cacheLookups' => 1, + 'cacheLookupSize' => 1000, + 'updateKeywords' => $opt_with_keywords, + 'updatePlot' => $opt_with_plot, + 'numActors' => $opt_num_actors, + ); #$imdb->{verbose}++; if ( my $errline=$imdb->sanityCheckDatabase() ) { - print STDERR "$errline"; - print STDERR "tv_imdb: you need to use --prepStage to rebuild\n"; - exit(1); + print STDERR "$errline"; + print STDERR "tv_imdb: you need to use --prepStage to rebuild\n"; + exit(1); } if ( !$imdb->openMovieIndex() ) { - print STDERR "tv_imdb: open database failed\n"; - exit(1); + print STDERR "tv_imdb: open database failed\n"; + exit(1); } if ( defined($opt_validate_title) != defined($opt_validate_year) ) { - print STDERR "tv_imdb: both --validate-title and --validate-year must be used together\n"; - exit(1); + print STDERR "tv_imdb: both --validate-title and --validate-year must be used together\n"; + exit(1); } if ( defined($opt_validate_title) && defined($opt_validate_year) ) { - my $prog; + my $prog; - $prog->{title}->[0]->[0]=$opt_validate_title; - $prog->{date}=$opt_validate_year; - $imdb->{updateTitles}=0; - - #print Dumper($prog); - my $n=$imdb->augmentProgram($prog, $opt_movies_only); - if ( $n ) { - $Data::Dumper::Sortkeys = 1; # ensure consistent order of dumped hash - #my $encoding; - #my $w = new XMLTV::Writer((), encoding => $encoding); - #$w->start(shift); - #$w->write_programme($n); - print Dumper($n); - #$w->end(); - } - $imdb->closeMovieIndex(); - exit(0); + $prog->{title}->[0]->[0]=$opt_validate_title; + $prog->{date}=$opt_validate_year; + $imdb->{updateTitles}=0; + + #print Dumper($prog); + my $n=$imdb->augmentProgram($prog, 
$opt_movies_only); + if ( $n ) { + $Data::Dumper::Sortkeys = 1; # ensure consistent order of dumped hash + #my $encoding; + #my $w = new XMLTV::Writer((), encoding => $encoding); + #$w->start(shift); + #$w->write_programme($n); + print Dumper($n); + #$w->end(); + } + $imdb->closeMovieIndex(); + exit(0); } # test that movie database works okay my %w_args = (); if (defined $opt_output) { - my $fh = new IO::File ">$opt_output"; - die "cannot write to $opt_output\n" if not $fh; - %w_args = (OUTPUT => $fh); + my $fh = new IO::File ">$opt_output"; + die "cannot write to $opt_output\n" if not $fh; + %w_args = (OUTPUT => $fh); } my $numberOfSeenChannels=0; @@ -363,78 +396,78 @@ my $encoding; # store encoding of input file sub encoding_cb( $ ) { - die if defined $w; - $encoding = shift; # callback returns the file's encoding - $w = new XMLTV::Writer(%w_args, encoding => $encoding); + die if defined $w; + $encoding = shift; # callback returns the file's encoding + $w = new XMLTV::Writer(%w_args, encoding => $encoding); } sub credits_cb( $ ) { - $w->start(shift); + $w->start(shift); } my %seen_ch; sub channel_cb( $ ) { - my $c = shift; - my $id = $c->{id}; - $Data::Dumper::Sortkeys = 1; # ensure consistent order of dumped hash - if (not defined $seen_ch{$id}) { - $w->write_channel($c); - $seen_ch{$id} = $c; - $numberOfSeenChannels++; - } - elsif (Dumper($seen_ch{$id}) eq Dumper($c)) { - # They're identical, okay. - } - else { - warn "channel $id may differ between two files, " - . "picking one arbitrarily\n"; - } + my $c = shift; + my $id = $c->{id}; + $Data::Dumper::Sortkeys = 1; # ensure consistent order of dumped hash + if (not defined $seen_ch{$id}) { + $w->write_channel($c); + $seen_ch{$id} = $c; + $numberOfSeenChannels++; + } + elsif (Dumper($seen_ch{$id}) eq Dumper($c)) { + # They're identical, okay. + } + else { + warn "channel $id may differ between two files, " + . 
"picking one arbitrarily\n"; + } } sub programme_cb( $ ) { - my $prog=shift; + my $prog=shift; + + # The database made by IMDB.pm is read as iso-8859-1. The xml file may be different (e.g. utf-8). + # IMDB::augmentProgram does not re-encode the data it adds, so the output file has invalid characters (bug #440). - # The database made by IMDB.pm is read as iso-8859-1. The xml file may be different (e.g. utf-8). - # IMDB::augmentProgram does not re-encode the data it adds, so the output file has invalid characters (bug #440). + my $orig_prog = $prog; + if (lc($encoding) ne 'iso-8859-1') { + # decode the incoming programme + $prog = XMLTV::Data::Recursive::Encode->decode($encoding, $prog); + } - my $orig_prog = $prog; - if (lc($encoding) ne 'iso-8859-1') { - # decode the incoming programme - $prog = XMLTV::Data::Recursive::Encode->decode($encoding, $prog); - } - - # augmentProgram will now add imdb data as iso-8859-1 - my $nprog=$imdb->augmentProgram($prog, $opt_movies_only); - if ( $nprog ) { - if (lc($encoding) ne 'iso-8859-1') { - # re-code the modified programme back to original encoding - $nprog = XMLTV::Data::Recursive::Encode->encode($encoding, $nprog); - } + # augmentProgram will now add imdb data as iso-8859-1 + my $nprog=$imdb->augmentProgram($prog, $opt_movies_only); + if ( $nprog ) { + if (lc($encoding) ne 'iso-8859-1') { + # re-code the modified programme back to original encoding + $nprog = XMLTV::Data::Recursive::Encode->encode($encoding, $nprog); + } $prog=$nprog; - } - else { - $prog = $orig_prog; - } - - # we only add movie information to programmes - # that have a 'date' element defined (since we need - # a year to work with when verifing we got the correct - # hit in the imdb data) - $w->write_programme($prog); + } + else { + $prog = $orig_prog; + } + + # we only add movie information to programmes + # that have a 'date' element defined (since we need + # a year to work with when verifing we got the correct + # hit in the imdb data) + 
$w->write_programme($prog); } @ARGV = ('-') if not @ARGV; -XMLTV::parsefiles_callback(\&encoding_cb, \&credits_cb, - \&channel_cb, \&programme_cb, - @ARGV); +XMLTV::parsefiles_callback( \&encoding_cb, \&credits_cb, + \&channel_cb, \&programme_cb, + @ARGV ); # we only get a Writer if the encoding callback gets called if ( $w ) { - $w->end(); + $w->end(); } if ( $opt_stats ) { - print STDERR $imdb->getStatsLines($numberOfSeenChannels); + print STDERR $imdb->getStatsLines($numberOfSeenChannels); } $imdb->closeMovieIndex(); exit(0); diff -Nru xmltv-0.6.3/grab/ch_search/tv_grab_ch_search.in xmltv-1.0.0/grab/ch_search/tv_grab_ch_search.in --- xmltv-0.6.3/grab/ch_search/tv_grab_ch_search.in 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/ch_search/tv_grab_ch_search.in 2021-02-09 10:49:46.000000000 +0000 @@ -374,7 +374,7 @@ foreach my $tv_channel ( $tb->look_down('class' => 'sl-card tv-index-channel') ) { my $channel_id = substr($tv_channel->attr('id'), 3); # tv-sf1 -> sf1 if ( defined($channel_id) ) { - foreach my $tv_show ( $tv_channel ->look_down('class' => 'tv-tooltip') ) { + foreach my $tv_show ( $tv_channel ->look_down('class', qr/(^| )tv-tooltip( |$)/) ) { my %show; $show{channel} = channel_id($channel_id); diff -Nru xmltv-0.6.3/grab/combiner/test.conf xmltv-1.0.0/grab/combiner/test.conf --- xmltv-0.6.3/grab/combiner/test.conf 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/combiner/test.conf 2021-02-09 10:49:46.000000000 +0000 @@ -1,2 +1,2 @@ -grabber=/usr/bin/tv_grab_uk_bleb;bbc1 -grabber=/usr/bin/tv_grab_uk_bleb;bbc2 +grabber=/usr/bin/tv_grab_eu_xmltvse;country=Germany&ncachedir=/tmp/.xmltv/cache&nchannel=3sat.de&nchannel=daserste.de&nchannel=prosieben.de +grabber=/usr/bin/tv_grab_it;channel www.raiuno.rai.it&nchannel www.raidue.rai.it&nchannel www.raitre.rai.it diff -Nru xmltv-0.6.3/grab/eu_epgdata/channel_ids xmltv-1.0.0/grab/eu_epgdata/channel_ids --- xmltv-0.6.3/grab/eu_epgdata/channel_ids 2020-09-07 15:02:53.000000000 +0000 +++ 
xmltv-1.0.0/grab/eu_epgdata/channel_ids 2021-02-09 10:49:46.000000000 +0000 @@ -180,3 +180,4 @@ 12046;sky1.sky.de:;Sky 1 12102;esports1.sport1.de;eSports1 (ESPO1) 12125;voxup.vox.de;VOXup (VOXUP) +12052;sr.de;SR Fernsehen diff -Nru xmltv-0.6.3/grab/eu_xmltvse/test.conf xmltv-1.0.0/grab/eu_xmltvse/test.conf --- xmltv-0.6.3/grab/eu_xmltvse/test.conf 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/eu_xmltvse/test.conf 2021-02-09 10:49:46.000000000 +0000 @@ -1,38 +1,49 @@ -country=Austria +country=Germany cachedir=/tmp/.xmltv/cache channel!1.bluemovie.de channel!13thstreet.de channel!1bar.dazn.de channel!2.bluemovie.de +channel!2.eurosport.de channel!2bar.dazn.de +channel!2extra.eurosport.de channel!3.bluemovie.de channel!360tunebox.spi.pl -channel!3sat.de +channel=3sat.de +channel!aachen.wdr.daserste.de channel!action.sky.de channel!actionhd.sky.de channel!adultchannel.co.uk channel!ae-tv.de channel!animalplanet.discovery.de channel!ar.france24.com -channel=arte.de +channel!arte.de channel!arthouse.spi.pl channel!arts.sky.de -channel!asien.dw.de -channel!at.viva.tv channel!atlantic.sky.de channel!atlantichd.sky.de channel!atlanticp1.sky.de -channel!atv.at -channel!atv2.at -channel!bangutv.com +channel!auto-motor-und-sport.tv +channel!axntv.de +channel!b5aktuell.de.radio.xmltv.se +channel!b5plus.de.radio.xmltv.se +channel!bayern1.de.radio.xmltv.se +channel!bayern2.de.radio.xmltv.se +channel!bayern3.de.radio.xmltv.se +channel!bayernplus.de.radio.xmltv.se channel!bbcworldnews.com channel!beate-uhse.tv +channel!berl.rbb-online.de channel!bfs.daserste.de channel!bibeltv.de -channel!blizztv.de +channel!bielefeld.wdr.daserste.de +channel!bongusto.tv +channel!bonn.wdr.daserste.de channel!br-alpha.daserste.de +channel!br-klassik.daserste.de channel!brandnew.mtv.de channel!brazzerstveurope.com +channel!brheimat.de.radio.xmltv.se channel!bundesliga1.sky.de channel!bundesliga10.sky.de channel!bundesliga2.sky.de @@ -58,26 +69,31 @@ channel!canal24h.rtve.es 
channel!cbsreality.tv channel!cinema.sky.de -channel!cinemagic.disneychannel.de channel!cinemahd.sky.de +channel!classic.vh1.se channel!classica.de channel!classics.kabel1.de channel!classicshd.kabel1.de channel!comedy.sky.de -channel!comedycentral.at channel!comedycentral.de +channel!crime.rtl.de channel!crimehd.rtl.de channel=daserste.de channel!de.eonline.com channel!deluxemusic.tv +channel!deutschlandfunk.de.radio.xmltv.se +channel!deutschlandfunkkultur.de.radio.xmltv.se +channel!deutschlandfunknova.de.radio.xmltv.se channel!discovery.de channel!disneychannel.de channel!dmax.discovery.de channel!dmaxhd.discovery.de channel!docubox.spi.pl channel!doku.kabel1.de +channel!dortmund.wdr.daserste.de channel!dr1.dr.dk -channel!dr1hd.dr.dk +channel!duisburg.wdr.daserste.de +channel!dusseldorf.wdr.daserste.de channel!dw.de channel!eins.sky.de channel!einsextra.daserste.de @@ -85,29 +101,46 @@ channel!einshd.sky.de channel!emotion.sky.de channel!emotions.sat1.de +channel!erox.spi.pl +channel!eroxxx.spi.pl +channel!essen.wdr.daserste.de channel!euronews.com channel!europa.tve.es channel!europe.bloomberg.com +channel!europe.bluehustler.com channel!europe.cnbc.com +channel!europe.daringtv.com +channel!europe.hustlertv.com channel!europe.playboytv.com channel!europe.realitykings.com +channel!eurosport.de channel!extremesports.com channel!family.cinema.sky.de channel!familyhd.cinema.sky.de -channel!familytv.de channel!fashionbox.spi.pl channel!fastandfun.spi.pl channel!fightbox.spi.pl +channel!filmboxbasic.spi.pl +channel!filmboxfamily.spi.pl +channel!filmboxhd.spi.pl +channel!filmboxplus.spi.pl +channel!filmboxpremium.spi.pl channel!fixundfoxi.tv +channel!fm4.orf.at channel!foxchannel.de channel!fr.france24.com channel!france24.com channel!fsf.fightsports.tv -channel!ftv.com +channel!fuel.tv channel!fun.prosieben.de +channel!geo-television.de +channel!ginx.tv +channel!god.tv channel!gold.sat1.de channel!goldhd.sat1.de +channel!gospel.tv channel!hd.13thstreet.de 
+channel!hd.3sat.de channel!hd.anixehd.tv channel!hd.arte.de channel!hd.bibeltv.de @@ -116,30 +149,41 @@ channel!hd.deluxemusic.tv channel!hd.discovery.de channel!hd.disneychannel.de +channel!hd.eurosport.de channel!hd.foxchannel.de -channel!hd.ftv.com channel!hd.historytv.de channel!hd.kabel1.de channel!hd.kinowelt.tv +channel!hd.mezzo.tv +channel!hd.mtv.de +channel!hd.mtv.se channel!hd.n-tv.de channel!hd.natgeo.de channel!hd.nick.de channel!hd.prosieben.de channel!hd.rtl.de -channel!hd.rtl2.de channel!hd.sat1.de channel!hd.servustv.com +channel!hd.servustv.de channel!hd.sixx.de channel!hd.spiegel-geschichte.tv channel!hd.syfy.de channel!hd.tele5.de -channel!hd.tv2.dk +channel!hd.viva.tv channel!hd.vox.de +channel!hd.zdf.de channel!heimatkanal.de +channel!hgtv.discovery.de channel!hits.sky.de channel!hitshd.sky.de +channel!hope-channel.de channel!hr.daserste.de +channel!hr1.de.radio.xmltv.se +channel!hr2.de.radio.xmltv.se +channel!hr3.de.radio.xmltv.se +channel!hr4.de.radio.xmltv.se channel!hrhd.daserste.de +channel!hrinfo.de.radio.xmltv.se channel!htv1.hrt.hr channel!htv2.hrt.hr channel!htv3.hrt.hr @@ -148,7 +192,6 @@ channel!int.kinopolska.pl channel!int.kinopolskamuzyka.pl channel!international.rt.com -channel!international.skynews.com channel!jr.disneychannel.de channel!jr.nick.de channel!jukebox-tv.de @@ -158,24 +201,43 @@ channel!kikahd.daserste.de channel!kinowelt.tv channel!krimi.sky.de +channel!living.rtl.de +channel!marcopolo.de channel!maxx.prosieben.de channel!maxxhd.prosieben.de channel!mdr.daserste.de channel!mdrhd.daserste.de +channel!mezzo.tv channel!motorvision.de +channel!mtv.de +channel!munster.wdr.daserste.de +channel!mv.ndr.daserste.de channel!n-tv.de channel!n24doku.de channel!natgeo.de channel!ndr.daserste.de channel!ndrhd.daserste.de +channel!nds.ndr.daserste.de channel!neo.zdf.de channel!neohd.zdf.de +channel!neokika.zdfmobil.de channel!nick.de -channel!nickelodeon.at channel!nitro.rtl.de channel!nitrohd.rtl.de 
+channel!nord.bfs.daserste.de channel!nostalgie.sky.de -channel=orf1.orf.at +channel!oe1.orf.at +channel!oe3.orf.at +channel!ok-kl.de +channel!ok-nahetv.de +channel!ok-weinstrasse.de +channel!ok-worms.de +channel!ok4.tv +channel!ok54.de +channel!oktv-lu.de +channel!oktv-mainz.de +channel!oktv-suedwestpfalz.de +channel!orf1.orf.at channel!orf2.orf.at channel!orf3.orf.at channel!outdoorchannel.com @@ -189,15 +251,17 @@ channel!planet-tv.de channel!plus.toggo.de channel!polonia.tvp.pl -channel!prosieben.de +channel=prosieben.de channel!protv.ro channel!puls4.at -channel!rbb.daserste.de +channel!radiob.orf.at +channel!radiobremen.tv channel!rbb.rbb-online.de channel!rbbberl.rbb-online.de channel!rbbbra.rbb-online.de channel!rbbhd.daserste.de channel!rck-tv.de +channel!rheinmaintv.de channel!rictv.de channel!romance-tv.de channel!rp.swr.daserste.de @@ -207,15 +271,17 @@ channel!rts1.rts.ch channel!rts2.rts.ch channel!russia.rt.com +channel!s-anhalt.mdr.daserste.de +channel!sachsen.mdr.daserste.de channel!sat1.de channel!sd.anixehd.tv channel!select.sky.de channel!selecthd.sky.de -channel!servustv.com -channel!sf1.srf.ch -channel!sf2.srf.ch -channel!sfi.srf.ch +channel!servustv.de +channel!sh.ndr.daserste.de +channel!siegen.wdr.daserste.de channel!sixx.de +channel!sonyentertainment.tv channel!spiegel-geschichte.tv channel!sport1.sky.de channel!sport10.sky.de @@ -232,7 +298,6 @@ channel!sportdigital.tv channel!sporthd1.sky.de channel!sporthd10.sky.de -channel!sporthd11.sky.de channel!sporthd2.sky.de channel!sporthd3.sky.de channel!sporthd4.sky.de @@ -251,17 +316,21 @@ channel!syfy.de channel!tele5.de channel!tlc.discovery.de +channel!toons.nick.de channel!travelchanneltv.eu channel!tv2.dk -channel!tv5monde.org channel!universalchannel.de channel!urbanint.trace.tv channel!vh1.eu channel!vox.de +channel!waidwerk.tv channel!wdr.daserste.de channel!wdrhd.daserste.de channel!welt.de channel!wild.natgeo.de channel!wildhd.natgeo.de -channel!xd.disneychannel.de 
+channel!world.kbs.co.kr +channel!wuppertal.wdr.daserste.de +channel!xite.tv +channel!youfm.de.radio.xmltv.se channel!zdf.de diff -Nru xmltv-0.6.3/grab/eu_xmltvse/tv_grab_eu_xmltvse xmltv-1.0.0/grab/eu_xmltvse/tv_grab_eu_xmltvse --- xmltv-0.6.3/grab/eu_xmltvse/tv_grab_eu_xmltvse 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/eu_xmltvse/tv_grab_eu_xmltvse 2021-02-09 10:49:46.000000000 +0000 @@ -79,11 +79,11 @@ =head1 SUPPORTED CHANNELS -For information on supported channels, see http://xmltv.xmltv.se +For information on supported channels, see https://xmltv.xmltv.se =head1 AUTHOR -Joakim Nylén, me -at- jnylen -dot- nu. This script is a modified version +Joakim Nylén, joakim -at- pixelmonster -dot- ee. This script is a modified version of tv_grab_se_swedb by Mattias Holmlund, mattias -at- holmlund -dot- se. While the documentation and parts of the code copied from tv_grab_uk by Ed Avis, ed -at- membled -dot- com. @@ -125,8 +125,8 @@ sub t; -my $default_root_url = 'http://xmltv.xmltv.se/'; -my $default_main_url = 'http://xmltv.se/'; +my $default_root_url = 'https://xmltv.xmltv.se/'; +my $default_main_url = 'https://xmltv.se/'; my $default_cachedir = get_default_cachedir(); my( $opt, $conf ) = ParseOptions( { @@ -473,7 +473,7 @@ if( not exists $channels->{$channel_id} ) { print STDERR "Unknown channel $channel_id." . - " See http://xmltv.xmltv.se" . + " See https://xmltv.xmltv.se" . " for a list of available channels or run" . 
" tv_grab_eu_xmltvse --configure to reconfigure.\n"; next; diff -Nru xmltv-0.6.3/grab/fi/fi/programme.pm xmltv-1.0.0/grab/fi/fi/programme.pm --- xmltv-0.6.3/grab/fi/fi/programme.pm 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/fi/fi/programme.pm 2021-02-09 10:49:46.000000000 +0000 @@ -160,7 +160,7 @@ # Programme post-processing # # Parental level removal (catch also the duplicates) - $title =~ s/(?:\s+\((?:S|T|K?7|K?9|K?12|K?16|K?18)\))+\s*$// + $title =~ s/(?:\s+\(\s*(?:S|T|K?7|K?9|K?12|K?16|K?18)\s*\))+\s*$// if $title_strip_parental; # # Title mapping diff -Nru xmltv-0.6.3/grab/fi/fi/source/ampparit.pm xmltv-1.0.0/grab/fi/fi/source/ampparit.pm --- xmltv-0.6.3/grab/fi/fi/source/ampparit.pm 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/fi/fi/source/ampparit.pm 2021-02-09 10:49:46.000000000 +0000 @@ -135,13 +135,16 @@ if (my($hour, $minute) = $start->as_text() =~ /^(\d{2})[:.](\d{2})$/) { $title = $title->as_text(); - $desc = $desc->as_text(); - debug(3, "List entry ${id} ($hour:$minute) $title"); - debug(4, $desc) if $desc; + if (length($title)) { + $desc = $desc->as_text(); - my $object = appendProgramme($opaque, $hour, $minute, $title); - $object->description($desc); + debug(3, "List entry ${id} ($hour:$minute) $title"); + debug(4, $desc) if $desc; + + my $object = appendProgramme($opaque, $hour, $minute, $title); + $object->description($desc); + } } } } diff -Nru xmltv-0.6.3/grab/fi/fi/source/foxtv.pm xmltv-1.0.0/grab/fi/fi/source/foxtv.pm --- xmltv-0.6.3/grab/fi/fi/source/foxtv.pm 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/fi/fi/source/foxtv.pm 2021-02-09 10:49:46.000000000 +0000 @@ -20,7 +20,7 @@ fi::programmeStartOnly->import(); # Cleanup filter regexes -my $cleanup_match = qr!\s*(?:(?:\d+\.\s+)?(?:Kausi|Jakso|Osa)\.?(?:\s+(:?\d+/)?\d+\.\s+)?){1,2}!i; +our $cleanup_match = qr!\s*(?:(?:\d+\.\s+)?(?:Kausi|Jakso|Osa)\.?(?:\s+(:?\d+/)?\d+\.\s+)?){1,2}!i; # Description sub description { 'foxtv.fi' } diff -Nru 
xmltv-0.6.3/grab/fi/fi/source/iltapulu.pm xmltv-1.0.0/grab/fi/fi/source/iltapulu.pm --- xmltv-0.6.3/grab/fi/fi/source/iltapulu.pm 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/fi/fi/source/iltapulu.pm 2021-02-09 10:49:46.000000000 +0000 @@ -23,6 +23,20 @@ # Import from internal modules fi::common->import(); +fi::programmeStartOnly->import(); + +# Category mapping +our %categories = ( + e => "elokuvat", + f => "fakta", + kf => "kotimainen fiktio", + l => "lapsi", + nan => undef, # ??? e.g. "Astral TV" + u => "uutiset", + ur => "urheilu", + us => "ulkomaiset sarjat", + vm => "viihde", # "ja musiiki"??? +); # Description sub description { 'iltapulu.fi' } @@ -32,33 +46,38 @@ my %channels; # Fetch & parse HTML - my $root = fetchTree("https://www.iltapulu.fi/?&all=1"); + my $root = fetchTree("https://www.iltapulu.fi/kaikki-kanavat", + undef, undef, 1); if ($root) { # - # Channel list can be found in table rows + # Channel list can be found in sections # - # - # - # - # - # + #
+ #
+ #
+ # + # + # # ... - #
- # - # ... - #
......
- # ... - # - if (my @tables = $root->look_down("class" => "channel-row")) { - foreach my $table (@tables) { - if (my @cells = $table->look_down("class" => "channel-name")) { - foreach my $cell (@cells) { - if (my $image = $cell->find("img")) { + # + # ... + # + # + # + if (my $table = $root->look_down("id" => "programtable")) { + if (my @sections = $table->look_down("_tag" => "section", + "id" => qr/^channel-\d+$/)) { + foreach my $section (@sections) { + if (my $header = $section->look_down("class" => "channel-logo")) { + if (my $image = $header->find("img")) { my $name = $image->attr("alt"); - $name =~ s/\s+tv-ohjelmat$//; + $name =~ s/\s+tv-ohjelmat.*$//; if (defined($name) && length($name)) { - my $channel_id = (scalar(keys %channels) + 1) . ".iltapulu.fi"; + my($channel_id) = $section->attr("id") =~ /(\d+)$/; + $channel_id .= ".iltapulu.fi"; debug(3, "channel '$name' ($channel_id)"); $channels{$channel_id} = "fi $name"; } @@ -84,98 +103,65 @@ return unless my($channel) = ($id =~ /^([-\w]+)\.iltapulu\.fi$/); # Fetch & parse HTML - my $root = fetchTree("https://www.iltapulu.fi/?all=1&date=" . $today->ymdd()); + my $root = fetchTree("https://www.iltapulu.fi/" . $today->ymdd(), + undef, undef, 1); if ($root) { - my $count = 0; - my @objects; + my $opaque = startProgrammeList($id, "fi"); # - # Programme data is contained inside a div class="" + # Programme data is contained inside a li class="g-" # - # - # - # - # - # - # ... - # - # - # - # ... - # - # ... - # - #
......
- #
- #
- # - # - # - # - # - #
00.15 - # - # Uutisikkuna - # - #
- #
- #
+ #
+ #
+ #
+ # + # + # + #
- # ... - # - if (my @tables = $root->look_down("class" => "channel-row")) { - - TABLES: - foreach my $table (@tables) { - if (my @cells = $table->look_down("class" => "channel-name")) { - - # Channel in this table? - my $index = $channel - $count - 1; - $count += @cells; - if ($channel <= $count) { - - # Extract from each row the div's from the same index - my @divs; - if (my @rows = $table->look_down("_tag" => "tr", - "class" => qr/full-row/)) { - foreach my $row (@rows) { - my $children = $row->content_array_ref; - if ($children) { - my $td = $children->[$index]; - push(@divs, $td->look_down("class" => qr/full-row/)) - if defined($td); - } - } - } - - for my $div (@divs) { - my $start = $div->attr("data-starttime"); - my $end = $div->attr("data-endtime"); - my $link = $div->look_down("class" => qr/program-open/); - - if ($start && $end && $link) { - my $title = $link->as_text(); - - if (length($title)) { - my $desc = $link->attr("title"); - my $category = ($link->parent()->attr("class") =~ /movie/) ? "elokuvat" : undef; + # + #
    + # + if (my $table = $root->look_down("id" => "programtable")) { + if (my $section = $table->look_down("_tag" => "section", + "id" => qr/^channel-${channel}/)) { + if (my @entries = $section->look_down("_tag" => "li")) { + foreach my $entry (@entries) { + my $start = $entry->look_down("_tag" => "time"); + my $link = $entry->look_down("class" => "op"); + + if ($start && $link) { + if (my($hour, $minute) = + $start->as_text() =~ /^(\d{2})[:.](\d{2})$/) { + my $title = $link->as_text(); + + if (length($title)) { + my $desc = $link->attr("title"); + my($category) = ($entry->attr("class") =~ /g-(\w+)$/); + $category = $categories{$category} if $category; - debug(3, "List entry ${id} ($start -> $end) $title"); + debug(3, "List entry ${id} ($hour:$minute) $title"); debug(4, $desc) if $desc; debug(4, $category) if defined $category; - # Create program object - my $object = fi::programme->new($id, "fi", $title, $start, $end); - $object->category($category); + my $object = appendProgramme($opaque, $hour, $minute, $title); $object->description($desc); - push(@objects, $object); + $object->category($category); } } } - - # skip the rest of the data - last TABLES; } } } @@ -184,10 +170,11 @@ # Done with the HTML tree $root->delete(); - # Fix overlapping programmes - fi::programme->fixOverlaps(\@objects); - - return(\@objects); + # Convert list to program objects + # + # First entry always starts on $yesteday + # Last entry always ends on $tomorrow. 
+ return(convertProgrammeList($opaque, $yesterday, $today, $tomorrow)); } return; diff -Nru xmltv-0.6.3/grab/fi/fi/source/telkku.pm xmltv-1.0.0/grab/fi/fi/source/telkku.pm --- xmltv-0.6.3/grab/fi/fi/source/telkku.pm 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/fi/fi/source/telkku.pm 2021-02-09 10:49:46.000000000 +0000 @@ -23,7 +23,7 @@ # Description sub description { 'telkku.com' } -my %categories = ( +our %categories = ( SPORTS => "urheilu", MOVIE => "elokuvat", ); @@ -52,7 +52,7 @@ } # cache for group name to API ID mapping -my %group2id; +our %group2id; # Grab channel list sub channels { diff -Nru xmltv-0.6.3/grab/fi/fi/source/telsu.pm xmltv-1.0.0/grab/fi/fi/source/telsu.pm --- xmltv-0.6.3/grab/fi/fi/source/telsu.pm 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/fi/fi/source/telsu.pm 2021-02-09 10:49:46.000000000 +0000 @@ -118,31 +118,34 @@ if (my($new, $start_h, $start_m, $end_h, $end_m) = $time->as_text() =~ /^(.+)\s(\d{2})[:.](\d{2})\s-\s(\d{2})[:.](\d{2})/) { $title = $title->as_text(); - $desc = $desc->as_text(); - # Detect day change - if ($new ne $current) { - $current = $new; - shift(@offsets); - } - my $start = timeToEpoch($offsets[0], $start_h, $start_m); - my $end = timeToEpoch($offsets[0], $end_h, $end_m); + if (length($title)) { + $desc = $desc->as_text(); - # Detect end time on next day - if ($end < $start) { - # Are there enough day offsets left to handle a day change? - # No -> more programmes than we asked for, exit loop - last if @offsets < 2; - $end = timeToEpoch($offsets[1], $end_h, $end_m); - } + # Detect day change + if ($new ne $current) { + $current = $new; + shift(@offsets); + } + my $start = timeToEpoch($offsets[0], $start_h, $start_m); + my $end = timeToEpoch($offsets[0], $end_h, $end_m); - debug(3, "List entry ${id} ($start -> $end) $title"); - debug(4, $desc) if $desc; + # Detect end time on next day + if ($end < $start) { + # Are there enough day offsets left to handle a day change? 
+ # No -> more programmes than we asked for, exit loop + last if @offsets < 2; + $end = timeToEpoch($offsets[1], $end_h, $end_m); + } - # Create program object - my $object = fi::programme->new($id, "fi", $title, $start, $end); - $object->description($desc); - push(@objects, $object); + debug(3, "List entry ${id} ($start -> $end) $title"); + debug(4, $desc) if $desc; + + # Create program object + my $object = fi::programme->new($id, "fi", $title, $start, $end); + $object->description($desc); + push(@objects, $object); + } } } } diff -Nru xmltv-0.6.3/grab/fi/fi/source/yle.pm xmltv-1.0.0/grab/fi/fi/source/yle.pm --- xmltv-0.6.3/grab/fi/fi/source/yle.pm 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/fi/fi/source/yle.pm 2021-02-09 10:49:46.000000000 +0000 @@ -22,7 +22,7 @@ # Description sub description { 'yle.fi' } -my %languages = ( +our %languages = ( "fi" => [ "areena", "opas" ], "sv" => [ "arenan", "guide" ], ); diff -Nru xmltv-0.6.3/grab/fi/test.conf xmltv-1.0.0/grab/fi/test.conf --- xmltv-0.6.3/grab/fi/test.conf 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/fi/test.conf 2021-02-09 10:49:46.000000000 +0000 @@ -14,63 +14,55 @@ # NOTE: ##channel are those channels that should not be unmasked during testing # #channel 10.iltapulu.fi AVA -#channel 11.iltapulu.fi TV5 -#channel 12.iltapulu.fi Kutonen -#channel 13.iltapulu.fi Hero -##channel 14.iltapulu.fi FRII -##channel 15.iltapulu.fi TLC -##channel 16.iltapulu.fi National Geographic -##channel 17.iltapulu.fi MTV -##channel 18.iltapulu.fi Discovery Channel -##channel 19.iltapulu.fi Disney Channel +#channel 11.iltapulu.fi Yle Teema & Fem +##channel 12.iltapulu.fi C More Juniori +#channel 13.iltapulu.fi Liv +##channel 17.iltapulu.fi C More Max channel 1.iltapulu.fi YLE TV1 -##channel 20.iltapulu.fi Eurosport -##channel 21.iltapulu.fi Eurosport 2 -##channel 22.iltapulu.fi C More Max -##channel 23.iltapulu.fi C More Juniori -##channel 24.iltapulu.fi C More Sport 1 -##channel 25.iltapulu.fi C More Sport 
2 -##channel 26.iltapulu.fi C More First -##channel 27.iltapulu.fi C More Series -##channel 28.iltapulu.fi C More Hits -##channel 29.iltapulu.fi C More Stars +##channel 26.iltapulu.fi C More Sport 2 +##channel 28.iltapulu.fi C More First +##channel 29.iltapulu.fi C More Hits channel 2.iltapulu.fi YLE TV2 -##channel 30.iltapulu.fi SF-kanalen -##channel 31.iltapulu.fi C More First HD -##channel 32.iltapulu.fi C More Golf HD -##channel 33.iltapulu.fi C More Hits HD -##channel 34.iltapulu.fi C More Series HD -##channel 35.iltapulu.fi Ruutu+ Lapset -##channel 36.iltapulu.fi Ruutu+ Leffat ja Sarjat -##channel 37.iltapulu.fi Ruutu+ Dokkarit -##channel 38.iltapulu.fi Ruutu+ Urheilu 1 -##channel 39.iltapulu.fi Ruutu+ Urheilu 2 +##channel 32.iltapulu.fi C More Series +##channel 34.iltapulu.fi C More Sport 1 channel 3.iltapulu.fi MTV3 -##channel 40.iltapulu.fi Viasat Sport -##channel 41.iltapulu.fi Viasat Golf -##channel 42.iltapulu.fi Viasat Hockey -##channel 43.iltapulu.fi Viasat Urheilu HD -##channel 44.iltapulu.fi Viasat Jalkapallo HD -##channel 45.iltapulu.fi Viasat Jaakiekko HD -##channel 46.iltapulu.fi Viasat Sport Premium -##channel 47.iltapulu.fi Viasat Fotboll -##channel 48.iltapulu.fi Viasat Film -##channel 49.iltapulu.fi Viasat Film Action +##channel 41.iltapulu.fi SF-kanalen +##channel 42.iltapulu.fi V Film Premiere +##channel 43.iltapulu.fi V Film Action +##channel 46.iltapulu.fi V Film Family +##channel 49.iltapulu.fi V Sport 1 #channel 4.iltapulu.fi Nelonen -##channel 50.iltapulu.fi Viasat Film Family -##channel 51.iltapulu.fi Viasat Film Comedy -##channel 52.iltapulu.fi Viasat Film Hits -##channel 53.iltapulu.fi Nickelodeon -##channel 54.iltapulu.fi TV7 -##channel 55.iltapulu.fi RTL -#channel 5.iltapulu.fi Sub -#channel 6.iltapulu.fi JIM -#channel 7.iltapulu.fi Liv +##channel 51.iltapulu.fi V Sport Golf +##channel 52.iltapulu.fi V Sport Hockey +##channel 58.iltapulu.fi Discovery Channel +##channel 59.iltapulu.fi Eurosport +#channel 5.iltapulu.fi TV5 
+##channel 60.iltapulu.fi Eurosport 2 +##channel 61.iltapulu.fi MTV Finland +#channel 62.iltapulu.fi Kutonen +#channel 6.iltapulu.fi Sub +##channel 70.iltapulu.fi V Sport Urheilu HD +#channel 73.iltapulu.fi Hero +##channel 74.iltapulu.fi FRII +##channel 76.iltapulu.fi V Film Hits +##channel 77.iltapulu.fi V Sport Jalkapallo HD +##channel 78.iltapulu.fi V Sport Jaakiekko HD +##channel 79.iltapulu.fi V Sport Premium +#channel 7.iltapulu.fi JIM +##channel 80.iltapulu.fi V Sport Football +##channel 81.iltapulu.fi TLC +##channel 82.iltapulu.fi National Geographic +##channel 83.iltapulu.fi C More Stars +##channel 85.iltapulu.fi AlfaTV +##channel 86.iltapulu.fi Paramount Network Finland +##channel 87.iltapulu.fi Viaplay Urheilu +##channel 88.iltapulu.fi Cmore #channel 8.iltapulu.fi FOX -#channel 9.iltapulu.fi Yle Teema & Fem +##channel 90.iltapulu.fi Veikkaus TV +##channel 91.iltapulu.fi Ruutu ##channel alfatv.ampparit.com AlfaTV ##channel AlfaTV.fi.yle.fi AlfaTV -##channel alfatv.muut.telkku.com AlfaTV +##channel alfatv.peruskanavat.telkku.com AlfaTV ##channel AlfaTV.sv.yle.fi AlfaTV ##channel alfatv.telsu.fi AlfaTV ##channel al-jazeera.uutiset.telkku.com Al Jazeera @@ -91,19 +83,16 @@ ##channel cartoon-network.lapset.telkku.com Cartoon Network ##channel c-more-first.ampparit.com C More First ##channel cmore-first.elokuvat.telkku.com C More First -##channel cmore-first-hd.elokuvat.telkku.com C More First HD ##channel cmore_first.telsu.fi C More First -##channel cmore_golfhd.telsu.fi C More Golf HD -##channel cmore-golf-hd.urheilu.telkku.com C More Golf HD ##channel c-more-hits.ampparit.com C More Hits ##channel cmore-hits.elokuvat.telkku.com C More Hits -##channel cmore-hits-hd.elokuvat.telkku.com C More Hits HD ##channel cmore_hits.telsu.fi C More Hits ##channel c-more-juniori.ampparit.com C More Juniori +##channel cmore_max2.telsu.fi C More MAX 2 +##channel cmore-max-2.urheilu.telkku.com C More Max 2 ##channel c-more-max.ampparit.com C More MAX ##channel 
c-more-series.ampparit.com C More Series ##channel cmore-series.elokuvat.telkku.com C More Series -##channel cmore-series-hd.elokuvat.telkku.com C More Series HD ##channel cmore_series.telsu.fi C More Series ##channel cmore_sfkanalen.telsu.fi SF-Kanalen ##channel c-more-sport-1.ampparit.com C More Sport 1 @@ -111,19 +100,16 @@ ##channel c-more-stars.ampparit.com C More Stars ##channel cmore-stars.elokuvat.telkku.com C More Stars ##channel cmore_stars.telsu.fi C More Stars -##channel cmore-tennis.urheilu.telkku.com C More Tennis ##channel cnbc.uutiset.telkku.com CNBC ##channel cnn.telsu.fi CNN ##channel cnn.viasat-kulta.telkku.com CNN ##channel deutsche-welle.uutiset.telkku.com Deutsche Welle ##channel discovery-channel.ampparit.com Discovery Channel ##channel discovery-channel.dokumentit.telkku.com Discovery Channel -##channel discovery-hd-showcase.dokumentit.telkku.com Discovery HD Showcase ##channel discovery-science.ampparit.com Discovery Science ##channel discovery-science.dokumentit.telkku.com Discovery Science ##channel discovery.telsu.fi Discovery Channel ##channel discovery-world.ampparit.com Discovery World -##channel discovery-world.dokumentit.telkku.com Discovery World ##channel discoveryworld.telsu.fi Discovery World ##channel disney-channel.lapset.telkku.com Disney Channel ##channel disney-junior.lapset.telkku.com Disney Junior @@ -210,12 +196,7 @@ ##channel nationalgeo.telsu.fi National Geographic Channel #channel nelonen.ampparit.com Nelonen #channel Nelonen.fi.yle.fi Nelonen -##channel nelonen-maailma.ruutu.telkku.com Ruutu+ Dokkarit -##channel nelonen-nappula.lapset.telkku.com Ruutu+ Lapset #channel nelonen.peruskanavat.telkku.com Nelonen -##channel nelonen-prime.elokuvat.telkku.com Ruutu+ Leffat ja Sarjat -##channel nelonen-pro-1.urheilu.telkku.com Ruutu+ Urheilu 1 -##channel nelonen-pro-2.urheilu.telkku.com Ruutu+ Urheilu 2 #channel Nelonen.sv.yle.fi Nelonen #channel nelonen.telsu.fi Nelonen ##channel nickelodeon.lapset.telkku.com Nickelodeon @@ 
-223,8 +204,9 @@ ##channel nick-jr.lapset.telkku.com Nick Jr ##channel nickjr.telsu.fi Nick Jr ##channel outdoor-channel.lifestyle.telkku.com Outdoor Channel -##channel paramount-network.muut.telkku.com Paramount Network -##channel playboy-tv.lifestyle.telkku.com Playboy TV +##channel paramount-network.ampparit.com Paramount Network +##channel paramount-network.peruskanavat.telkku.com Paramount Network +##channel paramount.telsu.fi Paramount Network ##channel rtl2.telsu.fi RTL II ##channel rtl.muut.telkku.com RTL ##channel rtl.telsu.fi RTL @@ -269,59 +251,58 @@ ##channel TV-Finland.sv.yle.fi TV Finland ##channel tvfinland.telsu.fi TV Finland #channel tv-viisi.ampparit.com TV Viisi +##channel v_film_action.telsu.fi V film action +##channel v_film_family.telsu.fi V film family +##channel v_film_hits.telsu.fi V film hits +##channel v_film_premiere.telsu.fi V film premiere ##channel vh1-classic.musiikki.telkku.com VH1 Classic ##channel vh1.musiikki.telkku.com VH1 -##channel viasat_action.telsu.fi Viasat Film Action -##channel viasat-esport-tv.ampparit.com Viasat eSportsTV ##channel viasat-explore.ampparit.com Viasat Explore ##channel viasat_explore.telsu.fi Viasat Explore ##channel viasat-explore.viasat-kulta.telkku.com Viasat Explore -##channel viasat_family.telsu.fi Viasat Film Family ##channel viasat-film-action.ampparit.com Viasat Film Action ##channel viasat-film-action.elokuvat.telkku.com Viasat Film Action ##channel viasat-film.elokuvat.telkku.com Viasat Film Premiere ##channel viasat-film-family.ampparit.com Viasat Film Family ##channel viasat-film-family.elokuvat.telkku.com Viasat Film Family ##channel viasat-film-hits.ampparit.com Viasat Film Hits -#channel viasat-film-hits.viasat-kulta.telkku.com Viasat Film Hits +#channel viasat-film-hits.elokuvat.telkku.com Viasat Film Hits ##channel viasat-film-premiere.ampparit.com Viasat Film Premiere -##channel viasat-fotboll.ampparit.com Viasat Fotboll +##channel viasat-fotboll.ampparit.com Viasat Football ##channel 
viasat-fotboll-hd.urheilu.telkku.com Viasat Fotboll HD -##channel viasat_fotboll.telsu.fi Viasat Fotboll HD ##channel viasat-golf.ampparit.com Viasat Golf -##channel viasat_golf.telsu.fi Viasat Golf ##channel viasat-golf.urheilu.telkku.com Viasat Golf ##channel viasat-history.ampparit.com Viasat History ##channel viasat_history.telsu.fi Viasat History ##channel viasat-history.viasat-kulta.telkku.com Viasat History -##channel viasat_hits.telsu.fi Viasat Film Hits ##channel viasat-hockey.ampparit.com Viasat Hockey -##channel viasat-hockey-finland.urheilu.telkku.com Viasat Urheilu HD -##channel viasat_hockey.telsu.fi Viasat Hockey ##channel viasat-hockey.urheilu.telkku.com Viasat Hockey ##channel viasat-jaakiekko.ampparit.com Viasat Jääkiekko ##channel viasat-jaakiekko-hd.urheilu.telkku.com Viasat Jääkiekko HD -##channel viasat_jaakiekko.telsu.fi Viasat jääkiekko HD ##channel viasat-jalkapallo.ampparit.com Viasat Jalkapallo ##channel viasat-jalkapallo-hd.urheilu.telkku.com Viasat Jalkapallo HD -##channel viasat_jalkapallo.telsu.fi Viasat jalkapallo HD ##channel viasat-nature.ampparit.com Viasat Nature ##channel viasat-nature-crime.viasat-kulta.telkku.com Viasat Nature/Crime ##channel viasat_nature.telsu.fi Viasat Nature -##channel viasat_premiere.telsu.fi Viasat Film Premiere -##channel viasat-sport.ampparit.com Viasat Sport -##channel viasat_sport_fi.telsu.fi Viasat Urheilu HD +##channel viasat-sport.ampparit.com Viasat Sport1 ##channel viasat-sport-premium.ampparit.com Viasat Sport Premium ##channel viasat-sport-premium-hd.urheilu.telkku.com Viasat Sport Premium HD -##channel viasat_sport_premium.telsu.fi Viasat Sport Premium HD -##channel viasat_sport.telsu.fi Viasat Sport ##channel viasat-sport.urheilu.telkku.com Viasat Sport -##channel viasat_sport_xtra.telsu.fi Viasat Sport Xtra ##channel viasat-ultra.ampparit.com Viasat Ultra HD ##channel viasat-ultra-hd.viasat-kulta.telkku.com Viasat Ultra HD ##channel viasat-urheilu.ampparit.com Viasat Urheilu +##channel 
viasat-urheilu-hd.urheilu.telkku.com Viasat Urheilu HD #channel viisi.telsu.fi TV5 ##channel viron-etv.muut.telkku.com Viron ETV +##channel v_sport_fi.telsu.fi V sport urheilu +##channel v_sport_football.telsu.fi V sport football +##channel v_sport_golf.telsu.fi V sport golf +##channel v_sport_hockey.telsu.fi V sport hockey +##channel v_sport_jaakiekko.telsu.fi V sport jääkiekko +##channel v_sport_jalkapallo.telsu.fi V sport jalkapallo +##channel v_sport_premium.telsu.fi V sport premium +##channel v_sport.telsu.fi V sport 1 +##channel v_sport_ultra.telsu.fi V sport ultra HD channel yle1.telsu.fi Yle TV1 channel yle2.telsu.fi Yle TV2 ##channel Yle-Areena.fi.yle.fi Yle Areena diff -Nru xmltv-0.6.3/grab/it/test.conf xmltv-1.0.0/grab/it/test.conf --- xmltv-0.6.3/grab/it/test.conf 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/it/test.conf 2021-02-09 10:49:46.000000000 +0000 @@ -1,6 +1,3 @@ -channel www.canale5.com # Canale 5 +channel www.raiuno.rai.it # Rai 1 channel www.raidue.rai.it # Rai 2 channel www.raitre.rai.it # Rai 3 -channel www.raiuno.rai.it # Rai Uno -channel mtvmusic.guidatv.sky.it # MTV Music -channel www.la7.it # LA7 diff -Nru xmltv-0.6.3/grab/na_dtv/tv_grab_na_dtv xmltv-1.0.0/grab/na_dtv/tv_grab_na_dtv --- xmltv-0.6.3/grab/na_dtv/tv_grab_na_dtv 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/na_dtv/tv_grab_na_dtv 2021-02-09 10:49:46.000000000 +0000 @@ -141,6 +141,7 @@ my $queue_filename = "$TMP_FILEBASE" . "q"; my $SITEBASE = "http://www.directv.com"; +my $SITEBASESSL = "https://www.directv.com"; # URL for grabbing channel list my $CHANNEL_LIST_URL = "$SITEBASE/json/channels"; @@ -149,7 +150,7 @@ my $SCHEDULE_URL = "$SITEBASE/json/channelschedule"; # Each program ID will be appended to this URL to get its details. -my $DETAILS_URL = "$SITEBASE/json/program/flip"; +my $DETAILS_URL = "$SITEBASESSL/json/program/flip"; my $XML_PRELUDE = '' . 
"\n" @@ -323,8 +324,8 @@ if $conf->{proxy}->[0]; if ($DEBUG && $VERBOSE) { - $ua->add_handler("request_send", sub { print "Request:\n"; shift->dump; return }); - $ua->add_handler("response_done", sub { print "Response:\n"; shift->dump; return }); + $ua->add_handler("request_send", sub { print STDERR "Request:\n" . shift->dump; return }); + $ua->add_handler("response_done", sub { print STDERR "Response:\n" . shift->dump; return }); } return $ua; diff -Nru xmltv-0.6.3/grab/pt_vodafone/test.conf xmltv-1.0.0/grab/pt_vodafone/test.conf --- xmltv-0.6.3/grab/pt_vodafone/test.conf 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/pt_vodafone/test.conf 2021-02-09 10:49:46.000000000 +0000 @@ -37,12 +37,12 @@ channel!EURSPHD channel!EURSP2 channel!EURSP2 HD -channel!ES1 HD -channel!ES2 HD -channel!ES3 HD -channel!ES4 HD -channel!ES5 HD -channel!ES6 HD +channel!Eleven 1 +channel!Eleven 2 +channel!Eleven 3 +channel!Eleven 4 +channel!Eleven 5 +channel!Eleven 6 channel!FuelTVHD channel!PFC channel!DISNEY @@ -53,15 +53,16 @@ channel!BABYTV channel!SIC K channel!BOOMERANG +channel!NICK channel!SUPER RTL -channel!TVC 1 -channel!TVC 2 -channel!TVC 3 -channel!TVC 4 -channel!TVC1 HD -channel!TVC2 HD -channel!TVC 3 HD -channel!TVC 4 HD +channel!TVC Top +channel!TVC Edition +channel!TVC Emotion +channel!TVC Action +channel!TVC Top HD +channel!TVC Edition HD +channel!TVC Emotion HD +channel!TVC Action HD channel!TVC S HD channel!TVC S channel!HOLLYW HD @@ -82,8 +83,8 @@ channel!FOX Comedy HD channel!AXN W HD channel!AXN W -channel!AXN B HD -channel!AXN BLK +channel!AXN M HD +channel!AXN M channel!SyFy HD channel!SyFy SD channel!AMC HD @@ -209,6 +210,7 @@ channel!SIC R HD channel!SIC K HD channel!SPTV 4K +channel!ACEPI channel!RTP1 H channel!RTP 2 HD channel!SIC H diff -Nru xmltv-0.6.3/grab/pt_vodafone/tv_grab_pt_vodafone xmltv-1.0.0/grab/pt_vodafone/tv_grab_pt_vodafone --- xmltv-0.6.3/grab/pt_vodafone/tv_grab_pt_vodafone 2020-09-07 15:02:53.000000000 +0000 +++ 
xmltv-1.0.0/grab/pt_vodafone/tv_grab_pt_vodafone 2021-02-09 10:49:46.000000000 +0000 @@ -25,8 +25,7 @@ =head1 DESCRIPTION Output TV listings in XMLTV format for many stations available in Portugal. -This program consumes the EPG service from Vodafone at -L. +This program consumes the EPG service from L. First you must run B to choose which stations you want to receive. @@ -127,8 +126,7 @@ my $json_api = '/ott3_webapp/'; my $ua = LWP::UserAgent->new(ssl_opts => { - verify_hostname => 0, - SSL_version => 'TLSv12:!SSLv2:!SSLv3:!TLSv1:!TLSv11', + SSL_cipher_list => 'DEFAULT:!DH', }); $ua->agent("$grabber_name $grabber_version"); $ua->default_header('accept-encoding' => scalar HTTP::Message::decodable()); @@ -271,7 +269,7 @@ if ( ! $epgSource ){ die("Bad EPG download, probably channel list is outdated, rerun the grabber configure to update the list.\n" ); } - elsif ( $epgSource->{data}->@* == 0 ){ + elsif ( !$epgSource->{data} || scalar @{$epgSource->{data}} == 0 ){ print( STDERR " Empty EPG download for ".$channel.", probably channel list is outdated or no API data for that channel\n" . " Rerun the grabber configure to update the list or check for the channel EPG in the Vodafone app.\n" ); next; @@ -348,6 +346,8 @@ $url = $json_baseurl . $json_api . $path; } + print( STDERR "json_request(" . $method . ") url: " . $url . 
"\n" ) if( $opt->{debug} ); + my @params; push(@params, content_type => 'application/x-www-form-urlencoded; charset=UTF-8'); push(@params, content => $content) if defined $content; diff -Nru xmltv-0.6.3/grab/se_swedb/test.conf xmltv-1.0.0/grab/se_swedb/test.conf --- xmltv-0.6.3/grab/se_swedb/test.conf 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/se_swedb/test.conf 1970-01-01 00:00:00.000000000 +0000 @@ -1,100 +0,0 @@ -root-url=http://xmltv.tvsajten.com/xmltv/channels.xml.gz -cachedir=/tmp/.xmltv/cache -channel!14hd.viasat.se -channel!action.cmore.se -channel!action.viasat.se -channel!axess.se -channel!classic.viasat.se -channel!comedy.viasat.se -channel!dance.mtv.se -channel!disneychannel.se -channel=dr1.dr.dk -channel!dr2.dr.dk -channel!drama.viasat.se -channel!elva.tv -channel!emotion.cmore.se -channel!eurosport.com -channel!eurosport2.eurosport.com -channel!explorer.viasat.se -channel!extreme.cmore.se -channel!fakta.tv4.se -channel!family.viasat.se -channel!film.tv4.se -channel!film.viasat.se -channel!first.cmore.se -channel!firsthd.cmore.se -channel!fotboll-hockey-kids.cmore.se -channel!fotboll.cmore.se -channel!fotboll.viasat.se -channel!fotbollhd.viasat.se -channel!fxl.tv4.se -channel!golf.viasat.se -channel!guld.tv4.se -channel!hd.animalplanet.discovery.com -channel!hd.dr.dk -channel!hd.ngcsverige.com -channel!hdshowcase.discovery.com -channel!history.viasat.se -channel!hits.cmore.se -channel!hits.mtv.se -channel!hitshd.cmore.se -channel!hockey.cmore.se -channel!hockey.viasat.se -channel!investigation.discovery.com -channel!jr.nickelodeon.se -channel!kanal5.se -channel!kanal9.se -channel!kids.cmore.se -channel!komedi.tv4.se -channel!kunskapskanalen.svt.se -channel!live.cmore.se -channel!live2.cmore.se -channel!live3.cmore.se -channel!live4.cmore.se -channel!livehd.cmore.se -channel!motor.viasat.se -channel!motorhd.viasat.se -channel!mtv.se -channel!nature.viasat.se -channel!ngcsverige.com -channel!nickelodeon.se 
-channel!nordic.animalplanet.discovery.com -channel!nordic.discovery.com -channel!nordic.science.discovery.com -channel!nordic.viasat.se -channel!p1.sr.se -channel!p2.sr.se -channel!p3.sr.se -channel!playhouse.disneychannel.se -channel!premierleaguehd.viasat.se -channel!rocks.mtv.se -channel!se.comedycentral.tv -channel!series.cmore.se -channel!serieshd.cmore.se -channel!sf.cmore.se -channel!showcasehd.discovery.com -channel!sjuan.tv4.se -channel!sport-hd.cmore.se -channel!sport.cmore.se -channel!sport.tv4.se -channel!sport.viasat.se -channel!sport1-sf.cmore.se -channel!sporthd.cmore.se -channel=svt1.svt.se -channel!svt2.svt.se -channel!svt24.svt.se -channel!svtb.svt.se -channel!tennis.cmore.se -channel!tlc.discovery.com -channel!tnt7.se -channel!tv10.viasat.se -channel!tv12.tv4.se -channel!tv3.viasat.se -channel!tv4.se -channel!tv6.viasat.se -channel!tv8.viasat.se -channel!vh1.com -channel!world.discovery.com -channel!world.svt.se -channel!xd.disneychannel.se -channel!xtra.viasat.se diff -Nru xmltv-0.6.3/grab/se_swedb/tv_grab_se_swedb.in xmltv-1.0.0/grab/se_swedb/tv_grab_se_swedb.in --- xmltv-0.6.3/grab/se_swedb/tv_grab_se_swedb.in 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/se_swedb/tv_grab_se_swedb.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,478 +0,0 @@ -#!/usr/bin/perl -w - -=pod - -=head1 NAME - -@@name - Grab TV listings for @@country. - -=head1 SYNOPSIS - -@@name --help - -@@name --configure [--config-file FILE] [--gui OPTION] - -@@name [--config-file FILE] -@@nspc [--days N] [--offset N] -@@nspc [--output FILE] [--quiet] [--debug] - -@@name --list-channels [--config-file FILE] -@@nspc [--output FILE] [--quiet] [--debug] - - -=head1 DESCRIPTION - -Output TV and listings in XMLTV format for many stations -available in @@country. - -First you must run B<@@name --configure> to choose which stations -you want to receive. - -Then running B<@@name> with no arguments will get a listings for -the stations you chose for five days including today. 
- -=head1 OPTIONS - -B<--configure> Prompt for which stations to download and write the -configuration file. - -B<--config-file FILE> Set the name of the configuration file, the -default is B<~/.xmltv/@@name.conf>. This is the file written by -B<--configure> and read when grabbing. - -B<--gui OPTION> Use this option to enable a graphical interface to be used. -OPTION may be 'Tk', or left blank for the best available choice. -Additional allowed values of OPTION are 'Term' for normal terminal output -(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar. - -B<--output FILE> When grabbing, write output to FILE rather than -standard output. - -B<--days N> When grabbing, grab N days rather than 5. - -B<--offset N> Start grabbing at today + N days. N may be negative. - -B<--quiet> Suppress the progress-bar normally shown on standard error. - -B<--debug> Provide more information on progress to stderr to help in -debugging. - -B<--list-channels> Output a list of all channels that data is available - for. The list is in xmltv-format. - -B<--version> Show the version of the grabber. - -B<--help> Print a help message and exit. - -=head1 ERROR HANDLING - -If the grabber fails to download data for some channel on a specific day, -it will print an errormessage to STDERR and then continue with the other -channels and days. The grabber will exit with a status code of 1 to indicate -that the data is incomplete. - -=head1 ENVIRONMENT VARIABLES - -The environment variable HOME can be set to change where configuration -files are stored. All configuration is stored in $HOME/.xmltv/. On Windows, -it might be necessary to set HOME to a path without spaces in it. - -=head1 SUPPORTED CHANNELS - -For information on supported channels, see @@site - -=head1 AUTHOR - -Mattias Holmlund, mattias -at- holmlund -dot- se. This documentation -and parts of the code copied from tv_grab_uk by -Ed Avis, ed -at- membled -dot- com. 
- -=head1 BUGS - -=cut - -use strict; - -use XMLTV; -use XMLTV::ProgressBar; -use XMLTV::Options qw/ParseOptions/; -use XMLTV::Configure::Writer; - -use XML::LibXML; -use Date::Manip; -use Compress::Zlib; -use File::Path; -use File::Basename; -use IO::Scalar; -use LWP; - -my $ua; -$ua = LWP::UserAgent->new(); -$ua->agent("xmltv/$XMLTV::VERSION"); -$ua->env_proxy(); - -use HTTP::Cache::Transparent; - -# Although we use HTTP::Cache::Transparent, this undocumented --cache -# option for debugging is still useful since it will _always_ use a -# cached copy of a page, without contacting the server at all. -# -use XMLTV::Memoize; XMLTV::Memoize::check_argv('getuncompressed'); - -sub t; - -my $default_root_url = '@@url'; -my $default_cachedir = get_default_cachedir(); - -my( $opt, $conf ) = ParseOptions( { - grabber_name => "@@name", - capabilities => [qw/baseline manualconfig tkconfig apiconfig cache/], - stage_sub => \&config_stage, - listchannels_sub => \&list_channels, - load_old_config_sub => \&load_old_config, - version => "$XMLTV::VERSION", - description => "@@desc", - -} ); - -if (not defined( $conf->{cachedir} )) { - print STDERR "No cachedir defined in configfile " . - $opt->{'config-file'} . "\n" . - "Please run the grabber with --configure.\n"; - exit 1; -} - -if (not defined( $conf->{'root-url'} )) { - print STDERR "No root-url defined in configfile " . - $opt->{'config-file'} . "\n" . - "Please run the grabber with --configure.\n"; - exit 1; -} - -if (not defined( $conf->{'channel'} )) { - print STDERR "No channels selected in configfile " . - $opt->{'config-file'} . "\n" . 
- "Please run the grabber with --configure.\n"; - exit 1; -} - -init_cachedir( $conf->{cachedir}->[0] ); -HTTP::Cache::Transparent::init( { - BasePath => $conf->{cachedir}->[0], - NoUpdate => 15*60, - Verbose => $opt->{debug}, - } ); - -binmode (STDOUT); - -my($xmldecl, $channels) = load_channels( $conf->{'root-url'}->[0] ); - -my( $odoc, $root ); -my $warnings = 0; - -write_header( $xmldecl ); - -write_channel_list( $conf->{channel} ); - -my $now = ParseDate( 'now' ); -my $date =$now; -$date = DateCalc( $now, "+$opt->{offset} days" ) - if( $opt->{offset} ); - -my $bar = undef; -$bar = new XMLTV::ProgressBar( { - name => 'downloading listings', - count => $opt->{days} * @{$conf->{channel}}, - }) if (not $opt->{quiet}) && (not $opt->{debug}); - -for( my $i=0; $i < $opt->{days}; $i++ ) -{ - t "Date: $date"; - foreach my $channel_id (@{$conf->{channel}}) - { - # We have already warned the user if the channel doesn't exist. - if( exists $channels->{$channel_id} ) - { - t " $channel_id"; - my( $channel_name, $url ) = @{$channels->{$channel_id}}; - print_data( $url, $channel_id, $date ) - or warning( "Failed to download data for $channel_id on " . - UnixDate( $date, "%Y-%m-%d" ) . "." ); - } - $bar->update() if defined( $bar ); - } - $date = DateCalc( $date, "+1 days" ); -} - -$bar->finish() if defined $bar; - -write_footer(); - -# Signal that something went wrong if there were warnings. -exit(1) if $warnings; - -# All data fetched ok. -t "Exiting without warnings."; -exit(0); - -sub t -{ - my( $message ) = @_; - print STDERR $message . "\n" if $opt->{debug}; -} - -sub warning -{ - my( $message ) = @_; - print STDERR $message . 
"\n"; - $warnings++; -} - -sub list_channels -{ - my( $conf, $opt ) = @_; - - ( $xmldecl, $channels ) = load_channels( $conf->{'root-url'}->[0] ); - - my $result=""; - my $fh = new IO::Scalar \$result; - my $oldfh = select( $fh ); - write_header( $xmldecl ); - write_channel_list( [sort keys %{$channels}] ); - write_footer(); - select( $oldfh ); - $fh->close(); - - return $result; -} - -sub config_stage -{ - my( $stage, $conf ) = @_; - - die "Unknown stage $stage" if $stage ne "start"; - - my $result; - my $writer = new XMLTV::Configure::Writer( OUTPUT => \$result, - encoding => 'iso-8859-1' ); - $writer->start( { grabber => '@@name' } ); - $writer->write_string( { - id => 'root-url', - title => [ [ 'Root URL for grabbing data', 'en' ] ], - description => [ - [ 'The file at this URL describes which channels are available and ' . - 'where data can be found for them. ', 'en' ] ], - default => $default_root_url, - } ); - $writer->write_string( { - id => 'cachedir', - title => [ [ 'Directory to store the cache in', 'en' ] ], - description => [ - [ '@@name uses a cache with files that it has already '. - 'downloaded. Please specify where the cache shall be stored. ', - 'en' ] ], - default => $default_cachedir, - } ); - - $writer->end( 'select-channels' ); - - return $result; -} - -# -# Load a configuration file in the old format. 
-# - -sub load_old_config -{ - my( $config_file ) = @_; - - my @lines = XMLTV::Config_file::read_lines( $config_file ); - - my $conf = {}; - $conf->{cachedir}->[0] = $default_cachedir; - $conf->{'root-url'}->[0] = $default_root_url; - $conf->{channel} = []; - - foreach my $line (@lines) - { - next unless defined $line; - - my( $command, $param ) = split( /\s+/, $line, 2 ); - $param =~ tr/\n\r//d; - $param =~ s/\s+$//; - - if ( $command =~ /^\s*root-url\s*$/) { - $conf->{'root-url'}->[0] = $param; - } elsif ( $command =~ /^\s*channel\s*$/) { - push @{$conf->{channel}}, $param; - } elsif ( $command eq 'cache-dir' ) { - $conf->{'cachedir'}->[0] = $param; - } else { - die "Unknown command $command in config-file $config_file" - } - } - - return $conf; -} - -sub get_default_cachedir -{ - my $winhome = $ENV{HOMEDRIVE} . $ENV{HOMEPATH} - if defined( $ENV{HOMEDRIVE} ) - and defined( $ENV{HOMEPATH} ); - - my $home = $ENV{HOME} || $winhome || "."; - return "$home/.xmltv/cache"; -} - -sub init_cachedir -{ - my( $path ) = @_; - if( not -d $path ) - { - mkpath( $path ) or die "Failed to create cache-directory $path: $@"; - } -} - -sub load_channels -{ - my( $url ) = @_; - - my %channels; - - my $xmldata = getuncompressed( $url ); - - defined( $xmldata ) or die "Failed to fetch $url"; - - my $xml = XML::LibXML->new; - - my $doc = $xml->parse_string($xmldata); - - my $xmldecl = "version() . "' " . - "encoding='" . $doc->encoding() . "'?>\n"; - - my $ns = $doc->find( "//channel" ); - - foreach my $node ($ns->get_nodelist) - { - my $id = $node->findvalue( '@id' ); - my $name = $node->findvalue( 'display-name[1]' ); - my $url = $node->findvalue( 'base-url' ); - my $urlns = $node->find( './base-url' ); - foreach my $urlnode ($urlns->get_nodelist) - { - $node->removeChild( $urlnode ); - } - $channels{$id} = [ $name, $url, $node->toString(0, 1) ]; - } - - return ($xmldecl, \%channels); -} - -sub print_data -{ - my( $rooturl, $channel_id, $date ) = @_; - - my $url = $rooturl . 
$channel_id . "_" . UnixDate( $date, "%Y-%m-%d" ) . - ".xml.gz"; - - my $xmldata = getuncompressed( $url ); - - defined $xmldata or return 0; - - my $in = new IO::Scalar \$xmldata; - while( my $line = $in->getline() ) - { - last if $line =~ /getline() ) - { - last if $line =~ /<\/tv>/; - print $line; - } - - return 1; -} - -sub write_header -{ - my( $xmldecl ) = @_; - - # Use the same xml declaration as the one in - # channels.xml - print $xmldecl; - print '' . "\n"; - print "\n"; -} - -sub write_channel_list -{ - my( $channel_list ) = @_; - - # Write list of channels. - t 'Writing list of channels.'; - - foreach my $channel_id (@{$channel_list}) - { - if( not exists $channels->{$channel_id} ) - { - print STDERR "Unknown channel $channel_id." . - " See @@site" . - " for a list of available channels or run" . - " @@name --configure to reconfigure.\n"; - next; - } - - my( $channel_name, $url, $def ) = @{$channels->{$channel_id}}; - print " $def\n"; - } -} - -sub write_footer -{ - print "\n"; -} - -sub getuncompressed { - my( $url ) = @_; - - my $response = $ua->get($url); - - return undef - unless $response->is_success; - - my $compressed = $response->content - or return undef; - - # Since LWP 5.827, the result from get() is already - # uncompressed. 
- - my $uncompressed; - - eval { - $uncompressed = Compress::Zlib::memGunzip( \$compressed ); - }; - - $uncompressed = $compressed if not defined $uncompressed; - - return $uncompressed; -} - -### Setup indentation in Emacs -## Local Variables: -## perl-indent-level: 4 -## perl-continued-statement-offset: 4 -## perl-continued-brace-offset: 0 -## perl-brace-offset: -4 -## perl-brace-imaginary-offset: 0 -## perl-label-offset: -2 -## cperl-indent-level: 4 -## cperl-brace-offset: 0 -## cperl-continued-brace-offset: 0 -## cperl-label-offset: -2 -## cperl-extra-newline-before-brace: t -## cperl-merge-trailing-else: nil -## cperl-continued-statement-offset: 2 -## End: diff -Nru xmltv-0.6.3/grab/se_swedb/tv_grab_se_swedb.PL xmltv-1.0.0/grab/se_swedb/tv_grab_se_swedb.PL --- xmltv-0.6.3/grab/se_swedb/tv_grab_se_swedb.PL 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/se_swedb/tv_grab_se_swedb.PL 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -# Generate tv_grab_se_swedb from tv_grab_se_swedb.in. This is done -# to allow grabbers for other countries to use the same code. 
-# - -use strict; - -use IO::File; -my $out = shift @ARGV; die "no output file given" if not defined $out; -my $in = 'grab/se_swedb/tv_grab_se_swedb.in'; -my $in_fh = new IO::File "< $in" or die "cannot read $in: $!"; -my $out_fh = new IO::File "> $out" or die "cannot write to $out: $!"; -my $seen = 0; -while (<$in_fh>) { - s/\@\@name/tv_grab_se_swedb/; - s/\@\@nspc/ /; - s/\@\@country/Sweden/; - s/\@\@desc/Sweden (swedb\/tvsajten)/; - s%\@\@url%http://xmltv.tvsajten.com/channels.xml.gz%; - s%\@\@site%http://xmltv.tvsajten.com/%; - print $out_fh $_; -} -close $out_fh or die "cannot close $out: $!"; -close $in_fh or die "cannot close $in: $!"; - diff -Nru xmltv-0.6.3/grab/uk_tvguide/tv_grab_uk_tvguide xmltv-1.0.0/grab/uk_tvguide/tv_grab_uk_tvguide --- xmltv-0.6.3/grab/uk_tvguide/tv_grab_uk_tvguide 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/uk_tvguide/tv_grab_uk_tvguide 2021-02-09 10:49:46.000000000 +0000 @@ -74,7 +74,7 @@ listchannels_sub => \&fetch_channels, version => $VERSION, description => $GRABBER_DESC, - extra_options => [qw/nodetailspage/], + extra_options => [qw/nodetailspage legacychannels/], }); #print Dumper($conf); exit; @@ -124,6 +124,9 @@ my $programmes = (); my $channels = (); +# Store channel names during fetch +my $channames = undef; + # Get the schedule(s) from TV Guide fetch_listings(); @@ -195,7 +198,8 @@ # If we need to map the fetched channel_id to a different value my $xmlchannel_id = $channel_id; - if (defined(&map_channel_id)) { $xmlchannel_id = map_channel_id($channel_id); } + $xmlchannel_id .= '.tvguide.co.uk' unless $opt->{legacychannels}; # make channel RFC2838 compliant + if (defined(&map_channel_id)) { $xmlchannel_id = map_channel_id($xmlchannel_id); } # Fetch the page # my $tree = XMLTV::Get_nice::get_nice_tree($url); @@ -204,18 +208,44 @@ # Scrub the page if ($tree) { - my $channelname = $tree->look_down('_tag' => 'option', 'value' => $channel_id); - + my $channelname = undef; + + # Store the channel ids in a list (do 
this only once per program run) + if (!defined $channames) { + #debug 'fetching options tags'; + my $choptions = $tree->look_down('_tag' => 'select', 'name' => 'ch'); + if (defined $choptions) { + my @choptionslist = $choptions->look_down('_tag' => 'option'); + if (@choptionslist) { + foreach my $choption (@choptionslist) { + $channames->{$choption->attr('value')} = $choption->as_text; + } + } + } + } + + $channelname = $channames->{$channel_id} if $channames; + + # Try a fallback method if the form options are missing [Credit mkbloke] + if (!defined $channelname) { + #debug 'using fallback method'; + my $fallback = $tree->look_down('_tag' => 'input', 'name' => 'cTime'); + $fallback = $fallback->look_up('_tag', 'tr') if $fallback; + $channelname = $fallback->look_down('_tag' => 'span', 'class' => 'programmeheading') if $fallback; + $channelname = $channelname->as_text if $channelname; + } + + #debug 'found channel name: '.$channelname; + # tvguide website can be very slow - try to avoid barfing when no response + # if no channelname then assume we got no response from website if (!defined $channelname) { warning "Unable to retrieve web page for $channel_id"; next; } - $channelname = $channelname->as_text; - # - + # my @shows = $tree->look_down('_tag' => 'table', 'border' => '0', 'cellpadding' => '0', 'style' => qr/background:\s*black;border-collapse:\s*collapse;/); if (@shows) { @@ -695,8 +725,8 @@ sub map_channel_id { # Map the fetched channel_id to a different value (e.g. our PVR needs specific channel ids) - # mapped channels should be stored in a file called tv_grab_uk_guardian.map.conf - # containing lines of the form: map==fromchan==tochan e.g. 'map==5-star==5STAR' + # mapped channels should be stored in a file called tv_grab_uk_tvguide.map.conf + # containing lines of the form: map==fromchan==tochan e.g. 
'map==109==BBC4' # my ($channel_id) = @_; my $mapchannels = \%mapchannelhash; @@ -946,6 +976,10 @@ Please see B +Additional options may be specified on the commandline. +use --nodetailspage to only fetch the main details of the programme schedule. (May be useful if you have problems accessing the tvguide website.) +Channel ids were made compliant with the XMLTV specification in December 2020. Use --legacychannels to output channel ids in the previous format (i.e. number only). + =head1 INSTALLATION The file F has two purposes. Firstly you can map the channel ids used by the site into something more meaningful to your PVR. E.g. diff -Nru xmltv-0.6.3/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite xmltv-1.0.0/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite --- xmltv-0.6.3/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/grab/zz_sdjson_sqlite/tv_grab_zz_sdjson_sqlite 2021-02-09 10:49:46.000000000 +0000 @@ -44,6 +44,7 @@ # # Version history: # +# 2020/09/15 - 1.102 - update for cherry-pick typo correction # 2020/06/21 - 1.101 - rename scaledownload to scale-download # 2020/06/20 - 1.100 - add support for --scaledownload # 2020/06/12 - 1.99 - include programID in metadata @@ -193,7 +194,7 @@ my $RFC2838_COMPLIANT = 1; # RFC2838 compliant station ids, which makes XMLTV # validate even though the docs say "SHOULD" not "MUST" -my $SCRIPT_VERSION = '$Id: tv_grab_zz_sdjson_sqlite,v 1.101 2020/06/21 20:30:00 gtb Exp ed $'; +my $SCRIPT_VERSION = '$Id: tv_grab_zz_sdjson_sqlite,v 1.102 2020/09/15 21:30:00 gtb Exp ed $'; my $SCRIPT_URL = 'https://github.com/garybuhrmaster/tv_grab_zz_sdjson_sqlite'; my $SCRIPT_NAME = basename("$0"); my $SCRIPT_NAME_DIR = dirname("$0"); @@ -6129,7 +6130,7 @@ B<--force-download> Deletes most existing local database data and forces a download of the data. 
If there is a suspicion that the -data is currupt (and not being automatically corrected), forcing +data is corrupt (and not being automatically corrected), forcing a new download might be necessary. B<--days N> When grabbing, grab N days rather than all available days. diff -Nru xmltv-0.6.3/lib/exe_opt.pl xmltv-1.0.0/lib/exe_opt.pl --- xmltv-0.6.3/lib/exe_opt.pl 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/lib/exe_opt.pl 2021-02-09 10:49:46.000000000 +0000 @@ -1,8 +1,6 @@ #!perl -w # # This is a simple script to generate options so PerlApp can make the EXE -# it needs time values, so might as well put it in a perl script! -# (windows has a limited date function) # # Robert Eden rmeden@yahoo.com @@ -11,65 +9,23 @@ # # output constants # -print '-nologo --force --add=XMLTV:: --add=Date::Manip:: --add DateTime:: --add Params::Validate::** --add Date::Language:: --add Class::MethodMaker:: --add Class::MethodMaker::Engine --add arybase --bind=libexpat-1_.dll[file=C:\strawberry\c\bin\libexpat-1_.dll,extract] --bind=libxml2-2_.dll[file=C:\strawberry\c\bin\libxml2-2_.dll,extract] --bind=libiconv-2_.dll[file=C:\strawberry\c\bin\libiconv-2_.dll,extract] --bind=liblzma-5_.dll[file=C:\strawberry\c\bin\liblzma-5_.dll,extract] --bind=zlib1_.dll[file=C:\strawberry\c\bin\zlib1_.dll,extract] --bind=libgcc_x86_470.dll[file=C:\strawberry\perl\bin\libgcc_x86_470.dll,extract] --bind=libeay32_.dll[file=C:\strawberry\c\bin\libeay32_.dll,extract] --bind=SSLeay32_.dll[file=C:\strawberry\c\bin\SSLeay32_.dll,extract] --bind DateTime/Format/Builder/Parser/Regex.pm[file=c:\Strawberry\Perl\site\lib\DateTime\Format\Builder\Parser\Regex.pm,extract] --trim=Class::MethodMaker::Scalar --trim=Class::MethodMaker::Engine --trim=JSON::PP58 --trim=Test::Builder::IO::Scalar; --trim=Win32::Console --info CompanyName="XMLTV Project http://www.xmltv.org" --info FileDescription="EXE bundle of XMLTV tools to manage TV Listings" --info InternalName=xmltv.exe --info OriginalFilename=xmltv.exe --info 
ProductName=xmltv --info LegalCopyright="GNU General Public License http://www.gnu.org/licenses/gpl.txt" --icon xmltv_logo.ico +print ' +-M XMLTV:: +-M Date::Manip:: +-M DateTime:: +-M Params::Validate:: +-M Date::Language:: +-M Class::MethodMaker:: +-X JSON::PP58 +-X Test::Builder::IO::Scalar +-X Win32::Console '; -# -# Add XML\Parser\encodings -# -@Encoding_Path = (grep(-d $_, - map(File::Spec->catdir($_, qw(XML Parser Encodings)), - @INC) - )); -foreach $dir (@Encoding_Path) { - opendir DIR,$dir || die "Can't open encoding path directory\n"; - while ($file = readdir DIR) - { - next unless $file =~ /.enc$/i; - print "-bind=XML/Parser/Encodings/${file}[file=$dir/${file},extract]\n"; - } +# add executable scripts +open(FILE,"exe_files.txt"); +foreach (split(/ /,)) { + chomp; + next unless $_; } +close FILE; -# -# put date in file version field -# -@date=localtime; $date[4]++; $date[5]+=1900; -printf "-info FileVersion=%4d.%d.%d.%d\n",@date[5,4,3,2]; - -# -# last fields in product version should ommitable, but it doesn't work. -# -$version=shift; -@_=split(/\./,$version); -map {$_=0 unless defined $_} @_[0..4]; -printf "-info ProductVersion=%d.%d.%d.%d\n",@_; diff -Nru xmltv-0.6.3/lib/exe_wrap.pl xmltv-1.0.0/lib/exe_wrap.pl --- xmltv-0.6.3/lib/exe_wrap.pl 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/lib/exe_wrap.pl 1970-01-01 00:00:00.000000000 +0000 @@ -1,165 +0,0 @@ -#!perl -w -# -# This is a quick XMLTV shell routing to use with the windows exe -# -# A single EXE is needed to allow sharing of modules and dlls of all the -# programs. If PerlAPP was run on each one, the total size would be more than -# 12MB, even leaving out PERL56.DLL! -# -# Perlapp allows you to attach pathed files, but you need the same path -# to access them. The Makefile creates a text file of these files which is -# used to build a translation table, allowing users to just type the app name -# and not the development path. 
-# -# Robert Eden rmeden@yahoo.com -# - -use File::Basename; -use Carp; - -$Carp::MaxEvalLen=40; # limit confess output - -# -# this check should not be done, at least not this way. it prevents some regular expressions! -# -## Check for error of running from 'Run' dialogue box with redirection, -## which Run doesn't understand, -## -#if (grep /[<>|]/, @ARGV) { -# warn < 12; - $tz += 24 if $tz < -12; - $tz= sprintf("%+03d00",$tz); - - $ENV{TZ}= $tz; - -} #timezone -print STDERR "Timezone is $ENV{TZ}\n" unless $opt_quiet; - - -$cmd = shift || ""; - -# --version (and abbreviations thereof) -my $VERSION = '0.6.3'; -if (index('--version', $cmd) == 0 and length $cmd >= 3) { - print "xmltv $VERSION\n"; - exit; -} - -# -# some programs use a "share" directory -# -if ($cmd eq 'tv_grab_na_dd', - or $cmd eq 'tv_grab_na_icons', - ) -{ - unless (grep(/^--share/i,@ARGV)) # don't add our --share if one supplied - { - my $dir = dirname(PerlApp::exe()); # get full program path - $dir =~ s!\\!/!g; # use / not \ -# die "EXE path contains spaces. This is known to cause problems.\nPlease move xmltv.exe to a different directory\n" if $dir =~ / /; - $dir .= "/share/xmltv"; - unless (-d $dir ) - { - die "directory $dir not found\n If not kept with the executable, specify with --share\n" - } - print STDERR "adding '--share=$dir'\n" unless $opt_quiet; - push @ARGV,"--share",$dir; - } -} - -# -# special hack, allow "exec" to execute an arbitrary script -# This will be used to allow XMLTV.EXE modules to be used on beta code w/o an alpha exe -# -# Note, no extra modules are included in the EXE. There is no guarantee this will work -# it is an unsupported hack. 
-# -# syntax XMLTV.EXE exec filename --options -# -if ($cmd eq 'exec') -{ - my $exe=shift; - $0=$exe; - do $exe; - print STDERR $@ if length($@); - exit 1 if length($@); - exit 0; -} - -# -# scan through attached files and execute program if found -# -$files=PerlApp::get_bound_file("exe_files.txt"); -foreach my $exe (split(/ /,$files)) -{ - next unless length($exe)>3; #ignore trash - $_=$exe; - s!^.+/!!g; - $cmds{$_}=1; # build command list (just in case) - - next unless $cmd eq $_; - -# -# execute our command -# - $0 = $_; # set $0 to our script - do $exe; - print STDERR $@ if length($@); - exit 1 if length($@); - exit 0; -} - -# -# command not found, print error -# -if ($cmd eq "" ) - { - print STDERR "you must specify the program to run\n for example: $0 tv_grab_fi --configure\n"; - } -else - { - print STDERR "$cmd is not a valid command.\n"; - } - -print STDERR "Valid commands are:\n"; -@cmds=sort keys %cmds; -$rows = int($#cmds / 3)+1; - -map {$_='' unless defined $_} @cmds[0..($rows*3+2)]; -unshift @cmds,undef; - -foreach (1..$rows) -{ - printf STDERR " %-20s %-20s %-20s\n",@cmds[$_,$rows+$_,2*$rows+$_]; -} -exit 1; - diff -Nru xmltv-0.6.3/lib/IMDB.pm xmltv-1.0.0/lib/IMDB.pm --- xmltv-0.6.3/lib/IMDB.pm 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/lib/IMDB.pm 2021-02-09 10:49:46.000000000 +0000 @@ -1,914 +1,937 @@ # The IMDB file contains two packages: # 1. XMLTV::IMDB::Cruncher package which parses and manages IMDB "lists" files -# from ftp.imdb.com +# from ftp.imdb.com # 2. XMLTV::IMDB package that uses data files from the Cruncher package to -# update/add details to XMLTV programme nodes. +# update/add details to XMLTV programme nodes. # # FUTURE - multiple hits on the same 'title only' could try and look for -# character names matching from description to imdb.com character -# names. +# character names matching from description to imdb.com character +# names. 
# # FUTURE - multiple hits on 'title only' should probably pick latest -# tv series over any older ones. May make for better guesses. +# tv series over any older ones. May make for better guesses. # # BUG - we identify 'presenters' by the word "Host" appearing in the character -# description. For some movies, character names include the word Host. -# ex. Animal, The (2001) has a character named "Badger Milk Host". +# description. For some movies, character names include the word Host. +# ex. Animal, The (2001) has a character named "Badger Milk Host". # # BUG - if there is a matching title with > 1 entry (say made for tv-movie and -# at tv-mini series) made in the same year (or even "close" years) it is -# possible for us to pick the wrong one we should pick the one with the -# closest year, not just the first closest match based on the result ordering -# for instance Ghost Busters was made in 1984, and into a tv series in -# 1986. if we have a list of GhostBusters 1983, we should pick the 1984 movie -# and not 1986 tv series...maybe :) but currently we'll pick the first -# returned close enough match instead of trying the closest date match of -# the approx hits. +# at tv-mini series) made in the same year (or even "close" years) it is +# possible for us to pick the wrong one we should pick the one with the +# closest year, not just the first closest match based on the result ordering +# for instance Ghost Busters was made in 1984, and into a tv series in +# 1986. if we have a list of GhostBusters 1983, we should pick the 1984 movie +# and not 1986 tv series...maybe :) but currently we'll pick the first +# returned close enough match instead of trying the closest date match of +# the approx hits. # use strict; package XMLTV::IMDB; +use Search::Dict; + use open ':encoding(iso-8859-1)'; # try to enforce file encoding (does this work in Perl <5.8.1? 
) # # HISTORY # .6 = what was here for the longest time # .7 = fixed file size est calculations -# = moviedb.info now includes _file_size_uncompressed values for each downloaded file +# = moviedb.info now includes _file_size_uncompressed values for each downloaded file # .8 = updated file size est calculations -# = moviedb.dat directors and actors list no longer include repeated names (which mostly -# occured in episodic tv programs (reported by Alexy Khrabrov) +# = moviedb.dat directors and actors list no longer include repeated names (which mostly +# occured in episodic tv programs (reported by Alexy Khrabrov) # .9 = added keywords data # .10 = added plot data +# .11 = revised method for database creation to reduce memory use +# bug: remove duplicated genres +# bug: if TV-version and movie in same year then one (random) was lost +# bug: multiple films with same title in same year then one was lost +# bug: movies with (aka...) in title not handled properly +# bug: incorrect data generated for a tv series (only the last episode found is stored) +# bug: genres and cast are rolled-up from all episodes to the series record (misleading) +# bug: multiple matches can sometimes extract the first one it comes across as a 'hit' +# (potentially wrong - it should not augment incoming prog when multiple matches) +# dbbuild: --filesort to sort interim data on disc rather than in memory +# dbbuild: --nosystemsort to use File::Sort rather than operating system shell's 'sort' command +# dbbuild: --movies-only to exclude tv-series (etc.) 
from database build # -our $VERSION = '0.10'; # version number of database +# +our $VERSION = '0.11'; # version number of database sub new { - my ($type) = shift; - my $self={ @_ }; # remaining args become attributes + my ($type) = shift; + my $self={ @_ }; # remaining args become attributes - for ('imdbDir', 'verbose') { + for ('imdbDir', 'verbose') { die "invalid usage - no $_" if ( !defined($self->{$_})); - } - #$self->{verbose}=2; - $self->{replaceDates}=0 if ( !defined($self->{replaceDates})); - $self->{replaceTitles}=0 if ( !defined($self->{replaceTitles})); - $self->{replaceCategories}=0 if ( !defined($self->{replaceCategories})); - $self->{replaceKeywords}=0 if ( !defined($self->{replaceKeywords})); - $self->{replaceURLs}=0 if ( !defined($self->{replaceURLs})); - $self->{replaceDirectors}=1 if ( !defined($self->{replaceDirectors})); - $self->{replaceActors}=0 if ( !defined($self->{replaceActors})); - $self->{replacePresentors}=1 if ( !defined($self->{replacePresentors})); - $self->{replaceCommentators}=1 if ( !defined($self->{replaceCommentators})); - $self->{replaceStarRatings}=0 if ( !defined($self->{replaceStarRatings})); - $self->{replacePlot}=0 if ( !defined($self->{replacePlot})); - - $self->{updateDates}=1 if ( !defined($self->{updateDates})); - $self->{updateTitles}=1 if ( !defined($self->{updateTitles})); - $self->{updateCategories}=1 if ( !defined($self->{updateCategories})); - $self->{updateCategoriesWithGenres}=1 if ( !defined($self->{updateCategoriesWithGenres})); - $self->{updateKeywords}=0 if ( !defined($self->{updateKeywords})); # default is to NOT add keywords - $self->{updateURLs}=1 if ( !defined($self->{updateURLs})); - $self->{updateDirectors}=1 if ( !defined($self->{updateDirectors})); - $self->{updateActors}=1 if ( !defined($self->{updateActors})); - $self->{updatePresentors}=1 if ( !defined($self->{updatePresentors})); - $self->{updateCommentators}=1 if ( !defined($self->{updateCommentators})); - $self->{updateStarRatings}=1 if ( 
!defined($self->{updateStarRatings})); - $self->{updatePlot}=0 if ( !defined($self->{updatePlot})); # default is to NOT add plot - - $self->{numActors}=3 if ( !defined($self->{numActors})); # default is to add top 3 actors - - $self->{moviedbIndex}="$self->{imdbDir}/moviedb.idx"; - $self->{moviedbData}="$self->{imdbDir}/moviedb.dat"; - $self->{moviedbInfo}="$self->{imdbDir}/moviedb.info"; - $self->{moviedbOffline}="$self->{imdbDir}/moviedb.offline"; - - # default is not to cache lookups - $self->{cacheLookups}=0 if ( !defined($self->{cacheLookups}) ); - $self->{cacheLookupSize}=0 if ( !defined($self->{cacheLookupSize}) ); - - $self->{cachedLookups}->{tv_series}->{_cacheSize_}=0; - - bless($self, $type); - - $self->{categories}={'movie' =>'Movie', - 'tv_movie' =>'TV Movie', # made for tv - 'video_movie' =>'Video Movie', # went straight to video or was made for it - 'tv_series' =>'TV Series', + } + #$self->{verbose}=2; + $self->{replaceDates}=0 if ( !defined($self->{replaceDates})); + $self->{replaceTitles}=0 if ( !defined($self->{replaceTitles})); + $self->{replaceCategories}=0 if ( !defined($self->{replaceCategories})); + $self->{replaceKeywords}=0 if ( !defined($self->{replaceKeywords})); + $self->{replaceURLs}=0 if ( !defined($self->{replaceURLs})); + $self->{replaceDirectors}=1 if ( !defined($self->{replaceDirectors})); + $self->{replaceActors}=0 if ( !defined($self->{replaceActors})); + $self->{replacePresentors}=1 if ( !defined($self->{replacePresentors})); + $self->{replaceCommentators}=1 if ( !defined($self->{replaceCommentators})); + $self->{replaceStarRatings}=0 if ( !defined($self->{replaceStarRatings})); + $self->{replacePlot}=0 if ( !defined($self->{replacePlot})); + + $self->{updateDates}=1 if ( !defined($self->{updateDates})); + $self->{updateTitles}=1 if ( !defined($self->{updateTitles})); + $self->{updateCategories}=1 if ( !defined($self->{updateCategories})); + $self->{updateCategoriesWithGenres}=1 if ( 
!defined($self->{updateCategoriesWithGenres})); + $self->{updateKeywords}=0 if ( !defined($self->{updateKeywords})); # default is to NOT add keywords + $self->{updateURLs}=1 if ( !defined($self->{updateURLs})); + $self->{updateDirectors}=1 if ( !defined($self->{updateDirectors})); + $self->{updateActors}=1 if ( !defined($self->{updateActors})); + $self->{updatePresentors}=1 if ( !defined($self->{updatePresentors})); + $self->{updateCommentators}=1 if ( !defined($self->{updateCommentators})); + $self->{updateStarRatings}=1 if ( !defined($self->{updateStarRatings})); + $self->{updatePlot}=0 if ( !defined($self->{updatePlot})); # default is to NOT add plot + + $self->{numActors}=3 if ( !defined($self->{numActors})); # default is to add top 3 actors + + $self->{moviedbIndex}="$self->{imdbDir}/moviedb.idx"; + $self->{moviedbData}="$self->{imdbDir}/moviedb.dat"; + $self->{moviedbInfo}="$self->{imdbDir}/moviedb.info"; + $self->{moviedbOffline}="$self->{imdbDir}/moviedb.offline"; + + # default is not to cache lookups + $self->{cacheLookups}=0 if ( !defined($self->{cacheLookups}) ); + $self->{cacheLookupSize}=0 if ( !defined($self->{cacheLookupSize}) ); + + $self->{cachedLookups}->{tv_series}->{_cacheSize_}=0; + + bless($self, $type); + + $self->{categories}={'movie' =>'Movie', + 'tv_movie' =>'TV Movie', # made for tv + 'video_movie' =>'Video Movie', # went straight to video or was made for it + 'tv_series' =>'TV Series', 'tv_mini_series' =>'TV Mini Series'}; - $self->{stats}->{programCount}=0; + $self->{stats}->{programCount}=0; - for my $cat (keys %{$self->{categories}}) { - $self->{stats}->{perfect}->{$cat}=0; - $self->{stats}->{close}->{$cat}=0; - } - $self->{stats}->{perfectMatches}=0; - $self->{stats}->{closeMatches}=0; + for my $cat (keys %{$self->{categories}}) { + $self->{stats}->{perfect}->{$cat}=0; + $self->{stats}->{close}->{$cat}=0; + } + $self->{stats}->{perfectMatches}=0; + $self->{stats}->{closeMatches}=0; - $self->{stats}->{startTime}=time(); + 
$self->{stats}->{startTime}=time(); - return($self); + return($self); } sub loadDBInfo($) { - my $file=shift; - my $info; + my $file=shift; + my $info; - open(INFO, "< $file") || return("imdbDir index file \"$file\":$!\n"); - while() { - chop(); - if ( s/^([^:]+)://o ) { - $info->{$1}=$_; - } - } - close(INFO); - return($info); + open(INFO, "< $file") || return("imdbDir index file \"$file\":$!\n"); + while() { + chomp(); + if ( s/^([^:]+)://o ) { + $info->{$1}=$_; + } + } + close(INFO); + return($info); } sub checkIndexesOkay($) { - my $self=shift; - if ( ! -d "$self->{imdbDir}" ) { - return("imdbDir \"$self->{imdbDir}\" does not exist\n"); - } - - if ( -f "$self->{moviedbOffline}" ) { - return("imdbDir index offline: check $self->{moviedbOffline} for details"); - } - - for my $file ($self->{moviedbIndex}, $self->{moviedbData}, $self->{moviedbInfo}) { - if ( ! -f "$file" ) { - return("imdbDir index file \"$file\" does not exist\n"); - } - } - - $VERSION=~m/^(\d+)\.(\d+)$/o || die "package corrupt, VERSION string invalid ($VERSION)"; - my ($major, $minor)=($1, $2); - - my $info=loadDBInfo($self->{moviedbInfo}); - return($info) if ( ref $info eq 'SCALAR' ); - - if ( !defined($info->{db_version}) ) { - return("imdbDir index db missing version information, rerun --prepStage all\n"); - } - if ( $info->{db_version}=~m/^(\d+)\.(\d+)$/o ) { - if ( $1 != $major || $2 < $minor ) { - return("imdbDir index db requires updating, rerun --prepStage all\n"); - } - if ( $1 == 0 && $2 == 1 ) { - return("imdbDir index db requires update, rerun --prepStage 5 (bug:actresses never appear)\n"); - } - if ( $1 == 0 && $2 == 2 ) { - # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run - return("imdbDir index db requires minor reindexing, rerun --prepStage 3 and 5\n"); - } - if ( $1 == 0 && $2 == 3 ) { - # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run - return("imdbDir index db requires major reindexing, rerun --prepStage 2 and new prepStages 5,6,7,8 and 9\n"); - } - if ( $1 == 0 && 
$2 == 4 ) { - # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run - return("imdbDir index db corrupt (got version 0.4), rerun --prepStage all\n"); + my $self=shift; + if ( ! -d "$self->{imdbDir}" ) { + return("imdbDir \"$self->{imdbDir}\" does not exist\n"); + } + + if ( -f "$self->{moviedbOffline}" ) { + return("imdbDir index offline: check $self->{moviedbOffline} for details"); + } + + for my $file ($self->{moviedbIndex}, $self->{moviedbData}, $self->{moviedbInfo}) { + if ( ! -f "$file" ) { + return("imdbDir index file \"$file\" does not exist\n"); + } + } + + $VERSION=~m/^(\d+)\.(\d+)$/o || die "package corrupt, VERSION string invalid ($VERSION)"; + my ($major, $minor)=($1, $2); + + my $info=loadDBInfo($self->{moviedbInfo}); + return($info) if ( ref $info eq 'SCALAR' ); + + if ( !defined($info->{db_version}) ) { + return("imdbDir index db missing version information, rerun --prepStage all\n"); + } + if ( $info->{db_version}=~m/^(\d+)\.(\d+)$/o ) { + if ( $1 != $major || $2 < $minor ) { + return("imdbDir index db requires updating, rerun --prepStage all\n"); + } + if ( $1 == 0 && $2 == 1 ) { + return("imdbDir index db requires update, rerun --prepStage 5 (bug:actresses never appear)\n"); + } + if ( $1 == 0 && $2 == 2 ) { + # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run + return("imdbDir index db requires minor reindexing, rerun --prepStage 3 and 5\n"); + } + if ( $1 == 0 && $2 == 3 ) { + # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run + return("imdbDir index db requires major reindexing, rerun --prepStage 2 and new prepStages 5,6,7,8 and 9\n"); + } + if ( $1 == 0 && $2 == 4 ) { + # 0.2 -> 0.3 upgrade requires prepStage 5 to be re-run + return("imdbDir index db corrupt (got version 0.4), rerun --prepStage all\n"); + } + # okay + return(undef); + } + else { + return("imdbDir index version of '$info->{db_version}' is invalid, rerun --prepStage all\n". 
+ "if problem persists, submit bug report to xmltv-devel\@lists.sf.net\n"); } - # okay - return(undef); - } - else { - return("imdbDir index version of '$info->{db_version}' is invalid, rerun --prepStage all\n". - "if problem persists, submit bug report to xmltv-devel\@lists.sf.net\n"); - } } sub basicVerificationOfIndexes($) { - my $self=shift; - - # check that the imdbdir is invalid and up and running - my $title="Army of Darkness"; - my $year=1992; - - $self->openMovieIndex() || return("basic verification of indexes failed\n". - "database index isn't readable"); - - my $verbose = $self->{verbose}; $self->{verbose} = 0; - my $res=$self->getMovieMatches($title, $year); - $self->{verbose} = $verbose; undef $verbose; - if ( !defined($res) ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "no match for basic verification of movie \"$title, $year\"\n"); - } - if ( !defined($res->{exactMatch}) ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "no exact match for movie \"$title, $year\"\n"); - } - if ( scalar(@{$res->{exactMatch}})!= 1) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "got more than one exact match for movie \"$title, $year\"\n"); - } - my @exact=@{$res->{exactMatch}}; - if ( $exact[0]->{title} ne $title ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "title associated with key \"$title, $year\" is bad\n"); - } - - if ( $exact[0]->{year} ne "$year" ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "year associated with key \"$title, $year\" is bad\n"); - } - - my $id=$exact[0]->{id}; - $res=$self->getMovieIdDetails($id); - if ( !defined($res) ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". 
- "no movie details for movie \"$title, $year\" (id=$id)\n"); - } + my $self=shift; - if ( !defined($res->{directors}) ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "movie details didn't provide any director for movie \"$title, $year\" (id=$id)\n"); - } - if ( !$res->{directors}[0]=~m/Raimi/o ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "movie details didn't show Raimi as the main director for movie \"$title, $year\" (id=$id)\n"); - } - if ( !defined($res->{actors}) ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "movie details didn't provide any cast movie \"$title, $year\" (id=$id)\n"); - } - if ( !$res->{actors}[0]=~m/Campbell/o ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "movie details didn't show Bruce Campbell as the main actor in movie \"$title, $year\" (id=$id)\n"); - } - my $matches=0; - for (@{$res->{genres}}) { - if ( $_ eq "Action" || - $_ eq "Comedy" || - $_ eq "Fantasy" || - $_ eq "Horror" || - $_ eq "Romance" ) { - $matches++; + # check that the imdbdir is valid and up and running + my $title="Army of Darkness"; + my $year=1992; + + $self->openMovieIndex() || return("basic verification of indexes failed\n". + "database index isn't readable"); + + my $verbose = $self->{verbose}; $self->{verbose} = 0; + my $res=$self->getMovieMatches($title, $year); + $self->{verbose} = $verbose; undef $verbose; + if ( !defined($res) ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "no match for basic verification of movie \"$title, $year\"\n"); + } + if ( !defined($res->{exactMatch}) ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "no exact match for movie \"$title, $year\"\n"); + } + if ( scalar(@{$res->{exactMatch}})!= 1) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". 
+ "got more than one exact match for movie \"$title, $year\"\n"); + } + my @exact=@{$res->{exactMatch}}; + if ( $exact[0]->{title} ne $title ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "title associated with key \"$title, $year\" is bad\n"); + } + + if ( $exact[0]->{year} ne "$year" ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "year associated with key \"$title, $year\" is bad\n"); + } + + my $id=$exact[0]->{id}; + $res=$self->getMovieIdDetails($id); + if ( !defined($res) ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "no movie details for movie \"$title, $year\" (id=$id)\n"); + } + + if ( !defined($res->{directors}) ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "movie details didn't provide any director for movie \"$title, $year\" (id=$id)\n"); + } + if ( !$res->{directors}[0]=~m/Raimi/o ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "movie details didn't show Raimi as the main director for movie \"$title, $year\" (id=$id)\n"); + } + if ( !defined($res->{actors}) ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "movie details didn't provide any cast movie \"$title, $year\" (id=$id)\n"); + } + if ( !$res->{actors}[0]=~m/Campbell/o ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "movie details didn't show Bruce Campbell as the main actor in movie \"$title, $year\" (id=$id)\n"); + } + my $matches=0; + for (@{$res->{genres}}) { + if ( $_ eq "Action" || + $_ eq "Comedy" || + $_ eq "Fantasy" || + $_ eq "Horror" || + $_ eq "Romance" ) { + $matches++; + } + } + if ( $matches == 0 ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". 
+ "movie details didn't show genres correctly for movie \"$title, $year\" (id=$id)\n"); } - } - if ( $matches == 0 ) { - $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "movie details didn't show genres correctly for movie \"$title, $year\" (id=$id)\n"); - } - if ( !defined($res->{ratingDist}) || + if ( !defined($res->{ratingDist}) || !defined($res->{ratingVotes}) || !defined($res->{ratingRank}) ) { + $self->closeMovieIndex(); + return("basic verification of indexes failed\n". + "movie details didn't show imdbratings for movie \"$title, $year\" (id=$id)\n"); + } + $self->closeMovieIndex(); - return("basic verification of indexes failed\n". - "movie details didn't show imdbratings for movie \"$title, $year\" (id=$id)\n"); - } - $self->closeMovieIndex(); - return(undef); + # all okay + return(undef); } sub sanityCheckDatabase($) { - my $self=shift; - my $errline; + my $self=shift; + my $errline; - $errline=$self->checkIndexesOkay(); - return($errline) if ( defined($errline) ); - $errline=$self->basicVerificationOfIndexes(); - return($errline) if ( defined($errline) ); + $errline=$self->checkIndexesOkay(); + return($errline) if ( defined($errline) ); + $errline=$self->basicVerificationOfIndexes(); + return($errline) if ( defined($errline) ); - # all okay - return(undef); + # all okay + return(undef); } sub error($$) { - print STDERR "tv_imdb: $_[1]\n"; + print STDERR "tv_imdb: $_[1]\n"; } sub status($$) { - if ( $_[0]->{verbose} ) { - print STDERR "tv_imdb: $_[1]\n"; - } + if ( $_[0]->{verbose} ) { + print STDERR "tv_imdb: $_[1]\n"; + } } sub debug($$) { - my $self=shift; - my $mess=shift; - if ( $self->{verbose} > 1 ) { - print STDERR "tv_imdb: $mess\n"; - } + my $self=shift; + my $mess=shift; + if ( $self->{verbose} > 1 ) { + print STDERR "tv_imdb: $mess\n"; + } } -use Search::Dict; - sub openMovieIndex($) { - my $self=shift; + my $self=shift; - if ( !open($self->{INDEX_FD}, "< $self->{moviedbIndex}") ) { - return(undef); - } - if ( 
!open($self->{DBASE_FD}, "< $self->{moviedbData}") ) { - close($self->{INDEX_FD}); - return(undef); - } - return(1); + if ( !open($self->{INDEX_FD}, "< $self->{moviedbIndex}") ) { + return(undef); + } + if ( !open($self->{DBASE_FD}, "< $self->{moviedbData}") ) { + close($self->{INDEX_FD}); + return(undef); + } + return(1); } sub closeMovieIndex($) { - my $self=shift; + my $self=shift; - close($self->{INDEX_FD}); - delete($self->{INDEX_FD}); + close($self->{INDEX_FD}); + delete($self->{INDEX_FD}); - close($self->{DBASE_FD}); - delete($self->{DBASE_FD}); + close($self->{DBASE_FD}); + delete($self->{DBASE_FD}); - return(1); + return(1); } -# moviedbIndex file has the format: -# title:lineno -# where key is a url encoded title followed by the year of production and a colon +# moviedbIndex is a TSV file with the format: +# searchtitle title year progtype lineno +# sub getMovieMatches($$$) { - my $self=shift; - my $title=shift; - my $year=shift; - - # Articles are put at the end of a title ( in all languages ) - #$match=~s/^(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/$2, $1/og; - - my $match="$title"; - if ( defined($year) ) { - $match.=" ($year)"; - } - - # to encode s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg - # to decode s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? 
chr hex($1) : utf8_chr(hex($2))/oge; - - # url encode - $match=lc($match); - $match=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg; - - $self->debug("looking for \"$match\" in $self->{moviedbIndex}"); - if ( !$self->{INDEX_FD} ) { - die "internal error: index not open"; - } - - my $FD=$self->{INDEX_FD}; - Search::Dict::look(*{$FD}, $match, 0, 0); - my $results; - while (<$FD>) { - last if ( !m/^$match/ ); - - chop(); - my @arr=split('\t', $_); - if ( scalar(@arr) != 5 ) { - warn "$self->{moviedbIndex} corrupt (correct key:$_)"; - next; - } - - if ( $arr[0] eq $match ) { - # return title and id - #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; - - #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) : utf8_chr(hex($2))/oge; - #$self->debug("exact:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]"); - my $title=$arr[1]; - if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) { - } - elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVX]+\)$//o ) { - } - else { - die "unable to decode year from title key \"$title\", report to xmltv-devel\@lists.sf.net"; - } - $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; - $self->debug("exact:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]"); - push(@{$results->{exactMatch}}, {'key'=> $arr[1], - 'title'=>$title, - 'year'=>$arr[2], - 'qualifier'=>$arr[3], - 'id'=>$arr[4]}); + my $self=shift; + my $title=shift; + my $year=shift; + + # Articles are put at the end of a title ( in all languages ) + #$match=~s/^(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/$2, $1/og; + + my $match="$title"; + if ( defined($year) ) { + $match.=" ($year)"; } - else { - # decode - #s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) : utf8_chr(hex($2))/oge; - # return title - #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; - #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? 
chr hex($1) : utf8_chr(hex($2))/oge; - #$self->debug("close:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]"); - my $title=$arr[1]; - if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #" - $title=~s/^\"//o; #" - $title=~s/\"(\s*\()/$1/o; #" - } + # to encode s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg + # to decode s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) : utf8_chr(hex($2))/oge; + + # url encode + $match=lc($match); + $match=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg; + + $self->debug("looking for \"$match\" in $self->{moviedbIndex}"); + if ( !$self->{INDEX_FD} ) { + die "internal error: index not open"; + } + + my $FD=$self->{INDEX_FD}; + Search::Dict::look(*{$FD}, $match, 0, 0); + my $results; + while (<$FD>) { + last if ( !m/^$match/ ); + + chomp(); + my @arr=split('\t', $_); + if ( scalar(@arr) != 5 ) { + warn "$self->{moviedbIndex} corrupt (correct key:$_)"; + next; + } + + if ( $arr[0] eq $match ) { + # return title and id + #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; + + #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) : utf8_chr(hex($2))/oge; + #$self->debug("exact:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]"); + my $title=$arr[1]; + if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) { + } + elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVXL]+\)$//o ) { + } + else { + die "unable to decode year from title key \"$title\", report to xmltv-devel\@lists.sf.net"; + } + $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; + $self->debug("exact:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]"); + push(@{$results->{exactMatch}}, {'key'=> $arr[1], + 'title'=>$title, + 'year'=>$arr[2], + 'qualifier'=>$arr[3], + 'id'=>$arr[4]}); + } + else { + # decode + #s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? 
chr hex($1) : utf8_chr(hex($2))/oge; + # return title + #$arr[1]=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; + #$arr[0]=~s/%(?:([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/defined($1)? chr hex($1) : utf8_chr(hex($2))/oge; + #$self->debug("close:$arr[1] ($arr[2]) qualifier=$arr[3] id=$arr[4]"); + my $title=$arr[1]; + + if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #" + $title=~s/^\"//o; #" + $title=~s/\"(\s*\()/$1/o; #" + } - if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) { - } - elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVX]+\)$//o ) { - } - else { - die "unable to decode year from title key \"$title\", report to xmltv-devel\@lists.sf.net"; - } - $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; - $self->debug("close:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]"); - push(@{$results->{closeMatch}}, {'key'=> $arr[1], - 'title'=>$title, - 'year'=>$arr[2], - 'qualifier'=>$arr[3], - 'id'=>$arr[4]}); - } - } - #print "MovieMatches on ($match) = ".Dumper($results)."\n"; - return($results); + if ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\)$//o ) { + } + elsif ( $title=~s/\s+\((\d\d\d\d|\?\?\?\?)\/[IVXL]+\)$//o ) { + } + else { + die "unable to decode year from title key \"$title\", report to xmltv-devel\@lists.sf.net"; + } + $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; + $self->debug("close:$title ($arr[2]) qualifier=$arr[3] id=$arr[4]"); + push(@{$results->{closeMatch}}, {'key'=> $arr[1], + 'title'=>$title, + 'year'=>$arr[2], + 'qualifier'=>$arr[3], + 'id'=>$arr[4]}); + } + } + #print "MovieMatches on ($match) = ".Dumper($results)."\n"; + return($results); } sub getMovieExactMatch($$$) { - my $self=shift; - my $title=shift; - my $year=shift; - my $res=$self->getMovieMatches($title, $year); - - return(undef) if ( !defined($res) ); - if ( !defined($res->{exactMatch}) ) { - return(undef); - } - if ( scalar(@{$res->{exactMatch}}) != 1 ) { - return(undef); - } - 
return($res->{exactMatch}[0]); + my $self=shift; + my $title=shift; + my $year=shift; + my $res=$self->getMovieMatches($title, $year); + + return(undef, 0) if ( !defined($res) ); + if ( !defined($res->{exactMatch}) ) { + return(undef, 0); + } + if ( scalar(@{$res->{exactMatch}}) != 1 ) { + return(undef, scalar(@{$res->{exactMatch}})); + } + return($res->{exactMatch}[0], 1); } sub getMovieCloseMatches($$) { - my $self=shift; - my $title=shift; + my $self=shift; + my $title=shift; - my $res=$self->getMovieMatches($title, undef) || return(undef); + my $res=$self->getMovieMatches($title, undef) || return(undef); - if ( defined($res->{exactMatch})) { - die "corrupt imdb database - hit on \"$title\""; - } - return(undef) if ( !defined($res->{closeMatch}) ); - my @arr=@{$res->{closeMatch}}; - #print "CLOSE DUMP=".Dumper(@arr)."\n"; - return(@arr); + if ( defined($res->{exactMatch})) { + die "corrupt imdb database - hit on \"$title\""; + } + return(undef) if ( !defined($res->{closeMatch}) ); + my @arr=@{$res->{closeMatch}}; + #print "CLOSE DUMP=".Dumper(@arr)."\n"; + return(@arr); } +# moviedbData file is a TSV file with the format: +# lineno:directors actors genres ratingDist ratingVotes ratingRank keywords plot +# sub getMovieIdDetails($$) { - my $self=shift; - my $id=shift; + my $self=shift; + my $id=shift; - if ( !$self->{DBASE_FD} ) { - die "internal error: index not open"; - } - my $results; - my $FD=$self->{DBASE_FD}; - Search::Dict::look(*{$FD}, "$id:", 0, 0); - while (<$FD>) { - last if ( !m/^$id:/ ); - chop(); - if ( s/^$id:// ) { - my ($directors, $actors, $genres, $ratingDist, $ratingVotes, $ratingRank, $keywords, $plot)=split('\t', $_); - if ( $directors ne "<>" ) { - for my $name (split('\|', $directors)) { - # remove (I) etc from imdb.com names (kept in place for reference) - $name=~s/\s\([IVX]+\)$//o; - # switch name around to be surname last - $name=~s/^([^,]+),\s*(.*)$/$2 $1/o; - push(@{$results->{directors}}, $name); - } - } - if ( $actors ne "<>" ) { - 
for my $name (split('\|', $actors)) { - # remove (I) etc from imdb.com names (kept in place for reference) - my $HostNarrator; - if ( $name=~s/\[([^\]]+)\]$//o ) { - $HostNarrator=$1; - } - $name=~s/\s\([IVX]+\)$//o; - - # switch name around to be surname last - $name=~s/^([^,]+),\s*(.*)$/$2 $1/o; - if ( $HostNarrator ) { - if ( $HostNarrator=~s/,*Host//o ) { - push(@{$results->{presenter}}, $name); - } - if ( $HostNarrator=~s/,*Narrator//o ) { - push(@{$results->{commentator}}, $name); - } - } - else { - push(@{$results->{actors}}, $name); - } - } - } - if ( $genres ne "<>" ) { - push(@{$results->{genres}}, split('\|', $genres)); - } - if ( $keywords ne "<>" ) { - push(@{$results->{keywords}}, split(',', $keywords)); - } - $results->{ratingDist}=$ratingDist if ( $ratingDist ne "<>" ); - $results->{ratingVotes}=$ratingVotes if ( $ratingVotes ne "<>" ); - $results->{ratingRank}=$ratingRank if ( $ratingRank ne "<>" ); - $results->{plot}=$plot if ( $plot ne "<>" ); + if ( !$self->{DBASE_FD} ) { + die "internal error: index not open"; } - else { - warn "lookup of movie (id=$id) resulted in garbage ($_)"; + my $results; + my $FD=$self->{DBASE_FD}; + Search::Dict::look(*{$FD}, "$id:", 0, 0); + while (<$FD>) { + last if ( !m/^$id:/ ); + chomp(); + if ( s/^$id:// ) { + my ($directors, $actors, $genres, $ratingDist, $ratingVotes, $ratingRank, $keywords, $plot)=split('\t', $_); + if ( $directors ne "<>" ) { + for my $name (split('\|', $directors)) { + # remove (I) etc from imdb.com names (kept in place for reference) + $name=~s/\s\([IVXL]+\)$//o; + # switch name around to be surname last + $name=~s/^([^,]+),\s*(.*)$/$2 $1/o; + push(@{$results->{directors}}, $name); + } + } + if ( $actors ne "<>" ) { + for my $name (split('\|', $actors)) { + # remove (I) etc from imdb.com names (kept in place for reference) + my $HostNarrator; + if ( $name=~s/\s?\[([^\]]+)\]$//o ) { + $HostNarrator=$1; + } + $name=~s/\s\([IVXL]+\)$//o; + + # switch name around to be surname last + 
$name=~s/^([^,]+),\s*(.*)$/$2 $1/o; + if ( $HostNarrator ) { + if ( $HostNarrator=~s/,*Host//o ) { + push(@{$results->{presenter}}, $name); + } + if ( $HostNarrator=~s/,*Narrator//o ) { + push(@{$results->{commentator}}, $name); + } + } + else { + push(@{$results->{actors}}, $name); + } + } + } + if ( $genres ne "<>" ) { + push(@{$results->{genres}}, split('\|', $genres)); + } + if ( $keywords ne "<>" ) { + push(@{$results->{keywords}}, split(',', $keywords)); + } + $results->{ratingDist}=$ratingDist if ( $ratingDist ne "<>" ); + $results->{ratingVotes}=$ratingVotes if ( $ratingVotes ne "<>" ); + $results->{ratingRank}=$ratingRank if ( $ratingRank ne "<>" ); + $results->{plot}=$plot if ( $plot ne "<>" ); + } + else { + warn "lookup of movie (id=$id) resulted in garbage ($_)"; + } } - } - if ( !defined($results) ) { - # some movies we don't have any details for - $results->{noDetails}=1; - } - #print "MovieDetails($id) = ".Dumper($results)."\n"; - return($results); + if ( !defined($results) ) { + # some movies we don't have any details for + $results->{noDetails}=1; + } + #print "MovieDetails($id) = ".Dumper($results)."\n"; + return($results); } # # FUTURE - close hit could be just missing or extra -# punctuation: -# "Run Silent, Run Deep" for imdb's "Run Silent Run Deep" -# "Cherry, Harry and Raquel" for imdb's "Cherry, Harry and Raquel!" -# "Cat Women of the Moon" for imdb's "Cat-Women of the Moon" -# "Baywatch Hawaiian Wedding" for imdb's "Baywatch: Hawaiian Wedding" :) +# punctuation: +# "Run Silent, Run Deep" for imdb's "Run Silent Run Deep" +# "Cherry, Harry and Raquel" for imdb's "Cherry, Harry and Raquel!" +# "Cat Women of the Moon" for imdb's "Cat-Women of the Moon" +# "Baywatch Hawaiian Wedding" for imdb's "Baywatch: Hawaiian Wedding" :) # # FIXED - "Victoria and Albert" appears for imdb's "Victoria & Albert" (and -> &) # FIXED - "Columbo Cries Wolf" appears instead of "Columbo:Columbo Cries Wolf" # FIXED - Place the article last, for multiple languages. 
For instance -# Los amantes del crculo polar -> amantes del crculo polar, Los +# Los amantes del crculo polar -> amantes del crculo polar, Los # FIXED - common international vowel changes. For instance -# "Anna Karnin" (->e) +# "Anna Karnin" (->e) # sub alternativeTitles($) { - my $title=shift; - my @titles; + my $title=shift; + my @titles; - push(@titles, $title); + push(@titles, $title); - # try the & -> and conversion - if ( $title=~m/\&/o ) { - my $t=$title; - while ( $t=~s/(\s)\&(\s)/$1and$2/o ) { - push(@titles, $t); - } - } - - # try the and -> & conversion - if ( $title=~m/\sand\s/io ) { - my $t=$title; - while ( $t=~s/(\s)and(\s)/$1\&$2/io ) { - push(@titles, $t); - } - } - - # try the "Columbo: Columbo cries Wolf" -> "Columbo cries Wolf" conversion - my $max=scalar(@titles); - for (my $i=0; $i<$max ; $i++) { - my $t=$titles[$i]; - if ( $t=~m/^[^:]+:.+$/io ) { - while ( $t=~s/^[^:]+:\s*(.+)\s*$/$1/io ) { - push(@titles, $t); - } - } - } - - # Place the articles last - $max=scalar(@titles); - for (my $i=0; $i<$max ; $i++) { - my $t=$titles[$i]; - if ( $t=~m/^(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/io ) { - $t=~s/^(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/$2, $1/iog; - push(@titles, $t); - } - if ( $t=~m/^(.+),\s*(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)$/io ) { - $t=~s/^(.+),\s*(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/iog; - push(@titles, $t); - } - } - - # convert all the special language characters - $max=scalar(@titles); - for (my $i=0; $i<$max ; $i++) { - my $t=$titles[$i]; - if ( $t=~m/[]/io ) { - $t=~s/[]/a/gio; - $t=~s/[]/e/gio; - $t=~s/[]/i/gio; - $t=~s/[]/o/gio; - $t=~s/[]/u/gio; - $t=~s/[]/ae/gio; - $t=~s/[]/c/gio; - $t=~s/[]/n/gio; - $t=~s/[]/ss/gio; - $t=~s/[]/y/gio; - $t=~s/[]//gio; - push(@titles, $t); - } - } - - # optional later possible titles include removing the '.' from titles - # ie "Project V.I.P.E.R." 
matching imdb "Project VIPER" - $max=scalar(@titles); - for (my $i=0; $i<$max ; $i++) { - my $t=$titles[$i]; - if ( $t=~s/\.//go ) { - push(@titles,$t); + # try the & -> and conversion + if ( $title=~m/\&/o ) { + my $t=$title; + while ( $t=~s/(\s)\&(\s)/$1and$2/o ) { + push(@titles, $t); + } } - } - return(\@titles); -} - -sub findMovieInfo($$$$) -{ - my ($self, $title, $year, $exact)=@_; - my @titles=@{alternativeTitles($title)}; + # try the and -> & conversion + if ( $title=~m/\sand\s/io ) { + my $t=$title; + while ( $t=~s/(\s)and(\s)/$1\&$2/io ) { + push(@titles, $t); + } + } - if ( $exact == 1 ) { - # try an exact match first :) - for my $mytitle ( @titles ) { - my $info=$self->getMovieExactMatch($mytitle, $year); - if ( defined($info) ) { - if ( $info->{qualifier} eq "movie" ) { - $self->status("perfect hit on movie \"$info->{key}\""); - $info->{matchLevel}="perfect"; - return($info); - } - elsif ( $info->{qualifier} eq "tv_movie" ) { - $self->status("perfect hit on made-for-tv-movie \"$info->{key}\""); - $info->{matchLevel}="perfect"; - return($info); - } - elsif ( $info->{qualifier} eq "video_movie" ) { - $self->status("perfect hit on made-for-video-movie \"$info->{key}\""); - $info->{matchLevel}="perfect"; - return($info); + # try the "Columbo: Columbo cries Wolf" -> "Columbo cries Wolf" conversion + my $max=scalar(@titles); + for (my $i=0; $i<$max ; $i++) { + my $t=$titles[$i]; + if ( $t=~m/^[^:]+:.+$/io ) { + while ( $t=~s/^[^:]+:\s*(.+)\s*$/$1/io ) { + push(@titles, $t); + } } - elsif ( $info->{qualifier} eq "video_game" ) { - next; + } + + # Place the articles last + $max=scalar(@titles); + for (my $i=0; $i<$max ; $i++) { + my $t=$titles[$i]; + if ( $t=~m/^(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/io ) { + $t=~s/^(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)\s+(.*)$/$2, $1/iog; + push(@titles, $t); + } + if ( $t=~m/^(.+),\s*(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)$/io ) { + 
$t=~s/^(.+),\s*(The|A|Une|Les|Los|Las|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/iog; + push(@titles, $t); } - elsif ( $info->{qualifier} eq "tv_series" ) { + } + + # convert all the special language characters + $max=scalar(@titles); + for (my $i=0; $i<$max ; $i++) { + my $t=$titles[$i]; + if ( $t=~m/[]/io ) { + $t=~s/[]/a/gio; + $t=~s/[]/e/gio; + $t=~s/[]/i/gio; + $t=~s/[]/o/gio; + $t=~s/[]/u/gio; + $t=~s/[]/ae/gio; + $t=~s/[]/c/gio; + $t=~s/[]/n/gio; + $t=~s/[]/ss/gio; + $t=~s/[]/y/gio; + $t=~s/[]//gio; + push(@titles, $t); } - elsif ( $info->{qualifier} eq "tv_mini_series" ) { + } + + # optional later possible titles include removing the '.' from titles + # ie "Project V.I.P.E.R." matching imdb "Project VIPER" + $max=scalar(@titles); + for (my $i=0; $i<$max ; $i++) { + my $t=$titles[$i]; + if ( $t=~s/\.//go ) { + push(@titles,$t); } - else { - $self->error("$self->{moviedbIndex} responded with wierd entry for \"$info->{key}\""); - $self->error("weird trailing qualifier \"$info->{qualifier}\""); - $self->error("submit bug report to xmltv-devel\@lists.sf.net"); + } + return(\@titles); +} + +sub findMovieInfo($$$$) +{ + my ($self, $title, $year, $exact)=@_; + + my @titles=@{alternativeTitles($title)}; + + if ( $exact == 1 ) { + # try an exact match first :) + for my $mytitle ( @titles ) { + my ($info,$matchcount) = $self->getMovieExactMatch($mytitle, $year); + if ($matchcount > 1) { + # if multiple records exactly match title+year then we don't know which one is correct + $self->status("multiple hits on movie \"$mytitle ($year)\""); + return(undef, $matchcount); + } + if ( defined($info) ) { + if ( $info->{qualifier} eq "movie" ) { + $self->status("perfect hit on movie \"$info->{key}\""); + $info->{matchLevel}="perfect"; + return($info); + } + elsif ( $info->{qualifier} eq "tv_movie" ) { + $self->status("perfect hit on made-for-tv-movie \"$info->{key}\""); + $info->{matchLevel}="perfect"; + return($info); + } + elsif ( $info->{qualifier} eq "video_movie" ) { + 
$self->status("perfect hit on made-for-video-movie \"$info->{key}\""); + $info->{matchLevel}="perfect"; + return($info); + } + elsif ( $info->{qualifier} eq "video_game" ) { + next; + } + elsif ( $info->{qualifier} eq "tv_series" ) { + } + elsif ( $info->{qualifier} eq "tv_mini_series" ) { + } + else { + $self->error("$self->{moviedbIndex} responded with wierd entry for \"$info->{key}\""); + $self->error("weird trailing qualifier \"$info->{qualifier}\""); + $self->error("submit bug report to xmltv-devel\@lists.sf.net"); + } + } + $self->debug("no exact title/year hit on \"$mytitle ($year)\""); } - } - $self->debug("no exact title/year hit on \"$mytitle ($year)\""); + return(undef); } - return(undef); - } - elsif ( $exact == 2 ) { - # looking for first exact match on the title, don't have a year to compare + elsif ( $exact == 2 ) { + # looking for first exact match on the title, don't have a year to compare - for my $mytitle ( @titles ) { - # try close hit if only one :) - my $cnt=0; - my @closeMatches=$self->getMovieCloseMatches("$mytitle"); - - # we traverse the hits twice, first looking for success, - # then again to produce warnings about missed close matches - for my $info (@closeMatches) { - next if ( !defined($info) ); - $cnt++; - - # within one year with exact match good enough - if ( lc($mytitle) eq lc($info->{title}) ) { - - if ( $info->{qualifier} eq "movie" ) { - $self->status("close enough hit on movie \"$info->{key}\" (since no 'date' field present)"); - $info->{matchLevel}="close"; - return($info); - } - elsif ( $info->{qualifier} eq "tv_movie" ) { - $self->status("close enough hit on made-for-tv-movie \"$info->{key}\" (since no 'date' field present)"); - $info->{matchLevel}="close"; - return($info); - } - elsif ( $info->{qualifier} eq "video_movie" ) { - $self->status("close enough hit on made-for-video-movie \"$info->{key}\" (since no 'date' field present)"); - $info->{matchLevel}="close"; - return($info); - } - elsif ( $info->{qualifier} eq 
"video_game" ) { - next; - } - elsif ( $info->{qualifier} eq "tv_series" ) { - } - elsif ( $info->{qualifier} eq "tv_mini_series" ) { - } - else { - $self->error("$self->{moviedbIndex} responded with wierd entry for \"$info->{key}\""); - $self->error("weird trailing qualifier \"$info->{qualifier}\""); - $self->error("submit bug report to xmltv-devel\@lists.sf.net"); - } + for my $mytitle ( @titles ) { + # try close hit if only one :) + my $cnt=0; + my @closeMatches=$self->getMovieCloseMatches("$mytitle"); + + # we traverse the hits twice, first looking for success, + # then again to produce warnings about missed close matches + for my $info (@closeMatches) { + next if ( !defined($info) ); + $cnt++; + + # within one year with exact match good enough + if ( lc($mytitle) eq lc($info->{title}) ) { + + if ( $info->{qualifier} eq "movie" ) { + $self->status("close enough hit on movie \"$info->{key}\" (since no 'date' field present)"); + $info->{matchLevel}="close"; + return($info); + } + elsif ( $info->{qualifier} eq "tv_movie" ) { + $self->status("close enough hit on made-for-tv-movie \"$info->{key}\" (since no 'date' field present)"); + $info->{matchLevel}="close"; + return($info); + } + elsif ( $info->{qualifier} eq "video_movie" ) { + $self->status("close enough hit on made-for-video-movie \"$info->{key}\" (since no 'date' field present)"); + $info->{matchLevel}="close"; + return($info); + } + elsif ( $info->{qualifier} eq "video_game" ) { + next; + } + elsif ( $info->{qualifier} eq "tv_series" ) { + } + elsif ( $info->{qualifier} eq "tv_mini_series" ) { + } + else { + $self->error("$self->{moviedbIndex} responded with wierd entry for \"$info->{key}\""); + $self->error("weird trailing qualifier \"$info->{qualifier}\""); + $self->error("submit bug report to xmltv-devel\@lists.sf.net"); + } + } + } } - } + # nothing worked + return(undef); } - # nothing worked - return(undef); - } - # otherwise we're looking for a title match with a close year - for my $mytitle ( 
@titles ) { - # try close hit if only one :) - my $cnt=0; - my @closeMatches=$self->getMovieCloseMatches("$mytitle"); - - # we traverse the hits twice, first looking for success, - # then again to produce warnings about missed close matches - for my $info (@closeMatches) { - next if ( !defined($info) ); - $cnt++; - - # within one year with exact match good enough - if ( lc($mytitle) eq lc($info->{title}) ) { - my $yearsOff=abs(int($info->{year})-$year); - - $info->{matchLevel}="close"; - - if ( $yearsOff <= 2 ) { - my $showYear=int($info->{year}); - - if ( $info->{qualifier} eq "movie" ) { - $self->status("close enough hit on movie \"$info->{key}\" (off by $yearsOff years)"); - return($info); - } - elsif ( $info->{qualifier} eq "tv_movie" ) { - $self->status("close enough hit on made-for-tv-movie \"$info->{key}\" (off by $yearsOff years)"); - return($info); - } - elsif ( $info->{qualifier} eq "video_movie" ) { - $self->status("close enough hit on made-for-video-movie \"$info->{key}\" (off by $yearsOff years)"); - return($info); - } - elsif ( $info->{qualifier} eq "video_game" ) { - $self->status("ignoring close hit on video-game \"$info->{key}\""); - next; - } - elsif ( $info->{qualifier} eq "tv_series" ) { - $self->status("ignoring close hit on tv series \"$info->{key}\""); - #$self->status("close enough hit on tv series \"$info->{key}\" (off by $yearsOff years)"); - } - elsif ( $info->{qualifier} eq "tv_mini_series" ) { - $self->status("ignoring close hit on tv mini-series \"$info->{key}\""); - #$self->status("close enough hit on tv mini-series \"$info->{key}\" (off by $yearsOff years)"); - } - else { - $self->error("$self->{moviedbIndex} responded with wierd entry for \"$info->{key}\""); - $self->error("weird trailing qualifier \"$info->{qualifier}\""); - $self->error("submit bug report to xmltv-devel\@lists.sf.net"); - } - } - } - } - - # if we found at least something, but nothing matched - # produce warnings about missed, but close matches - for my $info 
(@closeMatches) { - next if ( !defined($info) ); - - # within one year with exact match good enough - if ( lc($mytitle) eq lc($info->{title}) ) { - my $yearsOff=abs(int($info->{year})-$year); - if ( $yearsOff <= 2 ) { - #die "internal error: key \"$info->{key}\" failed to be processed properly"; - } - elsif ( $yearsOff <= 5 ) { - # report these as status - $self->status("ignoring close, but not good enough hit on \"$info->{key}\" (off by $yearsOff years)"); + # otherwise we're looking for a title match with a close year + for my $mytitle ( @titles ) { + # try close hit if only one :) + my $cnt=0; + my @closeMatches=$self->getMovieCloseMatches("$mytitle"); + + # we traverse the hits twice, first looking for success, + # then again to produce warnings about missed close matches + for my $info (@closeMatches) { + next if ( !defined($info) ); + $cnt++; + + # within one year with exact match good enough + if ( lc($mytitle) eq lc($info->{title}) ) { + my $yearsOff=abs(int($info->{year})-$year); + + $info->{matchLevel}="close"; + + if ( $yearsOff <= 2 ) { + my $showYear=int($info->{year}); + + if ( $info->{qualifier} eq "movie" ) { + $self->status("close enough hit on movie \"$info->{key}\" (off by $yearsOff years)"); + return($info); + } + elsif ( $info->{qualifier} eq "tv_movie" ) { + $self->status("close enough hit on made-for-tv-movie \"$info->{key}\" (off by $yearsOff years)"); + return($info); + } + elsif ( $info->{qualifier} eq "video_movie" ) { + $self->status("close enough hit on made-for-video-movie \"$info->{key}\" (off by $yearsOff years)"); + return($info); + } + elsif ( $info->{qualifier} eq "video_game" ) { + $self->status("ignoring close hit on video-game \"$info->{key}\""); + next; + } + elsif ( $info->{qualifier} eq "tv_series" ) { + $self->status("ignoring close hit on tv series \"$info->{key}\""); + #$self->status("close enough hit on tv series \"$info->{key}\" (off by $yearsOff years)"); + } + elsif ( $info->{qualifier} eq "tv_mini_series" ) { + 
$self->status("ignoring close hit on tv mini-series \"$info->{key}\""); + #$self->status("close enough hit on tv mini-series \"$info->{key}\" (off by $yearsOff years)"); + } + else { + $self->error("$self->{moviedbIndex} responded with wierd entry for \"$info->{key}\""); + $self->error("weird trailing qualifier \"$info->{qualifier}\""); + $self->error("submit bug report to xmltv-devel\@lists.sf.net"); + } + } + } } - else { - # report these as debug messages - $self->debug("ignoring close hit on \"$info->{key}\" (off by $yearsOff years)"); + + # if we found at least something, but nothing matched + # produce warnings about missed, but close matches + for my $info (@closeMatches) { + next if ( !defined($info) ); + + # within one year with exact match good enough + if ( lc($mytitle) eq lc($info->{title}) ) { + my $yearsOff=abs(int($info->{year})-$year); + if ( $yearsOff <= 2 ) { + #die "internal error: key \"$info->{key}\" failed to be processed properly"; + } + elsif ( $yearsOff <= 5 ) { + # report these as status + $self->status("ignoring close, but not good enough hit on \"$info->{key}\" (off by $yearsOff years)"); + } + else { + # report these as debug messages + $self->debug("ignoring close hit on \"$info->{key}\" (off by $yearsOff years)"); + } + } + else { + $self->debug("ignoring close hit on \"$info->{key}\" (title did not match)"); + } } - } - else { - $self->debug("ignoring close hit on \"$info->{key}\" (title did not match)"); - } - } - } - #$self->status("failed to lookup \"$title ($year)\""); - return(undef); + } + #$self->status("failed to lookup \"$title ($year)\""); + return(undef); } sub findTVSeriesInfo($$) { - my ($self, $title)=@_; + my ($self, $title)=@_; - if ( $self->{cacheLookups} ) { - my $id=$self->{cachedLookups}->{tv_series}->{$title}; + if ( $self->{cacheLookups} ) { + my $id=$self->{cachedLookups}->{tv_series}->{$title}; - if ( defined($id) ) { - #print STDERR "REF= (".ref($id).")\n"; - if ( $id ne '' ) { - return($id); - } - 
return(undef); + if ( defined($id) ) { + #print STDERR "REF= (".ref($id).")\n"; + if ( $id ne '' ) { + return($id); + } + return(undef); + } } - } - - my @titles=@{alternativeTitles($title)}; - # try an exact match first :) - my $idInfo; + my @titles=@{alternativeTitles($title)}; - for my $mytitle ( @titles ) { - # try close hit if only one :) - my $cnt=0; - my @closeMatches=$self->getMovieCloseMatches("$mytitle"); - - for my $info (@closeMatches) { - next if ( !defined($info) ); - $cnt++; - - if ( lc($mytitle) eq lc($info->{title}) ) { - - $info->{matchLevel}="perfect"; + # try an exact match first :) + my $idInfo; - if ( $info->{qualifier} eq "movie" ) { - #$self->status("ignoring close hit on movie \"$info->{key}\""); - } - elsif ( $info->{qualifier} eq "tv_movie" ) { - #$self->status("ignoring close hit on tv movie \"$info->{key}\""); - } - elsif ( $info->{qualifier} eq "video_movie" ) { - #$self->status("ignoring close hit on made-for-video-movie \"$info->{key}\""); - } - elsif ( $info->{qualifier} eq "video_game" ) { - #$self->status("ignoring close hit on made-for-video-movie \"$info->{key}\""); - next; + for my $mytitle ( @titles ) { + # try close hit if only one :) + my $cnt=0; + my @closeMatches=$self->getMovieCloseMatches("$mytitle"); + + for my $info (@closeMatches) { + next if ( !defined($info) ); + $cnt++; + + if ( lc($mytitle) eq lc($info->{title}) ) { + + $info->{matchLevel}="perfect"; + + if ( $info->{qualifier} eq "movie" ) { + #$self->status("ignoring close hit on movie \"$info->{key}\""); + } + elsif ( $info->{qualifier} eq "tv_movie" ) { + #$self->status("ignoring close hit on tv movie \"$info->{key}\""); + } + elsif ( $info->{qualifier} eq "video_movie" ) { + #$self->status("ignoring close hit on made-for-video-movie \"$info->{key}\""); + } + elsif ( $info->{qualifier} eq "video_game" ) { + #$self->status("ignoring close hit on made-for-video-movie \"$info->{key}\""); + next; + } + elsif ( $info->{qualifier} eq "tv_series" ) { + $idInfo=$info; 
+ $self->status("perfect hit on tv series \"$info->{key}\""); + last; + } + elsif ( $info->{qualifier} eq "tv_mini_series" ) { + $idInfo=$info; + $self->status("perfect hit on tv mini-series \"$info->{key}\""); + last; + } + else { + $self->error("$self->{moviedbIndex} responded with wierd entry for \"$info->{key}\""); + $self->error("weird trailing qualifier \"$info->{qualifier}\""); + $self->error("submit bug report to xmltv-devel\@lists.sf.net"); + } + } } - elsif ( $info->{qualifier} eq "tv_series" ) { - $idInfo=$info; - $self->status("perfect hit on tv series \"$info->{key}\""); - last; + last if ( defined($idInfo) ); + } + + if ( $self->{cacheLookups} ) { + # flush cache after this lookup if its gotten too big + if ( $self->{cachedLookups}->{tv_series}->{_cacheSize_} > + $self->{cacheLookupSize} ) { + delete($self->{cachedLookups}->{tv_series}); + $self->{cachedLookups}->{tv_series}->{_cacheSize_}=0; } - elsif ( $info->{qualifier} eq "tv_mini_series" ) { - $idInfo=$info; - $self->status("perfect hit on tv mini-series \"$info->{key}\""); - last; + if ( defined($idInfo) ) { + $self->{cachedLookups}->{tv_series}->{$title}=$idInfo; } else { - $self->error("$self->{moviedbIndex} responded with wierd entry for \"$info->{key}\""); - $self->error("weird trailing qualifier \"$info->{qualifier}\""); - $self->error("submit bug report to xmltv-devel\@lists.sf.net"); - } - } - } - last if ( defined($idInfo) ); - } - - if ( $self->{cacheLookups} ) { - # flush cache after this lookup if its gotten too big - if ( $self->{cachedLookups}->{tv_series}->{_cacheSize_} > - $self->{cacheLookupSize} ) { - delete($self->{cachedLookups}->{tv_series}); - $self->{cachedLookups}->{tv_series}->{_cacheSize_}=0; + $self->{cachedLookups}->{tv_series}->{$title}=""; + } + $self->{cachedLookups}->{tv_series}->{_cacheSize_}++; } if ( defined($idInfo) ) { - $self->{cachedLookups}->{tv_series}->{$title}=$idInfo; + return($idInfo); } else { - $self->{cachedLookups}->{tv_series}->{$title}=""; + 
#$self->status("failed to lookup tv series \"$title\""); + return(undef); } - $self->{cachedLookups}->{tv_series}->{_cacheSize_}++; - } - if ( defined($idInfo) ) { - return($idInfo); - } - else { - #$self->status("failed to lookup tv series \"$title\""); - return(undef); - } } # @@ -921,405 +944,409 @@ # todo - producer # todo - running time (duration) # todo - identify 'Host' and 'Narrator's and put them in as -# credits:presenter and credits:commentator resp. +# credits:presenter and credits:commentator resp. # todo - check program length - probably a warning if longer ? -# can we update length (separate from runnning time in the output ?) +# can we update length (separate from runnning time in the output ?) # todo - icon - url from www.imdb.com of programme image ? -# this could be done by scraping for the hyper linked poster -# -# and grabbin' out the img entry. (BTW ..../npa.jpg seems to line up with no poster available) +# this could be done by scraping for the hyper linked poster +# +# and grabbin' out the img entry. 
(BTW ..../npa.jpg seems to line up with no poster available) # # sub applyFound($$$) { - my ($self, $prog, $idInfo)=@_; + my ($self, $prog, $idInfo)=@_; - my $title=$prog->{title}->[0]->[0]; + my $title=$prog->{title}->[0]->[0]; - if ( $self->{updateDates} ) { - my $date; + if ( $self->{updateDates} ) { + my $date; - # don't add dates only fix them for tv_series - if ( $idInfo->{qualifier} eq "movie" || - $idInfo->{qualifier} eq "video_movie" || - $idInfo->{qualifier} eq "tv_movie" ) { - #$self->debug("adding 'date' field (\"$idInfo->{year}\") on \"$title\""); - $date=int($idInfo->{year}); - } - else { - #$self->debug("not adding 'date' field to $idInfo->{qualifier} \"$title\""); - $date=undef; + # don't add dates only fix them for tv_series + if ( $idInfo->{qualifier} eq "movie" || + $idInfo->{qualifier} eq "video_movie" || + $idInfo->{qualifier} eq "tv_movie" ) { + #$self->debug("adding 'date' field (\"$idInfo->{year}\") on \"$title\""); + $date=int($idInfo->{year}); + } + else { + #$self->debug("not adding 'date' field to $idInfo->{qualifier} \"$title\""); + $date=undef; + } + + if ( $self->{replaceDates} ) { + if ( defined($prog->{date}) && defined($date) ) { + $self->debug("replacing 'date' field"); + delete($prog->{date}); + $prog->{date}=$date; + } + } + else { + # only set date if not already defined + if ( !defined($prog->{date}) && defined($date) ) { + $prog->{date}=$date; + } + } } - if ( $self->{replaceDates} ) { - if ( defined($prog->{date}) && defined($date) ) { - $self->debug("replacing 'date' field"); - delete($prog->{date}); - $prog->{date}=$date; - } + if ( $self->{updateTitles} ) { + if ( $idInfo->{title} ne $title ) { + if ( $self->{replaceTitles} ) { + $self->debug("replacing (all) 'title' from \"$title\" to \"$idInfo->{title}\""); + delete($prog->{title}); + } + + my @list; + + push(@list, [$idInfo->{title}, undef]); + + if ( defined($prog->{title}) ) { + my $name=$idInfo->{title}; + my $found=0; + for my $v (@{$prog->{title}}) { + if ( 
lc($v->[0]) eq lc($name) ) { + $found=1; + } + else { + push(@list, $v); + } + } + } + $prog->{title}=\@list; + } } - else { - # only set date if not already defined - if ( !defined($prog->{date}) && defined($date) ) { - $prog->{date}=$date; - } - } - } - - if ( $self->{updateTitles} ) { - if ( $idInfo->{title} ne $title ) { - if ( $self->{replaceTitles} ) { - $self->debug("replacing (all) 'title' from \"$title\" to \"$idInfo->{title}\""); - delete($prog->{title}); - } - - my @list; - - push(@list, [$idInfo->{title}, undef]); - - if ( defined($prog->{title}) ) { - my $name=$idInfo->{title}; - my $found=0; - for my $v (@{$prog->{title}}) { - if ( lc($v->[0]) eq lc($name) ) { - $found=1; - } - else { - push(@list, $v); - } - } - } - $prog->{title}=\@list; - } - } - - if ( $self->{updateURLs} ) { - if ( $self->{replaceURLs} ) { - if ( defined($prog->{url}) ) { - $self->debug("replacing (all) 'url'"); - delete($prog->{url}); - } - } - - # add url to programme on www.imdb.com - my $url=$idInfo->{key}; - - $url=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg; - $url="http://us.imdb.com/M/title-exact?".$url; - - if ( defined($prog->{url}) ) { - my @rep; - push(@rep, $url); - for (@{$prog->{url}}) { - # skip urls for imdb.com that we're probably safe to replace - if ( !m;^http://us.imdb.com/M/title-exact;o ) { - push(@rep, $_); + + if ( $self->{updateURLs} ) { + if ( $self->{replaceURLs} ) { + if ( defined($prog->{url}) ) { + $self->debug("replacing (all) 'url'"); + delete($prog->{url}); + } + } + + # add url to programme on www.imdb.com + my $url=$idInfo->{key}; + + $url=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg; + $url="https://www.imdb.com/find?q=".$url."&s=tt&exact=true"; + + if ( defined($prog->{url}) ) { + my @rep; + push(@rep, $url); + for (@{$prog->{url}}) { + # skip urls for imdb.com that we're probably safe to replace + if ( !m;^http://us.imdb.com/M/title-exact;o && !m;^https://www.imdb.com/find;o ) { + push(@rep, $_); + } + } + 
$prog->{url}=\@rep; + } + else { + push(@{$prog->{url}}, $url); } - } - $prog->{url}=\@rep; + } + + # squirrel away movie qualifier so its first on the list of replacements + my @categories; + push(@categories, [$self->{categories}->{$idInfo->{qualifier}}, 'en']); + if ( !defined($self->{categories}->{$idInfo->{qualifier}}) ) { + die "how did we get here with an invalid qualifier '$idInfo->{qualifier}'"; + } + + my $details=$self->getMovieIdDetails($idInfo->{id}); + if ( $details->{noDetails} ) { + # we don't have any details on this movie } else { - push(@{$prog->{url}}, $url); + # add directors list + if ( $self->{updateDirectors} && defined($details->{directors}) ) { + # only update directors if we have exactly one or if + # its a movie of some kind, add more than one. + if ( scalar(@{$details->{directors}}) == 1 || + $idInfo->{qualifier} eq "movie" || + $idInfo->{qualifier} eq "video_movie" || + $idInfo->{qualifier} eq "tv_movie" ) { + + if ( $self->{replaceDirectors} ) { + if ( defined($prog->{credits}->{director}) ) { + $self->debug("replacing director(s)"); + delete($prog->{credits}->{director}); + } + } + + my @list; + # add top 3 billing directors list form www.imdb.com + for my $name (splice(@{$details->{directors}},0,3)) { + push(@list, $name); + } + + # preserve all existing directors listed if we did't already have them. 
+ if ( defined($prog->{credits}->{director}) ) { + for my $name (@{$prog->{credits}->{director}}) { + my $found=0; + for(@list) { + if ( lc eq lc($name) ) { + $found=1; + } + } + if ( !$found ) { + push(@list, $name); + } + } + } + $prog->{credits}->{director}=\@list; + } + else { + $self->debug("not adding 'director' field to $idInfo->{qualifier} \"$title\""); + } + } + + if ( $self->{updateActors} && defined($details->{actors}) ) { + if ( $self->{replaceActors} ) { + if ( defined($prog->{credits}->{actor}) ) { + $self->debug("replacing actor(s) on $idInfo->{qualifier} \"$idInfo->{key}\""); + delete($prog->{credits}->{actor}); + } + } + + my @list; + # add top billing actors (default = 3) from www.imdb.com + for my $name (splice(@{$details->{actors}},0,$self->{numActors})) { + push(@list, $name); + } + # preserve all existing actors listed if we did't already have them. + if ( defined($prog->{credits}->{actor}) ) { + for my $name (@{$prog->{credits}->{actor}}) { + my $found=0; + for(@list) { + if ( lc eq lc($name) ) { + $found=1; + } + } + if ( !$found ) { + push(@list, $name); + } + } + } + $prog->{credits}->{actor}=\@list; + } + + if ( $self->{updatePresentors} && defined($details->{presenter}) ) { + if ( $self->{replacePresentors} ) { + if ( defined($prog->{credits}->{presenter}) ) { + $self->debug("replacing presentor"); + delete($prog->{credits}->{presenter}); + } + } + $prog->{credits}->{presenter}=$details->{presenter}; + } + if ( $self->{updateCommentators} && defined($details->{commentator}) ) { + if ( $self->{replaceCommentators} ) { + if ( defined($prog->{credits}->{commentator}) ) { + $self->debug("replacing commentator"); + delete($prog->{credits}->{commentator}); + } + } + $prog->{credits}->{commentator}=$details->{commentator}; + } + + # push genres as categories + if ( $self->{updateCategoriesWithGenres} ) { + if ( defined($details->{genres}) ) { + for (@{$details->{genres}}) { + push(@categories, [$_, 'en']); + } + } + } + + if ( 
$self->{updateStarRatings} && defined($details->{ratingRank}) ) { + if ( $self->{replaceStarRatings} ) { + if ( defined($prog->{'star-rating'}) ) { + $self->debug("replacing 'star-rating'"); + delete($prog->{'star-rating'}); + } + unshift( @{$prog->{'star-rating'}}, [ $details->{ratingRank} . "/10", 'IMDB User Rating' ] ); + } + else { + # add IMDB User Rating in front of all other star-ratings + unshift( @{$prog->{'star-rating'}}, [ $details->{ratingRank} . "/10", 'IMDB User Rating' ] ); + } + } + + if ( $self->{updateKeywords} ) { + my @keywords; + if ( defined($details->{keywords}) ) { + for (@{$details->{keywords}}) { + push(@keywords, [$_, 'en']); + } + } + + if ( $self->{replaceKeywords} ) { + if ( defined($prog->{keywords}) ) { + $self->debug("replacing (all) 'keywords'"); + delete($prog->{keywords}); + } + } + if ( defined($prog->{keyword}) ) { + for my $value (@{$prog->{keyword}}) { + my $found=0; + for my $k (@keywords) { + if ( lc($k->[0]) eq lc($value->[0]) ) { + $found=1; + } + } + if ( !$found ) { + push(@keywords, $value); + } + } + } + $prog->{keyword}=\@keywords; + } + + if ( $self->{updatePlot} ) { + # plot is held as a entity + # if 'replacePlot' then delete all existing entities and add new + # else add this plot as an additional entity + # + if ( $self->{replacePlot} ) { + if ( defined($prog->{desc}) ) { + $self->debug("replacing (all) 'desc'"); + delete($prog->{desc}); + } + } + if ( defined($details->{plot}) ) { + # check it's not already there + my $found = 0; + for my $_desc ( @{$prog->{desc}} ) { + $found = 1 if ( @{$_desc}[0] eq $details->{plot} ); + } + push @{$prog->{desc}}, [ $details->{plot}, 'en' ] if !$found; + } + } + } - } - # squirrel away movie qualifier so its first on the list of replacements - my @categories; - push(@categories, [$self->{categories}->{$idInfo->{qualifier}}, 'en']); - if ( !defined($self->{categories}->{$idInfo->{qualifier}}) ) { - die "how did we get here with an invalid qualifier '$idInfo->{qualifier}'"; - } 
- - my $details=$self->getMovieIdDetails($idInfo->{id}); - if ( $details->{noDetails} ) { - # we don't have any details on this movie - } - else { - # add directors list - if ( $self->{updateDirectors} && defined($details->{directors}) ) { - # only update directors if we have exactly one or if - # its a movie of some kind, add more than one. - if ( scalar(@{$details->{directors}}) == 1 || - $idInfo->{qualifier} eq "movie" || - $idInfo->{qualifier} eq "video_movie" || - $idInfo->{qualifier} eq "tv_movie" ) { - - if ( $self->{replaceDirectors} ) { - if ( defined($prog->{credits}->{director}) ) { - $self->debug("replacing director(s)"); - delete($prog->{credits}->{director}); - } - } - - my @list; - # add top 3 billing directors list form www.imdb.com - for my $name (splice(@{$details->{directors}},0,3)) { - push(@list, $name); - } - - # preserve all existing directors listed if we did't already have them. - if ( defined($prog->{credits}->{director}) ) { - for my $name (@{$prog->{credits}->{director}}) { - my $found=0; - for(@list) { - if ( lc eq lc($name) ) { - $found=1; - } - } - if ( !$found ) { - push(@list, $name); - } - } - } - $prog->{credits}->{director}=\@list; - } - else { - $self->debug("not adding 'director' field to $idInfo->{qualifier} \"$title\""); - } - } - - if ( $self->{updateActors} && defined($details->{actors}) ) { - if ( $self->{replaceActors} ) { - if ( defined($prog->{credits}->{actor}) ) { - $self->debug("replacing actor(s) on $idInfo->{qualifier} \"$idInfo->{key}\""); - delete($prog->{credits}->{actor}); - } - } - - my @list; - # add top billing actors (default = 3) from www.imdb.com - for my $name (splice(@{$details->{actors}},0,$self->{numActors})) { - push(@list, $name); - } - # preserve all existing actors listed if we did't already have them. 
- if ( defined($prog->{credits}->{actor}) ) { - for my $name (@{$prog->{credits}->{actor}}) { - my $found=0; - for(@list) { - if ( lc eq lc($name) ) { - $found=1; - } - } - if ( !$found ) { - push(@list, $name); - } - } - } - $prog->{credits}->{actor}=\@list; - } - - if ( $self->{updatePresentors} && defined($details->{presenter}) ) { - if ( $self->{replacePresentors} ) { - if ( defined($prog->{credits}->{presenter}) ) { - $self->debug("replacing presentor"); - delete($prog->{credits}->{presenter}); - } - } - $prog->{credits}->{presenter}=$details->{presenter}; - } - if ( $self->{updateCommentators} && defined($details->{commentator}) ) { - if ( $self->{replaceCommentators} ) { - if ( defined($prog->{credits}->{commentator}) ) { - $self->debug("replacing commentator"); - delete($prog->{credits}->{commentator}); - } - } - $prog->{credits}->{commentator}=$details->{commentator}; - } - - # push genres as categories - if ( $self->{updateCategoriesWithGenres} ) { - if ( defined($details->{genres}) ) { - for (@{$details->{genres}}) { - push(@categories, [$_, 'en']); - } - } - } - - if ( $self->{updateStarRatings} && defined($details->{ratingRank}) ) { - if ( $self->{replaceStarRatings} ) { - if ( defined($prog->{'star-rating'}) ) { - $self->debug("replacing 'star-rating'"); - delete($prog->{'star-rating'}); - } - unshift( @{$prog->{'star-rating'}}, [ $details->{ratingRank} . "/10", 'IMDB User Rating' ] ); - } - else { - # add IMDB User Rating in front of all other star-ratings - unshift( @{$prog->{'star-rating'}}, [ $details->{ratingRank} . 
"/10", 'IMDB User Rating' ] ); - } - } - - if ( $self->{updateKeywords} ) { - my @keywords; - if ( defined($details->{keywords}) ) { - for (@{$details->{keywords}}) { - push(@keywords, [$_, 'en']); - } - } - - if ( $self->{replaceKeywords} ) { - if ( defined($prog->{keywords}) ) { - $self->debug("replacing (all) 'keywords'"); - delete($prog->{keywords}); - } - } - if ( defined($prog->{keyword}) ) { - for my $value (@{$prog->{keyword}}) { - my $found=0; - for my $k (@keywords) { - if ( lc($k->[0]) eq lc($value->[0]) ) { - $found=1; - } - } - if ( !$found ) { - push(@keywords, $value); - } - } - } - $prog->{keyword}=\@keywords; - } - - if ( $self->{updatePlot} ) { - # plot is held as a entity - # if 'replacePlot' then delete all existing entities and add new - # else add this plot as an additional entity - # - if ( $self->{replacePlot} ) { - if ( defined($prog->{desc}) ) { - $self->debug("replacing (all) 'desc'"); - delete($prog->{desc}); - } - } - if ( defined($details->{plot}) ) { - # check it's not already there - my $found = 0; - for my $_desc ( @{$prog->{desc}} ) { - $found = 1 if ( @{$_desc}[0] eq $details->{plot} ); - } - push @{$prog->{desc}}, [ $details->{plot}, 'en' ] if !$found; - } - } - - } - - if ( $self->{updateCategories} ) { - if ( $self->{replaceCategories} ) { - if ( defined($prog->{category}) ) { - $self->debug("replacing (all) 'category'"); - delete($prog->{category}); - } - } - if ( defined($prog->{category}) ) { - for my $value (@{$prog->{category}}) { - my $found=0; - #print "checking category $value->[0] with $mycategory\n"; - for my $c (@categories) { - if ( lc($c->[0]) eq lc($value->[0]) ) { - $found=1; - } - } - if ( !$found ) { - push(@categories, $value); + if ( $self->{updateCategories} ) { + if ( $self->{replaceCategories} ) { + if ( defined($prog->{category}) ) { + $self->debug("replacing (all) 'category'"); + delete($prog->{category}); + } + } + if ( defined($prog->{category}) ) { + for my $value (@{$prog->{category}}) { + my 
$found=0; + #print "checking category $value->[0] with $mycategory\n"; + for my $c (@categories) { + if ( lc($c->[0]) eq lc($value->[0]) ) { + $found=1; + } + } + if ( !$found ) { + push(@categories, $value); + } + } } - } + $prog->{category}=\@categories; } - $prog->{category}=\@categories; - } - return($prog); + return($prog); } sub augmentProgram($$$) { - my ($self, $prog, $movies_only)=@_; + my ($self, $prog, $movies_only)=@_; - $self->{stats}->{programCount}++; + $self->{stats}->{programCount}++; - # assume first title in first language is the one we want. - my $title=$prog->{title}->[0]->[0]; + # assume first title in first language is the one we want. + my $title=$prog->{title}->[0]->[0]; - if ( defined($prog->{date}) && $prog->{date}=~m/^\d\d\d\d$/o ) { + if ( defined($prog->{date}) && $prog->{date}=~m/^\d\d\d\d$/o ) { - # for programs with dates we try: - # - exact matches on movies - # - exact matches on tv series - # - close matches on movies - my $id=$self->findMovieInfo($title, $prog->{date}, 1); # exact match - if ( !defined($id) ) { - $id=$self->findTVSeriesInfo($title); - if ( !defined($id) ) { - $id=$self->findMovieInfo($title, $prog->{date}, 0); # close match - } - } - if ( defined($id) ) { - $self->{stats}->{$id->{matchLevel}."Matches"}++; - $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++; - return($self->applyFound($prog, $id)); + # for programs with dates we try: + # - exact matches on movies + # - exact matches on tv series + # - close matches on movies + my ($id, $matchcount) = $self->findMovieInfo($title, $prog->{date}, 1); # exact match + if (defined $matchcount && $matchcount > 1) { + $self->status("failed to find a sole match for movie \"$title ($prog->{date})\""); + return(undef); + } + if ( !defined($id) ) { + $id=$self->findTVSeriesInfo($title); + if ( !defined($id) ) { + ($id, $matchcount) = $self->findMovieInfo($title, $prog->{date}, 0); # close match + } + } + if ( defined($id) ) { + 
$self->{stats}->{$id->{matchLevel}."Matches"}++; + $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++; + return($self->applyFound($prog, $id)); + } + $self->status("failed to find a match for movie \"$title ($prog->{date})\""); + return(undef); + # fall through and try again as a tv series + } + + if ( !$movies_only ) { + my $id=$self->findTVSeriesInfo($title); + if ( defined($id) ) { + $self->{stats}->{$id->{matchLevel}."Matches"}++; + $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++; + return($self->applyFound($prog, $id)); + } + + if ( 0 ) { + # this has hard to support 'close' results, unless we know + # for certain we're looking for a movie (ie duration etc) + # this is a bad idea. + my ($id, $matchcount) = $self->findMovieInfo($title, undef, 2); # any title match + if ( defined($id) ) { + $self->{stats}->{$id->{matchLevel}."Matches"}++; + $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++; + return($self->applyFound($prog, $id)); + } + } + $self->status("failed to find a match for show \"$title\""); } - $self->status("failed to find a match for movie \"$title ($prog->{date})\""); return(undef); - # fall through and try again as a tv series - } - - if ( !$movies_only ) { - my $id=$self->findTVSeriesInfo($title); - if ( defined($id) ) { - $self->{stats}->{$id->{matchLevel}."Matches"}++; - $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++; - return($self->applyFound($prog, $id)); - } - - if ( 0 ) { - # this has hard to support 'close' results, unless we know - # for certain we're looking for a movie (ie duration etc) - # this is a bad idea. 
- my $id=$self->findMovieInfo($title, undef, 2); # any title match - if ( defined($id) ) { - $self->{stats}->{$id->{matchLevel}."Matches"}++; - $self->{stats}->{$id->{matchLevel}}->{$id->{qualifier}}++; - return($self->applyFound($prog, $id)); - } - } - $self->status("failed to find a match for show \"$title\""); - } - return(undef); } # # todo - add in stats on other things added (urls ?, actors, directors,categories) -# separate out from what was added or updated +# separate out from what was added or updated # sub getStatsLines($) { - my $self=shift; - my $totalChannelsParsed=shift; + my $self=shift; + my $totalChannelsParsed=shift; - my $endTime=time(); - my %stats=%{$self->{stats}}; + my $endTime=time(); + my %stats=%{$self->{stats}}; - my $ret=sprintf("Checked %d programs, on %d channels\n", $stats{programCount}, $totalChannelsParsed); + my $ret=sprintf("Checked %d programs, on %d channels\n", $stats{programCount}, $totalChannelsParsed); - for my $cat (sort keys %{$self->{categories}}) { - $ret.=sprintf(" found %d %s titles", $stats{perfect}->{$cat}+$stats{close}->{$cat}, - $self->{categories}->{$cat}); - if ( $stats{close}->{$cat} != 0 ) { - if ( $stats{close}->{$cat} == 1 ) { - $ret.=sprintf(" (%d was not perfect)", $stats{close}->{$cat}); - } - else { - $ret.=sprintf(" (%d were not perfect)", $stats{close}->{$cat}); - } + for my $cat (sort keys %{$self->{categories}}) { + $ret.=sprintf(" found %d %s titles", $stats{perfect}->{$cat}+$stats{close}->{$cat}, + $self->{categories}->{$cat}); + if ( $stats{close}->{$cat} != 0 ) { + if ( $stats{close}->{$cat} == 1 ) { + $ret.=sprintf(" (%d was not perfect)", $stats{close}->{$cat}); + } + else { + $ret.=sprintf(" (%d were not perfect)", $stats{close}->{$cat}); + } + } + $ret.="\n"; } - $ret.="\n"; - } - $ret.=sprintf(" augmented %.2f%% of the programs, parsing %.2f programs/sec\n", + $ret.=sprintf(" augmented %.2f%% of the programs, parsing %.2f programs/sec\n", 
($stats{programCount}!=0)?(($stats{perfectMatches}+$stats{closeMatches})*100)/$stats{programCount}:0, ($endTime!=$stats{startTime} && $stats{programCount} != 0)? $stats{programCount}/($endTime-$stats{startTime}):0); - return($ret); + return($ret); } 1; @@ -1328,24 +1355,37 @@ # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ package XMLTV::IMDB::Crunch; + use LWP; +use XMLTV::Gunzip; +use IO::File; + +# is system sort available? +use constant HAS_SYSTEMSORT => ($^O=~'linux|cygwin|MSWin32'); + +# is File::Sort available? +use constant HAS_FILESORT => defined eval { require File::Sort }; use open ':encoding(iso-8859-1)'; # try to enforce file encoding (does this work in Perl <5.8.1? ) # Use Term::ProgressBar if installed. use constant Have_bar => eval { - require Term::ProgressBar; - $Term::ProgressBar::VERSION >= 2; + require Term::ProgressBar; + $Term::ProgressBar::VERSION >= 2; }; +my $VERSION = '0.11'; # version number of database + +my %titlehash = (); + # # This package parses and manages to index imdb plain text files from # ftp.imdb.com/interfaces. (see http://www.imdb.com/interfaces for # details) # # I might, given time build a download manager that: -# - downloads the latest plain text files -# - understands how to download each week's diffs and apply them +# - downloads the latest plain text files +# - understands how to download each week's diffs and apply them # Currently, the 'downloadMissingFiles' flag in the hash of attributes # passed triggers a simple-minded downloader. # @@ -1354,130 +1394,143 @@ # the imdb file formats. # +# [honir] 2020-12-27 An undocumented option --sample n will fetch only n records from each IMDb data file +# Note the output will not be valid (since the n records will not cross-reference from the different files) +# it's simply a way to avoid having to process all 4.5 million titles when you are debugging! 
+ + sub new { - my ($type) = shift; - my $self={ @_ }; # remaining args become attributes - for ($self->{downloadMissingFiles}) { - $_=0 if not defined; # default - } + my ($type) = shift; + my $self={ @_ }; # remaining args become attributes + for ($self->{downloadMissingFiles}) { + $_=0 if not defined; # default + } - for ('imdbDir', 'verbose') { - die "invalid usage - no $_" if ( !defined($self->{$_})); - } + for ('imdbDir', 'verbose') { + die "invalid usage - no $_" if ( !defined($self->{$_})); + } - $self->{stageLast} = 9; # set the final stage in the build - i.e. the one which builds the final database - $self->{stages} = { 1=>'movies', 2=>'directors', 3=>'actors', 4=>'actresses', 5=>'genres', 6=>'ratings', 7=>'keywords', 8=>'plot' }; - $self->{optionalStages} = { 'keywords' => 7, 'plot' => 8 }; # list of optional stages - no need to download files for these - - $self->{moviedbIndex}="$self->{imdbDir}/moviedb.idx"; - $self->{moviedbData}="$self->{imdbDir}/moviedb.dat"; - $self->{moviedbInfo}="$self->{imdbDir}/moviedb.info"; - $self->{moviedbOffline}="$self->{imdbDir}/moviedb.offline"; - - # only leave progress bar on if its available - if ( !Have_bar ) { - $self->{showProgressBar}=0; - } - - bless($self, $type); - - if ( $self->{stageToRun} ne $self->{stageLast} ) { - # unless this is the last stage, check we have the necessary files - return(undef) if ( $self->checkFiles() != 0 ); - } + $self->{stageLast} = 9; # set the final stage in the build - i.e. 
the one which builds the final database + $self->{stages} = { 1=>'movies', 2=>'directors', 3=>'actors', 4=>'actresses', 5=>'genres', 6=>'ratings', 7=>'keywords', 8=>'plot' }; + $self->{optionalStages} = { 'keywords' => 7, 'plot' => 8 }; # list of optional stages - no need to download files for these - return($self); + $self->{moviedbIndex}="$self->{imdbDir}/moviedb.idx"; + $self->{moviedbData}="$self->{imdbDir}/moviedb.dat"; + $self->{moviedbInfo}="$self->{imdbDir}/moviedb.info"; + $self->{moviedbOffline}="$self->{imdbDir}/moviedb.offline"; + + # only leave progress bar on if its available + if ( !Have_bar ) { + $self->{showProgressBar}=0; + } + + bless($self, $type); + + if ( $self->{filesort} && !( HAS_FILESORT || HAS_SYSTEMSORT ) ) { + $self->error("filesort requested but not available"); + return(undef); + } + $self->{usefilesort} = ( (HAS_FILESORT || HAS_SYSTEMSORT) && $self->{filesort} ); # --filesort => 1 --nofilesort => 0 + $self->{usesystemsort} = ( HAS_SYSTEMSORT && $self->{filesort} && $self->{systemsort}); # use linux sort in preference to File::Sort as it is sooo much faster on big files + + if ( $self->{stageToRun} ne $self->{stageLast} ) { + # unless this is the last stage, check we have the necessary files + return(undef) if ( $self->checkFiles() != 0 ); + } + + return($self); } sub checkFiles () { - my ($self)=@_; + my ($self)=@_; - if ( ! -d "$self->{imdbDir}" ) { - if ( $self->{downloadMissingFiles} ) { - warn "creating directory $self->{imdbDir}\n"; - mkdir $self->{imdbDir}, 0777 - or die "cannot mkdir $self->{imdbDir}: $!"; + if ( ! -d "$self->{imdbDir}" ) { + if ( $self->{downloadMissingFiles} ) { + warn "creating directory $self->{imdbDir}\n"; + mkdir $self->{imdbDir}, 0777 + or die "cannot mkdir $self->{imdbDir}: $!"; + } + else { + die "$self->{imdbDir}:does not exist"; + } } - else { - die "$self->{imdbDir}:does not exist"; + my $listsDir = "$self->{imdbDir}/lists"; + if ( ! 
-d $listsDir ) { + mkdir $listsDir, 0777 or die "cannot mkdir $listsDir: $!"; } - } - my $listsDir = "$self->{imdbDir}/lists"; - if ( ! -d $listsDir ) { - mkdir $listsDir, 0777 or die "cannot mkdir $listsDir: $!"; - } CHECK_FILES: - my %missingListFiles; # maps 'movies' to filename ...movies.gz + my %missingListFiles; # maps 'movies' to filename ...movies.gz - FILES_CHECK: - while ( my( $key, $value ) = each %{ $self->{stages} } ) { - # don't check *all* files - only the ones we are crunching - next FILES_CHECK if ( lc($self->{stageToRun}) ne 'all' && $key != int($self->{stageToRun}) ); - my $file=$value; - my $filename="$listsDir/$file.list"; - my $filenameGz="$filename.gz"; - my $filenameExists = -f $filename; - my $filenameSize = -s $filename; - my $filenameGzExists = -f $filenameGz; - my $filenameGzSize = -s $filenameGz; - - if ( $filenameExists and not $filenameSize ) { - warn "removing zero-length $filename\n"; - unlink $filename or die "cannot unlink $filename: $!"; - $filenameExists = 0; - } - if ( $filenameGzExists and not $filenameGzSize ) { - warn "removing zero-length $filenameGz\n"; - unlink $filenameGz or die "cannot unlink $filenameGz: $!"; - $filenameGzExists = 0; - } - - if ( not $filenameExists and not $filenameGzExists ) { - # Just report one of the filenames, keep the message simple. 
- warn "$filenameGz does not exist\n"; - if ( $self->{optionalStages}{$file} ) { - warn "$file will not be added to database\n"; - } else { - $missingListFiles{$file}=$filenameGz; - } - } - elsif ( not $filenameExists and $filenameGzExists ) { - $self->{imdbListFiles}->{$file}=$filenameGz; - } - elsif ( $filenameExists and not $filenameGzExists ) { - $self->{imdbListFiles}->{$file}=$filename; - } - elsif ( $filenameExists and $filenameGzExists ) { - die "both $filename and $filenameGz exist, remove one of them\n"; - } - else { die } - } - if ( $self->{downloadMissingFiles} ) { - my $baseUrl = 'ftp://ftp.fu-berlin.de/pub/misc/movies/database/frozendata'; - foreach ( sort keys %missingListFiles ) { - my $url = "$baseUrl/$_.list.gz"; - my $filename = delete $missingListFiles{$_}; - my $partial = "$filename.partial"; - if (-e $partial) { - if (not -s $partial) { - print STDERR "removing empty $partial\n"; - unlink $partial or die "cannot unlink $partial: $!"; + FILES_CHECK: + while ( my( $key, $value ) = each %{ $self->{stages} } ) { + # don't check *all* files - only the ones we are crunching + next FILES_CHECK if ( lc($self->{stageToRun}) ne 'all' && $key != int($self->{stageToRun}) ); + my $file=$value; + my $filename="$listsDir/$file.list"; + my $filenameGz="$filename.gz"; + my $filenameExists = -f $filename; + my $filenameSize = -s $filename; + my $filenameGzExists = -f $filenameGz; + my $filenameGzSize = -s $filenameGz; + + if ( $filenameExists and not $filenameSize ) { + warn "removing zero-length $filename\n"; + unlink $filename or die "cannot unlink $filename: $!"; + $filenameExists = 0; + } + if ( $filenameGzExists and not $filenameGzSize ) { + warn "removing zero-length $filenameGz\n"; + unlink $filenameGz or die "cannot unlink $filenameGz: $!"; + $filenameGzExists = 0; + } + + if ( not $filenameExists and not $filenameGzExists ) { + # Just report one of the filenames, keep the message simple. 
+ warn "$filenameGz does not exist\n"; + if ( $self->{optionalStages}{$file} && lc($self->{stageToRun}) eq 'all' ) { + warn "$file will not be added to database\n"; + } else { + $missingListFiles{$file}=$filenameGz; + } } - else { - die < by hand. + elsif ( not $filenameExists and $filenameGzExists ) { + $self->{imdbListFiles}->{$file}=$filenameGz; + } + elsif ( $filenameExists and not $filenameGzExists ) { + $self->{imdbListFiles}->{$file}=$filename; + } + elsif ( $filenameExists and $filenameGzExists ) { + die "both $filename and $filenameGz exist, remove one of them\n"; + } + else { die } + } + + if ( $self->{downloadMissingFiles} ) { + my $baseUrl = 'ftp://ftp.fu-berlin.de/pub/misc/movies/database/frozendata'; + foreach ( sort keys %missingListFiles ) { + my $url = "$baseUrl/$_.list.gz"; + my $filename = delete $missingListFiles{$_}; + my $partial = "$filename.partial"; + if (-e $partial) { + if (not -s $partial) { + print STDERR "removing empty $partial\n"; + unlink $partial or die "cannot unlink $partial: $!"; + } + else { + die < by hand. END ; - } - } + } + } - print STDERR <. 
With a slow network link this could fail; it might be better to download the file by hand and save it as @@ -1485,1953 +1538,2241 @@ END ; - # For downloading we use LWP - # - my $ua = LWP::UserAgent->new(); - $ua->env_proxy(); - $ua->show_progress(1); - - my $req = HTTP::Request->new(GET => $url); - $req->authorization_basic('anonymous', 'tv_imdb'); - - my $resp = $ua->request($req, $filename); - my $got_size = -s $filename; - if (defined $resp and $resp->is_success ) { - die if not $got_size; - print STDERR "<$url>\n\t-> $filename, success\n\n"; - } - else { - my $msg = "failed to download $url to $filename"; - $msg .= ", http response code: ".$resp->status_line if defined $resp; - warn $msg; - if ($got_size) { - warn "renaming $filename -> $partial\n"; - rename $filename, $partial - or die "cannot rename $filename to $partial: $!"; - warn "You might try continuing the download of <$url> manually.\n"; - } - exit(1); - } - } - $self->{downloadMissingFiles} = 0; - goto CHECK_FILES; - } - - if ( %missingListFiles ) { - print STDERR "tv_imdb: requires you to download the above files from ftp.imdb.com\n"; - print STDERR " see http://www.imdb.com/interfaces for details\n"; - print STDERR " or try the --download option\n"; - #return(undef); - return 1; - } - - return 0; + # For downloading we use LWP + # + my $ua = LWP::UserAgent->new(); + $ua->env_proxy(); + $ua->show_progress(1); + + my $req = HTTP::Request->new(GET => $url); + $req->authorization_basic('anonymous', 'tv_imdb'); + + my $resp = $ua->request($req, $filename); + my $got_size = -s $filename; + if (defined $resp and $resp->is_success ) { + die if not $got_size; + print STDERR "<$url>\n\t-> $filename, success\n\n"; + } + else { + my $msg = "failed to download $url to $filename"; + $msg .= ", http response code: ".$resp->status_line if defined $resp; + warn $msg; + if ($got_size) { + warn "renaming $filename -> $partial\n"; + rename $filename, $partial + or die "cannot rename $filename to $partial: $!"; + 
warn "You might try continuing the download of <$url> manually.\n"; + } + exit(1); + } + } + + $self->{downloadMissingFiles} = 0; + goto CHECK_FILES; + } + + if ( %missingListFiles ) { + print STDERR "tv_imdb: requires you to download the above files from ftp.fu-berlin.de \n"; + #print STDERR " see http://www.imdb.com/interfaces for details\n"; + print STDERR " or try the --download option\n"; + #return(undef); + return 1; + } + + return 0; +} + +sub sortfile ($$$) { + my ($self, $stage, $file)=@_; + + # file already written : sort it using (1) system sort command, or (2) File::Sort package + + my $f=$file; + my $st = time; + my $res; + + if ($self->{usesystemsort}) { # use shell sort if we can (much faster on big files) + $self->status("using system sort on stage $stage"); + + # which OS are we on? + if ($^O=~'linux|cygwin') { # TODO: untested on cygwin + if ($stage == 1) { + $res = system( "sort", "-t", "\t", qw(-k 1 -o), "$f.sorted", "$f" ); + } else { + $res = system( "sort", qw(-t : -k 1n -o), "$f.sorted", "$f" ); + } + if ($? == -1) { $self->error("failed to execute: $! \n"); } + elsif ( $? & 127 || $? & 128 ) { $self->error("system call died with signal %d \n"); } + else { $res = $? >> 8; } + $res = 1 if $res == 0; # successful call returns 0 in $? + + } elsif ($^O=~'MSWin32') { # TODO: untested on Windows + $res = system( "sort", "/O ", "$f.sorted", "$f"); + $res = 1 if $res == 0; # successful call returns 0 in $? + } + + } else { + $self->status("using filesort on stage $stage (this might take up to 1 hour)"); + if ($stage == 1) { + $res = File::Sort::sort_file({ t =>"\t", k=>'1', y=>200000, I=>"$f", o=>"$f.sorted" }); + } else { + $res = File::Sort::sort_file({ t =>':', k=>'1n', y=>200000, I=>"$f", o=>"$f.sorted" }); + } + } + + $self->status("sorting took ".(int(((time - $st)/60)*10)/10)." 
minutes") if (time - $st > 60); + + if (!$res) { + die "Filesort failed on $f"; + } else { + unlink($f); + rename "$f.sorted", $f or die "Cannot rename file: $!"; + } + + return($res); } sub redirect($$) { - my ($self, $file)=@_; + my ($self, $file)=@_; - if ( defined($file) ) { - if ( !open($self->{logfd}, "> $file") ) { - print STDERR "$file:$!\n"; - return(0); - } - $self->{errorCountInLog}=0; - } - else { - close($self->{logfd}); - $self->{logfd}=undef; - } - return(1); + if ( defined($file) ) { + if ( !open($self->{logfd}, "> $file") ) { + print STDERR "$file:$!\n"; + return(0); + } + $self->{errorCountInLog}=0; + } + else { + close($self->{logfd}); + $self->{logfd}=undef; + } + return(1); } sub error($$) { - my $self=shift; - if ( defined($self->{logfd}) ) { - print {$self->{logfd}} $_[0]."\n"; - $self->{errorCountInLog}++; - } - else { - print STDERR $_[0]."\n"; - } + my $self=shift; + if ( defined($self->{logfd}) ) { + print {$self->{logfd}} $_[0]."\n"; + $self->{errorCountInLog}++; + } + else { + print STDERR $_[0]."\n"; + } } sub status($$) { - my $self=shift; + my $self=shift; - if ( $self->{verbose} ) { - print STDERR $_[0]."\n"; - } + if ( $self->{verbose} ) { + print STDERR $_[0]."\n"; + } } sub withThousands ($) { - my ($val) = @_; - $val =~ s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g; - return $val; + my ($val) = @_; + $val =~ s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g; + return $val; } -use XMLTV::Gunzip; -use IO::File; - sub openMaybeGunzip($) { - for ( shift ) { - return gunzip_open($_) if m/\.gz$/; - return new IO::File("< $_"); - } + for ( shift ) { + return gunzip_open($_) if m/\.gz$/; + return new IO::File("< $_"); + } } sub closeMaybeGunzip($$) { - if ( $_[0]=~m/\.gz$/o ) { - # Would close($fh) but that causes segfaults on my system. - # Investigating, but in the meantime just leave it open. - # - #return gunzip_close($_[1]); - } + if ( $_[0]=~m/\.gz$/o ) { + # Would close($fh) but that causes segfaults on my system. 
+ # Investigating, but in the meantime just leave it open. + # + #return gunzip_close($_[1]); + } - # Apparently this can also segfault (wtf?). - #return close($_[1]); + # Apparently this can also segfault (wtf?). + #return close($_[1]); } -sub readMoviesOrGenres($$$$) +sub beginProgressBar($$$) { - my ($self, $whichMoviesOrGenres, $countEstimate, $file)=@_; - my $startTime=time(); - my $header; - my $whatAreWeParsing; - my $lineCount=0; - - if ( $whichMoviesOrGenres eq "Movies" ) { - $header="MOVIES LIST"; - $whatAreWeParsing=1; - } - elsif ( $whichMoviesOrGenres eq "Genres" ) { - $header="8: THE GENRES LIST"; - $whatAreWeParsing=2; - } - my $fh = openMaybeGunzip($file) || return(-2); - while(<$fh>) { - $lineCount++; - if ( m/^$header/ ) { - if ( !($_=<$fh>) || !m/^===========/o ) { - $self->error("missing ======= after $header at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - if ( !($_=<$fh>) || !m/^\s*$/o ) { - $self->error("missing empty line after ======= at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - last; - } - elsif ( $lineCount > 1000 ) { - $self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); - closeMaybeGunzip($file, $fh); - return(-1); - } - } - - my $progress=Term::ProgressBar->new({name => "parsing $whichMoviesOrGenres", - count => $countEstimate, - ETA => 'linear'}) - if ( $self->{showProgressBar} ); - - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - my $count=0; - while(<$fh>) { - $lineCount++; - my $line=$_; - #print "read line $lineCount:$line\n"; - - # end is line consisting of only '-' - last if ( $line=~m/^\-\-\-\-\-\-\-+/o ); - - $line=~s/\n$//o; - - my $tab=index($line, "\t"); - if ( $tab != -1 ) { - my $mkey=substr($line, 0, $tab); - - next if ($mkey=~m/\s*\{\{SUSPENDED\}\}/o); - - if ( $whatAreWeParsing == 2 ) { - # don't see what these are...? 
- # ignore {{SUSPENDED}} - $mkey=~s/\s*\{\{SUSPENDED\}\}//o; - - # ignore {Twelve Angry Men (1954)} - $mkey=~s/\s*\{[^\}]+\}//go; - - # skip enties that have {} in them since they're tv episodes - #next if ( $mkey=~s/\s*\{[^\}]+\}$//o ); - - my $genre=substr($line, $tab); - - # genres sometimes has more than one tab - $genre=~s/^\t+//og; - if ( defined($self->{movies}{$mkey}) ) { - $self->{movies}{$mkey}.="|".$genre; - } - else { - $self->{movies}{$mkey}=$genre; - # returned count is number of unique titles found - $count++; - } - } - else { - push(@{$self->{movies}}, $mkey); - # returned count is number of titles found - $count++; - } + my ($self, $what, $countEstimate)=@_; + print STDERR $what.' '.$countEstimate; + if ($self->{showProgressBar}) { + $self->{progress} = Term::ProgressBar->new({name => "$what", + count => $countEstimate*1.01, + ETA => 'linear'}); + $self->{progress}->minor(0) if ($self->{showProgressBar}); + $self->{progress}->max_update_rate(1) if ($self->{showProgressBar}); + $self->{count_estimate} = $countEstimate; + $self->{next_update} = 0; + } +} - if ( $self->{showProgressBar} ) { +sub updateProgressBar($$$) +{ + my ($self, $what, $count)=@_; + + if ( $self->{showProgressBar} ) { # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+1000); - $next_update=$progress->update($count); + if ( $count > $self->{count_estimate} ) { + $self->{count_estimate} = $self->{progress}->target($count*1.05); + $self->{next_update} = $self->{progress}->update($count); } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); + elsif ( $count > $self->{next_update} ) { + $self->{next_update} = $self->{progress}->update($count); } - } + } +} + +sub endProgressBar($$$) +{ + my ($self, $what, $count)=@_; + + if ( $self->{showProgressBar} ) { + $self->{progress}->update($self->{count_estimate}); + } +} + +sub makeTitleKey($$) +{ + # make a unique key for each 
prog title. Also determine the prog type. + + # some edge cases we need to handle: + # 1] multiple titles with same year, e.g. + # '83 (2017/I) + # '83 (2017/II) + # + # 2] multiple films with same year but different type, e.g. + # Journey to the Center of the Earth (2008) # cinema release + # Journey to the Center of the Earth (2008) (TV) # TV movie + # Journey to the Center of the Earth (2008) (V) # straight to video + # + # 3] tv series and film with same year, e.g. + # "Ashes to Ashes" (2008) # tv series + # Ashes to Ashes (2008) # movie + # + # 4] titles without a year, e.g. + # California Cornflakes (????) + # Zed (????/II) + # + # 5] titles including alternatiove title, e.g. + # Family Prayers (aka Karim & Suha) (2010) + # + + my ($self, $progtitle)=@_; + + # tidy the film title, and extract the prog type + # + my $dbkey = $progtitle; + my $progtype; + + # drop episode information - ex: "Supernatural" (2005) {A Very Supernatural Christmas (#3.8)} + my $isepisode = $dbkey=~s/\s*\{[^\}]+\}//go; + + # remove 'aka' details from prog-title + $dbkey =~ s/\s*\((?:aka|as) ([^\)]+)\)//o; + + # todo - this would make things easier + # change double-quotes around title to be (made-for-tv) suffix instead + if ( $dbkey=~m/^\"/o && #" + $dbkey=~m/\"\s*\(/o ) { #" + $dbkey.=" (tv_series)"; + $progtype=4; + } + # how rude, some entries have (TV) appearing more than once. 
+ $dbkey=~s/\(TV\)\s*\(TV\)$/(TV)/o; + + my $qualifier; + if ( $dbkey=~m/\s+\(TV\)$/ ) { # don't strip from title - it's considered part of the title: so we need it for matching against other source files + $qualifier="tv_movie"; + $progtype=2; + } + elsif ( $dbkey=~m/\s+\(V\)$/ ) { # ditto + $qualifier="video_movie"; + $progtype=3; + } + elsif ( $dbkey=~m/\s+\(VG\)$/ ) { # ditto + $qualifier="video_game"; + $progtype=5; + } + elsif ( $dbkey=~s/\s+\(mini\) \(tv_series\)$// ) { # but strip the rest + $qualifier="tv_mini_series"; + $progtype=4; + } + elsif ( $dbkey=~s/\s+\(tv_series\)$// ) { + $qualifier="tv_series"; + $progtype=4; + } + elsif ( $dbkey=~s/\s+\(mini\)$//o ) { + $qualifier="tv_mini_series"; + $progtype=4; } else { - $self->error("$file:$lineCount: unrecognized format (missing tab)"); - $next_update=$progress->update($count) if ($self->{showProgressBar}); + $qualifier="movie"; + $progtype=1; } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - $self->status(sprintf("parsing $whichMoviesOrGenres found ".withThousands($count)." titles in ". 
+ + # make a key from the title + # + my $year; my $yearcount; + my $title = $dbkey; + + if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { # remove " marks around title + $title=~s/^\"//o; #" + $title=~s/\"(\s*\()/$1/o; #" + } + + # strip the above progtypes from the hashkey + $title=~s/\s*\((TV|V|VG)\)$//; + + # extract the year from the title + if ( $title=~s/\s+\((\d\d\d\d)\)$//o || + $title=~s/\s+\((\d\d\d\d)\/([IVXL]+)\)$//o ) { + $year=$1; + } + elsif ( $title=~s/\s+\((\?\?\?\?)\)$//o || + $title=~s/\s+\((\?\?\?\?)\/([IVXL]+)\)$//o ) { + $year="0000"; + } + else { + $self->error("movie list format failed to decode year from title '$title'"); + $year="0000"; + } + $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; # move definite article to front of title + + $title=~s/\t/ /g; # remove tab chars (there shouldn't be any but it will corrupt our data output if we find one) + + my $hashkey=lc("$title ($year)"); # use calculated year to avoid things like "72 Hours (????/I)" + + $hashkey=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg; + + #print STDERR "input:$dbkey\n\tdbkey:$hashkey\n\ttitle=$title\n\tyear=$year\n\tcounter=$yearcount\n\tqualifier=$qualifier\n"; + + return ( $hashkey, $dbkey, $year, $yearcount, $qualifier, $progtype, $isepisode ); +} + +sub readMovies($$$$$) +{ + # build %movieshash from movies.list source file + + my ($self, $which, $countEstimate, $file, $stage)=@_; + my $startTime=time(); + my $header; + my $whatAreWeParsing; + my $lineCount=0; + + if ( $which eq "Movies" ) { + $header="MOVIES LIST"; + $whatAreWeParsing=1; + } + + $self->beginProgressBar('parsing '.$which, $countEstimate); + + + #----------------------------------------------------------- + # find the start of the actual data + + my $fh = openMaybeGunzip($file) || return(-2); + while(<$fh>) { + chomp(); + $lineCount++; + if ( m/^$header/ ) { + if ( !($_=<$fh>) || !m/^===========/o ) { + $self->error("missing ======= after $header at line 
$lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + if ( !($_=<$fh>) || !m/^\s*$/o ) { + $self->error("missing empty line after ======= at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + last; + } + elsif ( $lineCount > 1000 ) { # didn't find the header within the first 1000 lines in the file! (wrong file? file corrupt? data changed?) + $self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); + closeMaybeGunzip($file, $fh); + return(-1); + } + } + + + + #----------------------------------------------------------- + # read the movies data, and create the db IDX file (as a temporary file called stage1.data) + # input data are "film-name year" separated by one or more tabs + # Army of Darkness (1992) 1992 + + my $count=0; my $countout=0; + while(<$fh>) { + chomp(); + $lineCount++; + my $line=$_; + next if ( length($line) == 0 ); + last if ( $self->{sample} != 0 && $self->{sample} < $count ); # undocumented option (used in debugging) + #$self->status("read line $lineCount:$line"); + + # end of data is line consisting of only '-' + last if ( $line =~ m/^\-\-\-\-\-\-\-+/o ); + + my $tabstop = index($line, "\t"); # there is always at least one tabstop in the incoming data + if ( $tabstop != -1 ) { + my ($mtitle, $myear) = $line =~ m/^(.*?)\t+(.*)$/; + + next if ($mtitle =~ m/\s*\{\{SUSPENDED\}\}/o); + + # returned count is number of titles found + $count++; + + # compute the data we need for the IDX file + # key title year title id + # + my ($hashkey, $title, $year, $yearcount, $qualifier, $progtype, $isepisode) = $self->makeTitleKey($mtitle); + + # we don't want "video games" + if ($qualifier eq "video_game") { next; } + + # we don't keep episode information TODO: enhancement: change tv_imdb to do episodes? 
+ if ($isepisode == 1) { next; } + + next if ($self->{moviesonly} && ($progtype != 1 && $progtype != 2) ); # user requested movies_only + + + # store the movies data + if ($self->{usefilesort}) { + # if sorting on disc then write the extracted movies data to an interim file + print {$self->{fhdata}} $hashkey."\t".$title."\t".$year."\t".$qualifier."\n"; + + } else { + # store the title in a hash of $key=>{$title} + if ( defined($self->{movieshash}{$hashkey}) ) { # check for duplicates + # + # there's a lot (c. 9,000!) instances of duplicate titles in the movies.list file + # so only report where titles are different + if ( defined $self->{movieshash}{$hashkey}{$title} && $self->{movieshash}{$hashkey}{$title} ne $year."\t".$qualifier ) { # {."\t".$progtype} + $self->error("duplicate moviedb key computed $hashkey - this programme will be ignored $mtitle"); + #$self->error(" ".$self->{movieshash}{$hashkey}{$title}); + next; + } + } + + # the output IDX and DAT files must be sorted by dbkey (because of the way the searching is done) + # so we need to store all the incoming 4 million records and then sort them + # + $self->{movieshash}{$hashkey}{$title} = $year."\t".$qualifier; # we don't currently use the progtype flag so don't print it {."\t".$progtype} + + } + + # return number of titles kept + $countout++; + + $self->updateProgressBar('', $lineCount); + } + else { + $self->error("$file:$lineCount: unrecognized format (missing tab)"); + $self->updateProgressBar('', $lineCount); + } + } + + $self->endProgressBar(); + + $self->status(sprintf("parsing $which found ".withThousands($countout)." titles in ". withThousands($lineCount)." 
lines in %d seconds",time()-$startTime)); - closeMaybeGunzip($file, $fh); - return($count); + closeMaybeGunzip($file, $fh); + + #----------------------------------------------------------- + return($count, $countout); } -sub readCastOrDirectors($$$) +sub readCastOrDirectors($$$$$) { - my ($self, $whichCastOrDirector, $castCountEstimate, $file)=@_; - my $startTime=time(); + my ($self, $which, $countEstimate, $file, $stage)=@_; + my $startTime=time(); + my $header; + my $whatAreWeParsing; + my $lineCount=0; - my $header; - my $whatAreWeParsing; - my $lineCount=0; - - if ( $whichCastOrDirector eq "Actors" ) { - $header="THE ACTORS LIST"; - $whatAreWeParsing=1; - } - elsif ( $whichCastOrDirector eq "Actresses" ) { - $header="THE ACTRESSES LIST"; - $whatAreWeParsing=2; - } - elsif ( $whichCastOrDirector eq "Directors" ) { - $header="THE DIRECTORS LIST"; - $whatAreWeParsing=3; - } - else { - die "why are we here ?"; - } - - my $fh = openMaybeGunzip($file) || return(-2); - my $progress=Term::ProgressBar->new({name => "parsing $whichCastOrDirector", - count => $castCountEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - while(<$fh>) { - $lineCount++; - if ( m/^$header/ ) { - if ( !($_=<$fh>) || !m/^===========/o ) { - $self->error("missing ======= after $header at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - if ( !($_=<$fh>) || !m/^\s*$/o ) { - $self->error("missing empty line after ======= at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - if ( !($_=<$fh>) || !m/^Name\s+Titles\s*$/o ) { - $self->error("missing name/titles line after ======= at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - if ( !($_=<$fh>) || !m/^[\s\-]+$/o ) { - $self->error("missing name/titles suffix line after ======= at line $lineCount"); - closeMaybeGunzip($file, $fh); - 
return(-1); - } - last; - } - elsif ( $lineCount > 1000 ) { - $self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); - closeMaybeGunzip($file, $fh); - return(-1); - } - } - - my $cur_name; - my $count=0; - my $castNames=0; - while(<$fh>) { - $lineCount++; - my $line=$_; - $line=~s/\n$//o; - #$self->status("read line $lineCount:$line"); - - # end is line consisting of only '-' - last if ( $line=~m/^\-\-\-\-\-\-\-+/o ); - - next if ( length($line) == 0 ); - - if ( $line=~s/^([^\t]+)\t+//o ) { - $cur_name=$1; - $castNames++; - - if ( $self->{showProgressBar} ) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $castNames > $castCountEstimate ) { - $castCountEstimate = $progress->target($castNames+100); - $next_update=$progress->update($castNames); - } - elsif ( $castNames > $next_update ) { - $next_update=$progress->update($castNames); - } - } - } - - my $billing; - my $HostNarrator=""; - if ( $whatAreWeParsing < 3 ) { - # actors or actresses - $billing="9999"; - if ( $line=~s/\s*<(\d+)>//o ) { - $billing=sprintf("%04d", int($1)); - } - - if ( (my $start=index($line, " [")) != -1 ) { - #my $end=rindex($line, "]"); - my $ex=substr($line, $start+1); - - if ( $ex=~s/Host//o ) { - if ( length($HostNarrator) ) { - $HostNarrator.=","; - } - $HostNarrator.="Host"; - } - if ( $ex=~s/Narrator//o ) { - if ( length($HostNarrator) ) { - $HostNarrator.=","; - } - $HostNarrator.="Narrator"; - } - $line=substr($line, 0, $start); - # ignore character name - } - } - # try ignoring these - next if ($line=~m/\s*\{\{SUSPENDED\}\}/o); - - # don't see what these are...? - # ignore {{SUSPENDED}} - $line=~s/\s*\{\{SUSPENDED\}\}//o; - - # [honir] this is wrong - this puts cast from all the episodes as though they are in the entire series! 
- # ##ignore {Twelve Angry Men (1954)} - $line=~s/\s*\{[^\}]+\}//o; - - if ( $whatAreWeParsing < 3 ) { - if ( $line=~s/\s*\(aka ([^\)]+)\).*$//o ) { - # $attr=$1; - } - } - if ( $line=~s/ (\(.*)$//o ) { - # $attrs=$1; - } - $line=~s/^\s+//og; - $line=~s/\s+$//og; - - if ( $whatAreWeParsing < 3 ) { - if ( $line=~s/\s+Narrator$//o ) { - # ignore - } - } - - my $val=$self->{movies}{$line}; - my $name=$cur_name; - if ( length($HostNarrator) ) { - $name.="[$HostNarrator]"; - } - if ( defined($billing) ) { - if ( defined($val) ) { - $self->{movies}{$line}=$val."|$billing:$name"; - } - else { - $self->{movies}{$line}="$billing:$name"; - } + if ( $which eq "Actors" ) { + $header="THE ACTORS LIST"; + $whatAreWeParsing=1; + } + elsif ( $which eq "Actresses" ) { + $header="THE ACTRESSES LIST"; + $whatAreWeParsing=2; + } + elsif ( $which eq "Directors" ) { + $header="THE DIRECTORS LIST"; + $whatAreWeParsing=3; } else { - if ( defined($val) ) { - $self->{movies}{$line}=$val."|$name"; - } - else { - $self->{movies}{$line}=$name; - } - } - $count++; - } - $progress->update($castCountEstimate) if ($self->{showProgressBar}); + die "why are we here ?"; + } + + $self->beginProgressBar('parsing '.$which, $countEstimate); + + # + # note: not all movies end up with a cast, but we include these movies anyway. 
+ # + + #----------------------------------------------------------- + # find the start of the actual data + + my $fh = openMaybeGunzip($file) || return(-2); + while(<$fh>) { + chomp(); + $lineCount++; + if ( m/^$header/ ) { + if ( !($_=<$fh>) || !m/^===========/o ) { + $self->error("missing ======= after $header at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + if ( !($_=<$fh>) || !m/^\s*$/o ) { + $self->error("missing empty line after ======= at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + if ( !($_=<$fh>) || !m/^Name\s+Titles\s*$/o ) { + $self->error("missing name/titles line after ======= at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + if ( !($_=<$fh>) || !m/^[\s\-]+$/o ) { + $self->error("missing name/titles suffix line after ======= at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + last; + } + elsif ( $lineCount > 1000 ) { + $self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); + closeMaybeGunzip($file, $fh); + return(-1); + } + } + + + #----------------------------------------------------------- + # read the cast or directors data, and create the stagex.data file + # input data are "person-name film-title" separated by one or more tabs + # Raimi,Sam Army of Darkness (1992) + # person name appears only once for multiple film entries + + my $count=0; + my $countnames=0; + my $cur_name; + while(<$fh>) { + chomp(); + $lineCount++; + my $line=$_; + next if ( length($line) == 0 ); + last if ( $self->{sample} != 0 && $self->{sample} < $count ); # undocumented option (used in debugging) + #$self->status("read line $lineCount:$line"); + + # end is line consisting of only '-' + last if ( $line =~ m/^\-\-\-\-\-\-\-+/o ); + + my $tabstop = index($line, "\t"); # there is always at least one tabstop in the incoming data + if ( $tabstop != -1 ) { + my ($mname, $mtitle) = $line =~ m/^(.*?)\t+(.*)$/; # get person-name (everything up to the first tab) 
+ + next if ($mtitle=~m/\s*\{\{SUSPENDED\}\}/o); + + # skip enties that have {} in them since they're tv episodes + next if ($mtitle=~m/\s*\{[^\}]+\}$/ ); + + # skip "video games" + next if ($mtitle=~m/\s+\(VG\)(\s|$)/ ); + # note may not be end of line e.g. "Ahad, Alex (I) Skullgirls (2012) (VG) (creative director)" + + + # returned count is number of directors found + $count++; + + $mname =~ s/^\s+|\s+$//g; # trim + + # person name appears only on the first record in a group for this person + if ($mname ne '') { + $countnames++; + $cur_name = $mname; + } + + + # Directors' processing + # A. Guggenheim, Sonia After Maiko (2015) (as Sonia Guggenheim) + # Journey (2015/III) (as Sonia Guggenheim) + # A. Solla, Ricardo "7 vidas" (1999) {(#2.37)} + # "7 vidas" (1999) {Atahualpa Yupanqui (#6.20)} + # + + # Actors' processing + # -Gradowska, Kasia Lewandowska Who are the WWP Women? (2015) (V) [Herself] <1> + # 'Rovel' Torres, Crystal "The Tonight Show Starring Jimmy Fallon" (2014) {Ice T/Andrew Rannells/Lupe Fiasco (#2.105)} [Herself - Musical Support] + # 's Gravemade, Nienke A Short Tour & Farewell (2015) + # Tweeduizendseks (2010) (TV) [Yolanda van der Graaf] + # Bennett, Mollie "Before the Snap" (2011) (voice) [Narrator] + # 'Twinkie' Bird, Tracy "Casting Qs" (2010) {An Interview with Tracy 'Twinkie' Byrd (#2.14)} (as Twinkie Byrd) [Herself] + # Abbott, Tasha (I) "Electives" (2018) [Julie] <41> + # + + my $billing; + my $hostnarrator; + if ( $whatAreWeParsing < 3 ) { # actors or actresses + + # extract/strip the billing + $billing="9999"; + if ( $mtitle =~ s/\s*<(\d+)>//o ) { # e.g. <41> + $billing = sprintf("%04d", int($1)); + } + + # extract/strip the role/character + if ( $mtitle =~ s/\s*\[(.*?)\]//o ) { # e.g. [Julie] or [Narrator] + if ( $1 =~ m/(Host|Narrator)/ ) { # also picks up "Hostess", "Co-Host" + $hostnarrator = $1; + } + } + } + - $self->status(sprintf("parsing $whichCastOrDirector found ".withThousands($castNames)." names, ". 
+ #------------------------------------------------------- + # tidy the title + + # remove the episode if a series + if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant + # $attr=$1; + next; # skip tv episodes (we only output main titles so don't store episode data against the main title) + } + + # remove 'aka' details from prog-title + if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) { + # $attr=$1; + } + + # remove prog type (e.g. "(V)" or "(TV)" ) + # no: don't strip from title - it's considered part of the title: so we need it for matching against movies.list + ##if ( $mtitle =~ s/\s(\((TV|V|VG)\))//o ) { + # $attrs=$1; + ##} + + # junk everything after " (" (e.g. " (collaborating director)" ) + if ( $mtitle =~ s/ (\(.*)$//o ) { + # $attrs=$1; + } + + $mtitle =~ s/^\s+|\s+$//g; # trim + + + #------------------------------------------------------- + # $mtitle should now contain the programme's title + my $title = $mtitle; + + # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1 + my $idxid = $self->{titleshash}{$title}; + + if (!$idxid ) { + ## no, don't print errors where we can't match the incoming title - there are 100s of these in the incoming data + ## often where the year on the actor record is 1 year out + ## people will get worried if we report over 1000 errors and there's nothing we can sensibly do about them + ##$self->error("$file:$lineCount: cannot find $title in titles list"); + ### if we reinstate this test then we'd need to allow for 'moviesonly' option (i.e. 
a lot of titles will have been deliberately excluded) + next; + } + + + #------------------------------------------------------- + # the output ".data" files must be sorted by id so they can be merged in stage final + # so we need to store all the incoming records and then sort them + # + my $mperson = ''; + $mperson = "$billing:" if ( defined($billing) ); + $mperson .= $cur_name; + $mperson .= " [$hostnarrator]" if ( defined($hostnarrator) ); # this is wrong: incoming data are "lastname, firstname" so this creates "Huwyler, Fabio [Host]" + + if ($self->{usefilesort}) { + # write the extracted imdb data to a temporary file, preceeded by the IDX id for each record + my $k = sprintf("%07d", $idxid); + print {$self->{fhdata}} $k.':'.$mperson."\n"; + + } else { + my $h = "stage${stage}hash"; + if (defined( $self->{$h}{$idxid} )) { + $self->{$h}{$idxid} .= "|".$mperson; + } else { + $self->{$h}{$idxid} = $mperson; + } + } + + + $self->updateProgressBar('', $lineCount); + } + else { + $self->error("$file:$lineCount: unrecognized format (missing tab)"); + $self->updateProgressBar('', $lineCount); + } + } + + $self->endProgressBar(); + + $self->status(sprintf("parsing $which found ".withThousands($countnames)." names, ". withThousands($count)." titles in ".withThousands($lineCount)." 
lines in %d seconds",time()-$startTime)); - closeMaybeGunzip($file, $fh); + closeMaybeGunzip($file, $fh); - return($castNames); + #----------------------------------------------------------- + return($count); } -sub readRatings($$$$) +sub readGenres($$$$$) { - my ($self, $countEstimate, $file)=@_; - my $startTime=time(); - my $lineCount=0; - - my $fh = openMaybeGunzip($file) || return(-2); - while(<$fh>) { - $lineCount++; - if ( m/^MOVIE RATINGS REPORT/o ) { - if ( !($_=<$fh>) || !m/^\s*$/o) { - $self->error("missing empty line after \"MOVIE RATINGS REPORT\" at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - if ( !($_=<$fh>) || !m/^New Distribution Votes Rank Title/o ) { - $self->error("missing \"New Distribution Votes Rank Title\" at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - last; - } - elsif ( $lineCount > 1000 ) { - $self->error("$file: stopping at line $lineCount, didn't see \"MOVIE RATINGS REPORT\" line"); - closeMaybeGunzip($file, $fh); - return(-1); - } - } - - my $progress=Term::ProgressBar->new({name => "parsing Ratings", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - my $count=0; - while(<$fh>) { - $lineCount++; - my $line=$_; - #print "read line $lineCount:$line"; - - $line=~s/\n$//o; - - # skip empty lines (only really appear right before last line ending with ---- - next if ( $line=~m/^\s*$/o ); - # end is line consisting of only '-' - last if ( $line=~m/^\-\-\-\-\-\-\-+/o ); - - # e.g. 
New Distribution Votes Rank Title - # 0000000133 225568 8.9 12 Angry Men (1957) - if ( $line=~s/^\s+([\.|\*|\d]+)\s+(\d+)\s+(\d+)\.(\d+)\s+//o ) { - $self->{movies}{$line}=[$1,$2,"$3.$4"]; - $count++; - if ( $self->{showProgressBar} ) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+1000); - $next_update=$progress->update($count); + my ($self, $which, $countEstimate, $file, $stage)=@_; + my $startTime=time(); + my $header; + my $whatAreWeParsing; + my $lineCount=0; + + if ( $which eq "Genres" ) { + $header="8: THE GENRES LIST"; + $whatAreWeParsing=1; + } + + $self->beginProgressBar('parsing '.$which, $countEstimate); + + + #----------------------------------------------------------- + # find the start of the actual data + + my $fh = openMaybeGunzip($file) || return(-2); + while(<$fh>) { + chomp(); + $lineCount++; + if ( m/^$header/ ) { + if ( !($_=<$fh>) || !m/^===========/o ) { + $self->error("missing ======= after $header at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + if ( !($_=<$fh>) || !m/^\s*$/o ) { + $self->error("missing empty line after ======= at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + last; } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); + elsif ( $lineCount > 1000 ) { + $self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); + closeMaybeGunzip($file, $fh); + return(-1); } - } } - else { - $self->error("$file:$lineCount: unrecognized format"); - $next_update=$progress->update($count) if ($self->{showProgressBar}); + + + #----------------------------------------------------------- + # read the genres data, and create the stagex.data file + # input data are "film-title genre" separated by one or more tabs + # multiple genres are searated by | + # Army of Darkness (1992) Horror + # King Jeff (2009) Comedy|Short + + my $count=0; + while(<$fh>) { + chomp(); 
+ $lineCount++; + my $line=$_; + next if ( length($line) == 0 ); + last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); # undocumented option (used in debugging) + #$self->status("read line $lineCount:$line"); + + # end is line consisting of only '-' + last if ( $line=~m/^\-\-\-\-\-\-\-+/o ); + + my $tabstop = index($line, "\t"); # there is always at least one tabstop in the incoming data + if ( $tabstop != -1 ) { + my ($mtitle, $mgenres) = $line =~ m/^(.*?)\t+(.*)$/; # get film-title (everything up to the first tab) + + next if ($mtitle=~m/\s*\{\{SUSPENDED\}\}/o); + + # skip enties that have {} in them since they're tv episodes + next if ($mtitle=~m/\s*\{[^\}]+\}/ ); + + # skip "video games" + next if ($mtitle=~m/\s+\(VG\)$/ ); + + # returned count is number of titles found + $count++; + + if ( $whatAreWeParsing == 1 ) { # genres + + # genres sometimes contains tabs + $mgenres=~s/^\t+//og; + + } + + + #------------------------------------------------------- + # tidy the title + + # remove the episode if a series + if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant + # $attr=$1; + } + + # remove 'aka' details from prog-title + if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) { + # $attr=$1; + } + + $mtitle =~ s/^\s+|\s+$//g; # trim + + + #------------------------------------------------------- + # $mtitle should now contain the programme's title + my $title = $mtitle; + + # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1 + my $idxid = $self->{titleshash}{$title}; + + if (!$idxid ) { + ## no, don't print errors where we can't match the incoming title - there are 100s of these in the incoming data + ## often where the year on the actor record is 1 year out + ##$self->error("$file:$lineCount: cannot find $title in titles list"); + next; + } + + + #------------------------------------------------------- + # the output ".data" files must be sorted by id so they can be merged in stage final + # so we need to store all the 
incoming records and then sort them + # + if ($self->{usefilesort}) { + # write the extracted imdb data to a temporary file, preceeded by the IDX id for each record + my $k = sprintf("%07d", $idxid); + print {$self->{fhdata}} $k.':'.$mgenres."\n"; + + } else { + my $h = "stage${stage}hash"; + if (defined( $self->{$h}{$idxid} )) { + $self->{$h}{$idxid} .= "|".$mgenres; + } else { + $self->{$h}{$idxid} = $mgenres; + } + } + + + $self->updateProgressBar('', $lineCount); + } + else { + $self->error("$file:$lineCount: unrecognized format (missing tab)"); + $self->updateProgressBar('', $lineCount); + } + } + + $self->endProgressBar(); + + $self->status(sprintf("parsing $which found ".withThousands($count)." titles in ". + withThousands($lineCount)." lines in %d seconds",time()-$startTime)); + + closeMaybeGunzip($file, $fh); + + #----------------------------------------------------------- + return($count); +} + +sub readRatings($$$$$) +{ + my ($self, $which, $countEstimate, $file, $stage)=@_; + my $startTime=time(); + my $header; + my $whatAreWeParsing; + my $lineCount=0; + + if ( $which eq "Ratings" ) { + $header="MOVIE RATINGS REPORT"; + $whatAreWeParsing=1; + } + + $self->beginProgressBar('parsing '.$which, $countEstimate); + + + #----------------------------------------------------------- + # find the start of the actual data + + my $fh = openMaybeGunzip($file) || return(-2); + while(<$fh>) { + chomp(); + $lineCount++; + if ( m/^$header/ ) { + # there is no ====== in ratings data! 
+ if ( !($_=<$fh>) || !m/^\s*$/o ) { + $self->error("missing empty line after $header at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + if ( !($_=<$fh>) || !m/^New Distribution Votes Rank Title/o ) { + $self->error("missing \"New Distribution Votes Rank Title\" at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + last; + } + elsif ( $lineCount > 1000 ) { + $self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); + closeMaybeGunzip($file, $fh); + return(-1); + } } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - $self->status(sprintf("parsing Ratings found ".withThousands($count)." titles in ". + + #----------------------------------------------------------- + # read the ratings data, and create the stagex.data file + # input data are "flag-new disribution votes rank film-title" separated by one or more spaces + # 0000002211 000001 9.9 Army of Darkness (1992) + # 0000000133 225568 8.9 12 Angry Men (1957) + + my $count=0; + while(<$fh>) { + chomp(); + $lineCount++; + my $line=$_; + next if ( length($line) == 0 ); + last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); # undocumented option (used in debugging) + #$self->status("read line $lineCount:$line"); + + # skip empty lines (only really appear right before last line ending with ---- + next if ( $line=~m/^\s*$/o ); + # end is line consisting of only '-' + last if ( $line=~m/^\-\-\-\-\-\-\-+/o ); + + my $tabstop = index($line, " "); # there is always at least one space in the incoming data + if ( $tabstop != -1 ) { + my ($mdistrib, $mvotes, $mrank, $mtitle) = $line =~ m/^\s+([\.|\*|\d]+)\s+(\d+)\s+(\d+\.\d+)\s+(.*)$/; + + next if ($mtitle=~m/\s*\{\{SUSPENDED\}\}/o); + + next if ($mtitle=~m/\s*\{[^\}]+\}/ ); # skip tv episodes + + next if ($mtitle=~m/\s+\(VG\)$/ ); # we don't want "video games" + + # returned count is number of titles found + $count++; + + if ( $whatAreWeParsing == 1 ) { # ratings + # null + } + 
+ + #------------------------------------------------------- + # tidy the title + + # remove the episode if a series + if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant + # $attr=$1; + } + + # remove 'aka' details from prog-title + if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) { + # $attr=$1; + } + + $mtitle =~ s/^\s+|\s+$//g; # trim + + + #------------------------------------------------------- + # $mtitle should now contain the programme's title + my $title = $mtitle; + + # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1 + my $idxid = $self->{titleshash}{$title}; + + if (!$idxid ) { + ## no, don't print errors where we can't match the incoming title - there are 100s of these in the incoming data + ## often where the year on the actor record is 1 year out + ##$self->error("$file:$lineCount: cannot find $title in titles list"); + next; + } + + + #------------------------------------------------------- + # the output ".data" files must be sorted by id so they can be merged in stage final + # so we need to store all the incoming records and then sort them + # + if ($self->{usefilesort}) { + # write the extracted imdb data to a temporary file, preceeded by the IDX id for each record + my $k = sprintf("%07d", $idxid); + print {$self->{fhdata}} $k.':'."$mdistrib;$mvotes;$mrank"."\n"; + + } else { + my $h = "stage${stage}hash"; + if (defined( $self->{$h}{$idxid} )) { + # we shouldn't get duplicates + $self->error("$file: duplicate film found at line $lineCount - this rating will be ignored $mtitle"); + } else { + $self->{$h}{$idxid} = "$mdistrib;$mvotes;$mrank"; + } + } + + + $self->updateProgressBar('', $lineCount); + } + else { + $self->error("$file:$lineCount: unrecognized format (missing tab)"); + $self->updateProgressBar('', $lineCount); + } + } + + $self->endProgressBar(); + + $self->status(sprintf("parsing $which found ".withThousands($count)." titles in ". withThousands($lineCount)." 
lines in %d seconds",time()-$startTime)); - closeMaybeGunzip($file, $fh); - return($count); + closeMaybeGunzip($file, $fh); + + #----------------------------------------------------------- + return($count); } -sub readKeywords($$$$) +sub readKeywords($$$$$) { - my ($self, $countEstimate, $file)=@_; - my $startTime=time(); - my $lineCount=0; - - my $fh = openMaybeGunzip($file) || return(-2); - while(<$fh>) { - $lineCount++; - - if ( m/THE KEYWORDS LIST/ ) { - if ( !($_=<$fh>) || !m/^===========/o ) { - $self->error("missing ======= after \"THE KEYWORDS LIST\" at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - if ( !($_=<$fh>) || !m/^\s*$/o ) { - $self->error("missing empty line after ======= at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - last; - } - elsif ( $lineCount > 100000 ) { - $self->error("$file: stopping at line $lineCount, didn't see \"THE KEYWORDS LIST\" line"); - closeMaybeGunzip($file, $fh); - return(-1); - } - } - - my $progress=Term::ProgressBar->new({name => "parsing keywords", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - my $count=0; - while(<$fh>) { - $lineCount++; - my $line=$_; - chomp($line); - next if ($line =~ m/^\s*$/); - my ($title, $keyword) = ($line =~ m/^(.*)\s+(\S+)\s*$/); - if ( defined($title) and defined($keyword) ) { - - my ($episode) = $title =~ m/^.*\s+(\{.*\})$/; - - # ignore anything which is an episode (e.g. 
"{Doctor Who (#10.22)}" ) - if ( !defined $episode || $episode eq '' ) - { - if ( defined($self->{movies}{$title}) ) { - $self->{movies}{$title}.=",".$keyword; - } else { - $self->{movies}{$title}=$keyword; - # returned count is number of unique titles found - $count++; - } - } - - if ( $self->{showProgressBar} ) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+1000); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } else { - $self->error("$file:$lineCount: unrecognized format \"$line\""); - $next_update=$progress->update($count) if ($self->{showProgressBar}); + my ($self, $which, $countEstimate, $file, $stage)=@_; + my $startTime=time(); + my $header; + my $whatAreWeParsing; + my $lineCount=0; + + if ( $which eq "Keywords" ) { + $header="8: THE KEYWORDS LIST"; + $whatAreWeParsing=1; + } + + $self->beginProgressBar('parsing '.$which, $countEstimate); + + + #----------------------------------------------------------- + # find the start of the actual data + + my $fh = openMaybeGunzip($file) || return(-2); + while(<$fh>) { + chomp(); + $lineCount++; + if ( m/^$header/ ) { + if ( !($_=<$fh>) || !m/^===========/o ) { + $self->error("missing ======= after $header at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + if ( !($_=<$fh>) || !m/^\s*$/o ) { + $self->error("missing empty line after ======= at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + last; + } + elsif ( $lineCount > 150000 ) { # line 101935 as at 2020-12-23 + $self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); + closeMaybeGunzip($file, $fh); + return(-1); + } } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - $self->status(sprintf("parsing Keywords found ".withThousands($count)." titles in ". 
+ + #----------------------------------------------------------- + # read the keywords data, and create the stagex.data file + # input data are "film-title keyword" separated by one or more tabs + # multiple keywords are searated by | + # Army of Darkness (1992) Horror + # King Jeff (2009) Comedy|Short + + my $count=0; + while(<$fh>) { + chomp(); + $lineCount++; + my $line=$_; + next if ( length($line) == 0 ); + last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); # undocumented option (used in debugging) + #$self->status("read line $lineCount:$line"); + + # end is line consisting of only '-' + last if ( $line=~m/^\-\-\-\-\-\-\-+/o ); + + my $tabstop = index($line, "\t"); # there is always at least one tabstop in the incoming data + if ( $tabstop != -1 ) { + my ($mtitle, $mkeywords) = $line =~ m/^(.*?)\t+(.*)$/; # get film-title (everything up to the first tab) + + next if ($mtitle=~m/\s*\{\{SUSPENDED\}\}/o); + + next if ($mtitle=~m/\s*\{[^\}]+\}/ ); # skip tv episodes + + next if ($mtitle=~m/\s+\(VG\)$/ ); # we don't want "video games" + + # returned count is number of titles found + $count++; + + if ( $whatAreWeParsing == 1 ) { # genres + + # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" ) + next if $mtitle =~ m/^.*\s+(\{.*\})$/; + + } + + + #------------------------------------------------------- + # tidy the title + + # remove the episode if a series + # [honir] this is wrong - this puts all the keywords as though they are in the entire series! 
+ if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant + # $attr=$1; + } + + # remove 'aka' details from prog-title + if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) { + # $attr=$1; + } + + $mtitle =~ s/^\s+|\s+$//g; # trim + + + #------------------------------------------------------- + # $mtitle should now contain the programme's title + my $title = $mtitle; + + # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1 + my $idxid = $self->{titleshash}{$title}; + + if (!$idxid ) { + ## no, don't print errors where we can't match the incoming title - there are 100s of these in the incoming data + ## often where the year on the actor record is 1 year out + ##$self->error("$file:$lineCount: cannot find $title in titles list"); + next; + } + + + #------------------------------------------------------- + # the output ".data" files must be sorted by id so they can be merged in stage final + # so we need to store all the incoming records and then sort them + # + if ($self->{usefilesort}) { + # write the extracted imdb data to a temporary file, preceeded by the IDX id for each record + my $k = sprintf("%07d", $idxid); + print {$self->{fhdata}} $k.':'.$mkeywords."\n"; + + } else { + my $h = "stage${stage}hash"; + if (defined( $self->{$h}{$idxid} )) { + $self->{$h}{$idxid} .= "|".$mkeywords; + } else { + $self->{$h}{$idxid} = $mkeywords; + } + } + + + $self->updateProgressBar('', $lineCount); + } + else { + $self->error("$file:$lineCount: unrecognized format (missing tab)"); + $self->updateProgressBar('', $lineCount); + } + } + + $self->endProgressBar(); + + $self->status(sprintf("parsing $which found ".withThousands($count)." titles in ". withThousands($lineCount)." 
lines in %d seconds",time()-$startTime)); - closeMaybeGunzip($file, $fh); - return($count); + closeMaybeGunzip($file, $fh); + + #----------------------------------------------------------- + return($count); } -sub readPlots($$$$) +sub readPlots($$$$$) { - my ($self, $countEstimate, $file)=@_; - my $startTime=time(); - my $lineCount=0; - - my $fh = openMaybeGunzip($file) || return(-2); - while(<$fh>) { - $lineCount++; - - if ( m/PLOT SUMMARIES LIST/ ) { - if ( !($_=<$fh>) || !m/^===========/o ) { - $self->error("missing ======= after \"PLOT SUMMARIES LIST\" at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - if ( !($_=<$fh>) || !m/^-----------/o ) { - $self->error("missing ------- line after ======= at line $lineCount"); - closeMaybeGunzip($file, $fh); - return(-1); - } - last; - } - elsif ( $lineCount > 500 ) { - $self->error("$file: stopping at line $lineCount, didn't see \"PLOT SUMMARIES LIST\" line"); - closeMaybeGunzip($file, $fh); - return(-1); - } - } - - my $progress=Term::ProgressBar->new({name => "parsing plots", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - my $count=0; - while(<$fh>) { - $lineCount++; - my $line=$_; - chomp($line); - next if ($line =~ m/^\s*$/); - my ($title, $episode) = ($line =~ m/^MV:\s(.*?)\s?(\{.*\})?$/); - if ( defined($title) ) { - - # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" ) - if ( !defined $episode || $episode eq '' ) - { - my $plot = ''; - LOOP: - while (1) { - if ( $line = <$fh> ) { - $lineCount++; - chomp($line); - next if ($line =~ m/^\s*$/); - if ( $line =~ m/PL:\s(.*)$/ ) { # plot summary is a number of lines starting "PL:" - $plot .= ($plot ne ''?' ':'') . 
$1; - } - last LOOP if ( $line =~ m/BY:\s(.*)$/ ); # the author line "BY:" signals the end of the plot summary - } else { - last LOOP; - } - } - - if ( !defined($self->{movies}{$title}) ) { - # ensure there's no tab chars in the plot or else the db stage will barf - $plot =~ s/\t//og; - $self->{movies}{$title}=$plot; - # returned count is number of unique titles found - $count++; - } - } - - if ( $self->{showProgressBar} ) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+1000); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } else { - # skip lines up to the next "MV:" - if ($line !~ m/^(---|PL:|BY:)/ ) { - $self->error("$file:$lineCount: unrecognized format \"$line\""); - } - $next_update=$progress->update($count) if ($self->{showProgressBar}); + my ($self, $which, $countEstimate, $file, $stage)=@_; + my $startTime=time(); + my $header; + my $whatAreWeParsing; + my $lineCount=0; + + if ( $which eq "Plot" ) { + $header="PLOT SUMMARIES LIST"; + $whatAreWeParsing=1; } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - $self->status(sprintf("parsing Plots found $count ".withThousands($count)." in ". + $self->beginProgressBar('parsing '.$which, $countEstimate); + + + #----------------------------------------------------------- + # find the start of the actual data + + my $fh = openMaybeGunzip($file) || return(-2); + while(<$fh>) { + chomp(); + $lineCount++; + if ( m/^$header/ ) { + if ( !($_=<$fh>) || !m/^===========/o ) { + $self->error("missing ======= after $header at line $lineCount"); + closeMaybeGunzip($file, $fh); + return(-1); + } + # no blank line in plot data! 
+ ##if ( !($_=<$fh>) || !m/^\s*$/o ) { + ## $self->error("missing empty line after ======= at line $lineCount"); + ## closeMaybeGunzip($file, $fh); + ## return(-1); + ##} + last; + } + elsif ( $lineCount > 1000 ) { + $self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); + closeMaybeGunzip($file, $fh); + return(-1); + } + } + + + #----------------------------------------------------------- + # read the plot data, and create the stagex.data file + # input data are "flag-new disribution votes rank film-title" separated by one or more spaces + # there can be multiple entries for each film + # ------------------------------------------------------------------------------- + # MV: Army of Darkness (1992) + # + # PL: Ash is transported with his car to 1,300 A.D., where he is captured by Lord + # PL: Arthur and turned slave with Duke Henry the Red and a couple of his men. + # [...] + # PL: battle between Ash's 20th Century tactics and the minions of darkness. + # + # BY: David Thiel + # + # PL: Ash finds himself stranded in the year 1300 AD with his car, his shotgun, + # PL: and his chainsaw. Soon he is discovered and thought to be a spy for a rival + # [...] + # PL: forces at play in the land. Ash accidentally releases the Army of Darkness + # PL: when retrieving the book, and a fight to the finish ensues. 
+ # + # BY: Ed Sutton + + my $count=0; + while(<$fh>) { + chomp(); + $lineCount++; + my $line=$_; + next if ( length($line) == 0 ); + last if ( $self->{sample} != 0 && $self->{sample} < $lineCount ); # undocumented option (used in debugging) + #$self->status("read line $lineCount:$line"); + + # skip empty lines + next if ( $line=~m/^\s*$/o ); + + next if ( $line=~m/\s*\{[^\}]+\}/ ); # skip tv episodes + + next if ( $line=~m/\s+\(VG\)$/ ); # skip "video games" + + # process a data block - starts with "MV:" + # + my ($mtitle, $mepisode) = ($line =~ m/^MV:\s(.*?)\s?(\{.*\})?$/); + if ( defined($mtitle) ) { + my $mplot = ''; + + # ignore anything which is an episode (e.g. "{Doctor Who (#10.22)}" ) + if ( !defined $mepisode || $mepisode eq '' ) + { + LOOP: + while (1) { + if ( $line = <$fh> ) { + $lineCount++; + chomp($line); + next if ($line =~ m/^\s*$/); + if ( $line =~ m/PL:\s(.*)$/ ) { # plot summary is a number of lines starting "PL:" + $mplot .= ($mplot ne ''?' ':'') . $1; + } + last LOOP if ( $line =~ m/BY:\s(.*)$/ ); # the author line "BY:" signals the end of the plot summary + } else { + last LOOP; + } + } + + # ensure there's no tab chars in the plot or else the db stage will barf + $mplot =~ s/\t//og; + + # returned count is number of unique titles found + $count++; + } + + + #------------------------------------------------------- + # tidy the title + + # remove the episode if a series + if ( $mtitle =~ s/\s*\{[^\}]+\}//o ) { #redundant + # $attr=$1; + } + + # remove 'aka' details from prog-title + if ( $mtitle =~ s/\s*\((?:aka|as) ([^\)]+)\)//o ) { + # $attr=$1; + } + + $mtitle =~ s/^\s+|\s+$//g; # trim + + + #------------------------------------------------------- + # $mtitle should now contain the programme's title + my $title = $mtitle; + + # find the IDX id from the hash of titles ($title=>$lineno) created in stage 1 + my $idxid = $self->{titleshash}{$title}; + + if (!$idxid ) { + ## no, don't print errors where we can't match the incoming title - there 
are 100s of these in the incoming data + ## often where the year on the actor record is 1 year out + ##$self->error("$file:$lineCount: cannot find $title in titles list"); + next; + } + + + #------------------------------------------------------- + # the output ".data" files must be sorted by id so they can be merged in stage final + # so we need to store all the incoming records and then sort them + # + if ($self->{usefilesort}) { + # write the extracted imdb data to a temporary file, preceeded by the IDX id for each record + my $k = sprintf("%07d", $idxid); + print {$self->{fhdata}} $k.':'.$mplot."\n"; + + } else { + my $h = "stage${stage}hash"; + if (defined( $self->{$h}{$idxid} )) { + # we shouldn't get duplicates + $self->error("$file: duplicate film found at line $lineCount - this plot will be ignored $mtitle"); + } else { + $self->{$h}{$idxid} = $mplot; + } + } + + + $self->updateProgressBar('', $lineCount); + } + else { + # skip lines up to the next "MV:" (this means we only get the first plot summary for each film) + if ($line !~ m/^(---|PL:|BY:)/ ) { + $self->error("$file:$lineCount: unrecognized format \"$line\""); + } + $self->updateProgressBar('', $lineCount); + } + } + + $self->endProgressBar(); + + $self->status(sprintf("parsing $which found ".withThousands($count)." in ". withThousands($lineCount)." 
lines in %d seconds",time()-$startTime)); - closeMaybeGunzip($file, $fh); - return($count); + closeMaybeGunzip($file, $fh); + + #----------------------------------------------------------- + return($count); } sub stageComplete($) { - my ($self, $stage)=@_; + my ($self, $stage)=@_; - if ( -f "$self->{imdbDir}/stage$stage.data" ) { - return(1); - } - return(0); + if ( -f "$self->{imdbDir}/stage$stage.data" ) { + return(1); + } + return(0); } sub dbinfoLoad($) { - my $self=shift; + my $self=shift; - my $ret=XMLTV::IMDB::loadDBInfo($self->{moviedbInfo}); - if ( ref $ret eq 'SCALAR' ) { - return($ret); - } - $self->{dbinfo}=$ret; - return(undef); + my $ret=XMLTV::IMDB::loadDBInfo($self->{moviedbInfo}); + if ( ref $ret eq 'SCALAR' ) { + return($ret); + } + $self->{dbinfo}=$ret; + return(undef); } sub dbinfoAdd($$$) { - my ($self, $key, $value)=@_; - $self->{dbinfo}->{$key}=$value; + my ($self, $key, $value)=@_; + $self->{dbinfo}->{$key}=$value; } sub dbinfoGet($$$) { - my ($self, $key, $defaultValue)=@_; - if ( defined($self->{dbinfo}->{$key}) ) { - return($self->{dbinfo}->{$key}); - } - return($defaultValue); + my ($self, $key, $defaultValue)=@_; + if ( defined($self->{dbinfo}->{$key}) ) { + return($self->{dbinfo}->{$key}); + } + return($defaultValue); } sub dbinfoSave($) { - my $self=shift; - open(INFO, "> $self->{moviedbInfo}") || return(1); - for (sort keys %{$self->{dbinfo}}) { - print INFO "".$_.":".$self->{dbinfo}->{$_}."\n"; - } - close(INFO); - return(0); + my $self=shift; + open(INFO, "> $self->{moviedbInfo}") || return(1); + for (sort keys %{$self->{dbinfo}}) { + print INFO "".$_.":".$self->{dbinfo}->{$_}."\n"; + } + close(INFO); + return(0); } sub dbinfoGetFileSize($$) { - my ($self, $key)=@_; + my ($self, $key)=@_; - if ( !defined($self->{imdbListFiles}->{$key}) ) { - die ("invalid call"); - } - my $fileSize=int(-s "$self->{imdbListFiles}->{$key}"); - - # if compressed, then attempt to run gzip -l - if ( $self->{imdbListFiles}->{$key}=~m/.gz$/) { - if ( 
open(my $fd, "gzip -l ".$self->{imdbListFiles}->{$key}."|") ) { - # if parse fails, then defalt to wild ass guess of compression of 65% - $fileSize=int(($fileSize*100)/(100-65)); - - while(<$fd>) { - if ( m/^\s*\d+\s+(\d+)/ ) { - $fileSize=$1; - } - } - close($fd); + if ( !defined($self->{imdbListFiles}->{$key}) ) { + die ("invalid call"); } - else { - # wild ass guess of compression of 65% - $fileSize=int(($fileSize*100)/(100-65)); + my $fileSize=int(-s "$self->{imdbListFiles}->{$key}"); + + # if compressed, then attempt to run gzip -l + if ( $self->{imdbListFiles}->{$key}=~m/.gz$/) { + if ( open(my $fd, "gzip -l ".$self->{imdbListFiles}->{$key}."|") ) { + # if parse fails, then defalt to wild ass guess of compression of 65% + $fileSize=int(($fileSize*100)/(100-65)); + + while(<$fd>) { + if ( m/^\s*\d+\s+(\d+)/ ) { + $fileSize=$1; + } + } + close($fd); + } + else { + # wild ass guess of compression of 65% + $fileSize=int(($fileSize*100)/(100-65)); + } } - } - return($fileSize); + return($fileSize); } sub dbinfoCalcEstimate($$$) { - my ($self, $key, $estimateSizePerEntry)=@_; + my ($self, $key, $estimateSizePerEntry)=@_; - my $fileSize=$self->dbinfoGetFileSize($key); + my $fileSize=$self->dbinfoGetFileSize($key); - my $countEstimate=int($fileSize/$estimateSizePerEntry); + my $countEstimate=int($fileSize/$estimateSizePerEntry); - $self->dbinfoAdd($key."_list_file", $self->{imdbListFiles}->{$key}); - $self->dbinfoAdd($key."_list_file_size", int(-s "$self->{imdbListFiles}->{$key}")); - $self->dbinfoAdd($key."_list_file_size_uncompressed", $fileSize); - $self->dbinfoAdd($key."_list_count_estimate", $countEstimate); - return($countEstimate); + $self->dbinfoAdd($key."_list_file", $self->{imdbListFiles}->{$key}); + $self->dbinfoAdd($key."_list_file_size", int(-s "$self->{imdbListFiles}->{$key}")); + $self->dbinfoAdd($key."_list_file_size_uncompressed", $fileSize); + $self->dbinfoAdd($key."_list_count_estimate", $countEstimate); + return($countEstimate); } sub 
dbinfoCalcBytesPerEntry($$$) { - my ($self, $key, $calcActualForThisNumber)=@_; + my ($self, $key, $calcActualForThisNumber)=@_; - my $fileSize=$self->dbinfoGetFileSize($key); + my $fileSize=$self->dbinfoGetFileSize($key); - return(int($fileSize/$calcActualForThisNumber)); + return(int($fileSize/$calcActualForThisNumber)); } -sub invokeStage($$) +sub gettitleshash($$) { - my ($self, $stage)=@_; - - my $startTime=time(); - if ( $stage == 1 ) { - $self->status("parsing Movies list for stage $stage.."); - my $countEstimate=$self->dbinfoCalcEstimate("movies", 47); - - my $num=$self->readMoviesOrGenres("Movies", $countEstimate, "$self->{imdbListFiles}->{movies}"); - if ( $num < 0 ) { - if ( $num == -2 ) { - $self->error("you need to download $self->{imdbListFiles}->{movies} from ftp.imdb.com"); - } - return(1); - } - elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { - my $better=$self->dbinfoCalcBytesPerEntry("movies", $num); - $self->status("ARG estimate of $countEstimate for movies needs updating, found $num ($better bytes/entry)"); - } - $self->dbinfoAdd("db_stat_movie_count", "$num"); - - $self->status("writing stage1 data .."); - { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "writing titles", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; - my $count=0; - for my $movie (@{$self->{movies}}) { - print OUT "$movie\n"; - - $count++; - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - 
$next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(OUT); - delete($self->{movies}); - } - } - elsif ( $stage == 2 ) { - $self->status("parsing Directors list for stage $stage.."); - - my $countEstimate=$self->dbinfoCalcEstimate("directors", 258); - - my $num=$self->readCastOrDirectors("Directors", $countEstimate, "$self->{imdbListFiles}->{directors}"); - if ( $num < 0 ) { - if ( $num == -2 ) { - $self->error("you need to download $self->{imdbListFiles}->{directors} from ftp.imdb.com (see http://www.imdb.com/interfaces)"); - } - return(1); - } - elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { - my $better=$self->dbinfoCalcBytesPerEntry("directors", $num); - $self->status("ARG estimate of $countEstimate for directors needs updating, found $num ($better bytes/entry)"); - } - $self->dbinfoAdd("db_stat_director_count", "$num"); - - $self->status("writing stage2 data .."); - { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "writing directors", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - my $count=0; - open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; - for my $key (keys %{$self->{movies}}) { - my %dir; - for (split('\|', $self->{movies}{$key})) { - $dir{$_}++; - } - my @list; - for (keys %dir) { - push(@list, sprintf("%03d:%s", $dir{$_}, $_)); - } - my $value=""; - for my $c (reverse sort {$a cmp $b} @list) { - my ($num, $name)=split(':', $c); - $value.=$name."|"; - } - $value=~s/\|$//o; - print OUT "$key\t$value\n"; - - $count++; - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = 
$progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(OUT); - delete($self->{movies}); - } - #unlink("$self->{imdbDir}/stage1.data"); - } - elsif ( $stage == 3 ) { - $self->status("parsing Actors list for stage $stage.."); - - #print "re-reading movies into memory for reverse lookup..\n"; - my $countEstimate=$self->dbinfoCalcEstimate("actors", 449); - - my $num=$self->readCastOrDirectors("Actors", $countEstimate, "$self->{imdbListFiles}->{actors}"); - if ( $num < 0 ) { - if ( $num == -2 ) { - $self->error("you need to download $self->{imdbListFiles}->{actors} from ftp.imdb.com (see http://www.imdb.com/interfaces)"); - } - return(1); - } - elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { - my $better=$self->dbinfoCalcBytesPerEntry("actors", $num); - $self->status("ARG estimate of $countEstimate for actors needs updating, found $num ($better bytes/entry)"); - } - $self->dbinfoAdd("db_stat_actor_count", "$num"); - - $self->status("writing stage3 data .."); - { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "writing actors", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - my $count=0; - open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; - for my $key (keys %{$self->{movies}}) { - print OUT "$key\t$self->{movies}{$key}\n"; - - $count++; - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - 
$next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(OUT); - delete($self->{movies}); - } - } - elsif ( $stage == 4 ) { - $self->status("parsing Actresses list for stage $stage.."); - - my $countEstimate=$self->dbinfoCalcEstimate("actresses", 483); - my $num=$self->readCastOrDirectors("Actresses", $countEstimate, "$self->{imdbListFiles}->{actresses}"); - if ( $num < 0 ) { - if ( $num == -2 ) { - $self->error("you need to download $self->{imdbListFiles}->{actresses} from ftp.imdb.com (see http://www.imdb.com/interfaces)"); - } - return(1); - } - elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { - my $better=$self->dbinfoCalcBytesPerEntry("actresses", $num); - $self->status("ARG estimate of $countEstimate for actresses needs updating, found $num ($better bytes/entry)"); - } - $self->dbinfoAdd("db_stat_actress_count", "$num"); - - $self->status("writing stage4 data .."); - { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "writing actresses", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - my $count=0; - open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; - for my $key (keys %{$self->{movies}}) { - print OUT "$key\t$self->{movies}{$key}\n"; - $count++; - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(OUT); - delete($self->{movies}); - } - 
#unlink("$self->{imdbDir}/stage3.data"); - } - elsif ( $stage == 5 ) { - $self->status("parsing Genres list for stage $stage.."); - my $countEstimate=$self->dbinfoCalcEstimate("genres", 68); - - my $num=$self->readMoviesOrGenres("Genres", $countEstimate, "$self->{imdbListFiles}->{genres}"); - if ( $num < 0 ) { - if ( $num == -2 ) { - $self->error("you need to download $self->{imdbListFiles}->{genres} from ftp.imdb.com"); - } - return(1); - } - elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { - my $better=$self->dbinfoCalcBytesPerEntry("genres", $num); - $self->status("ARG estimate of $countEstimate for genres needs updating, found $num ($better bytes/entry)"); - } - $self->dbinfoAdd("db_stat_genres_count", "$num"); - - $self->status("writing stage5 data .."); - { - my $countEstimate=$self->dbinfoGet("db_stat_genres_count", 0); - my $progress=Term::ProgressBar->new({name => "writing genres", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; - my $count=0; - for my $movie (keys %{$self->{movies}}) { - print OUT "$movie\t$self->{movies}->{$movie}\n"; - - $count++; - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(OUT); - delete($self->{movies}); - } - } - elsif ( $stage == 6 ) { - $self->status("parsing Ratings list for stage $stage.."); - my $countEstimate=$self->dbinfoCalcEstimate("ratings", 68); - - my $num=$self->readRatings($countEstimate, 
"$self->{imdbListFiles}->{ratings}"); - if ( $num < 0 ) { - if ( $num == -2 ) { - $self->error("you need to download $self->{imdbListFiles}->{ratings} from ftp.imdb.com"); - } - return(1); - } - elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { - my $better=$self->dbinfoCalcBytesPerEntry("ratings", $num); - $self->status("ARG estimate of $countEstimate for ratings needs updating, found $num ($better bytes/entry)"); - } - $self->dbinfoAdd("db_stat_ratings_count", "$num"); - - $self->status("writing stage6 data .."); - { - my $countEstimate=$self->dbinfoGet("db_stat_ratings_count", 0); - my $progress=Term::ProgressBar->new({name => "writing ratings", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; - my $count=0; - for my $movie (keys %{$self->{movies}}) { - my @value=@{$self->{movies}->{$movie}}; - print OUT "$movie\t$value[0]\t$value[1]\t$value[2]\n"; - - $count++; - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(OUT); - delete($self->{movies}); - } - } - elsif ( $stage == 7 ) { - $self->status("parsing Keywords list for stage $stage.."); - - if ( !defined($self->{imdbListFiles}->{keywords}) ) { - $self->status("no keywords file downloaded, see --with-keywords details in documentation"); - return(0); - } - - my $countEstimate=5630000; - my $num=$self->readKeywords($countEstimate, "$self->{imdbListFiles}->{keywords}"); - if ( $num < 0 ) { - if ( $num == -2 ) 
{ - $self->error("you need to download $self->{imdbListFiles}->{keywords} from ftp.imdb.com"); - } - return(1); - } - elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) { - $self->status("ARG estimate of $countEstimate for keywords needs updating, found $num"); - } - $self->dbinfoAdd("keywords_list_file", "$self->{imdbListFiles}->{keywords}"); - $self->dbinfoAdd("keywords_list_file_size", -s "$self->{imdbListFiles}->{keywords}"); - $self->dbinfoAdd("db_stat_keywords_count", "$num"); - - $self->status("writing stage$stage data .."); - { - my $countEstimate=$self->dbinfoGet("db_stat_keywords_count", 0); - my $progress=Term::ProgressBar->new({name => "writing keywords", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; - - my $count=0; - for my $movie (keys %{$self->{movies}}) { - print OUT "$movie\t$self->{movies}->{$movie}\n"; - - $count++; - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(OUT); - delete($self->{movies}); - } - } - elsif ( $stage == 8 ) { - $self->status("parsing Plot list for stage $stage.."); - - if ( !defined($self->{imdbListFiles}->{plot}) ) { - $self->status("no plot file downloaded, see --with-plot details in documentation"); - return(0); - } - - my $countEstimate=222222; - my $num=$self->readPlots($countEstimate, "$self->{imdbListFiles}->{plot}"); - if ( $num < 0 ) { - if ( $num == -2 ) { - $self->error("you need to download 
$self->{imdbListFiles}->{plot} from ftp.imdb.com"); - } - return(1); - } - elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) { - $self->status("ARG estimate of $countEstimate for plots needs updating, found $num"); - } - $self->dbinfoAdd("plots_list_file", "$self->{imdbListFiles}->{plot}"); - $self->dbinfoAdd("plots_list_file_size", -s "$self->{imdbListFiles}->{plot}"); - $self->dbinfoAdd("db_stat_plots_count", "$num"); - - $self->status("writing stage$stage data .."); - { - my $countEstimate=$self->dbinfoGet("db_stat_plots_count", 0); - my $progress=Term::ProgressBar->new({name => "writing plots", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; - - my $count=0; - for my $movie (keys %{$self->{movies}}) { - print OUT "$movie\t$self->{movies}->{$movie}\n"; - - $count++; - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(OUT); - delete($self->{movies}); - } - } - elsif ( $stage == $self->{stageLast} ) { - my $tab=sprintf("\t"); - - $self->status("indexing all previous stage's data for stage ".$self->{stageLast}.".."); - - $self->status("parsing stage 1 data (movie list).."); - my %movies; - { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "reading titles", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - 
$progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(IN, "< $self->{imdbDir}/stage1.data") || die "$self->{imdbDir}/stage1.data:$!"; - while() { - chop(); - $movies{$_}=""; - - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $. > $countEstimate ) { - $countEstimate = $progress->target($.+100); - $next_update=$progress->update($.); - } - elsif ( $. > $next_update ) { - $next_update=$progress->update($.); - } - } - } - close(IN); - $progress->update($countEstimate) if ($self->{showProgressBar}); - } - - $self->status("merging in stage 2 data (directors).."); - if ( 1 ) { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "merging directors", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(IN, "< $self->{imdbDir}/stage2.data") || die "$self->{imdbDir}/stage2.data:$!"; - while() { - chop(); - s/^([^\t]+)\t//o; - if ( !defined($movies{$1}) ) { - $self->error("directors list references unidentified title '$1'"); - next; - } - $movies{$1}=$_; - - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $. > $countEstimate ) { - $countEstimate = $progress->target($.+100); - $next_update=$progress->update($.); - } - elsif ( $. 
> $next_update ) { - $next_update=$progress->update($.); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(IN); - } - - if ( 1 ) { - # fill in default for movies we didn't have a director for - for my $key (keys %movies) { - if ( !length($movies{$key})) { - $movies{$key}="<>"; - } - } - } - - $self->status("merging in stage 3 data (actors).."); - if ( 1 ) { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "merging actors", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(IN, "< $self->{imdbDir}/stage3.data") || die "$self->{imdbDir}/stage3.data:$!"; - while() { - chop(); - s/^([^\t]+)\t//o; - my $dbkey=$1; - my $val=$movies{$dbkey}; - if ( !defined($val) ) { - $self->error("actors list references unidentified title '$dbkey'"); - next; + # load the titles list (stage1.data) into memory + + my ($self, $countEstimate)=@_; + my $startTime=time(); + my $lineCount=0; + + undef $self->{titleshash}; + + $self->beginProgressBar('loading titles list', $countEstimate); + + open(IN, "< $self->{imdbDir}/stage1.data") || die "$self->{imdbDir}/stage1.data:$!"; + my $count=0; + my $maxidxid=0; + while() { + chomp(); + my $line=$_; + next if ( length($line) == 0 ); + #$self->status("read line $lineCount:$line"); + $lineCount++; + + # check the database version number + if ($lineCount == 1) { + if ( m/^0000000:version ([\d\.]*)$/ ) { + if ($1 ne $VERSION) { + $self->error("incorrect database version"); + return(1); + } else { + next; + } + } else { + $self->error("missing database version at line $lineCount"); + return(1); + } } - if ( $val=~m/$tab/o ) { - $movies{$dbkey}=$val."|".$_; + + + if (index($line, ":") != -1 ) { + $count++; + + # extract the title-idx-id and the film-title + # 
0000002:army%20of%20darkness%20%281992%29 Army of Darkness (1992) 1992 movie 0000002 + # + my ($midxid, $mhashkey, $mtitle) = $line =~ m/^(\d*):(.*?)\t+(.*?)\t/; + + if ($midxid && $mtitle) { + $self->{titleshash}{$mtitle} = int($midxid); # build the hash + + $maxidxid = $midxid if ( $midxid > $maxidxid ); + } + + $self->updateProgressBar('', $lineCount); } else { - $movies{$dbkey}=$val.$tab.$_; + $self->error("$lineCount: unrecognized format (missing tab)"); + $self->updateProgressBar('', $lineCount); } - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $. > $countEstimate ) { - $countEstimate = $progress->target($.+100); - $next_update=$progress->update($.); - } - elsif ( $. > $next_update ) { - $next_update=$progress->update($.); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(IN); - } - - $self->status("merging in stage 4 data (actresses).."); - if ( 1 ) { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "merging actresses", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(IN, "< $self->{imdbDir}/stage4.data") || die "$self->{imdbDir}/stage4.data:$!"; - while() { - chop(); - s/^([^\t]+)\t//o; - my $dbkey=$1; - my $val=$movies{$dbkey}; - if ( !defined($val) ) { - $self->error("actresses list references unidentified title '$dbkey'"); - next; + } + + $self->endProgressBar(); + + $self->status(sprintf("found ".withThousands($count)." titles in ". + withThousands($lineCount-1)." 
lines in %d seconds",time()-$startTime)); # drop 1 for the "version" line + + close(IN); + + #----------------------------------------------------------- + return($count, $maxidxid); +} + +sub dedupe($$$) +{ + # basic deduping of data entries + + my ($self, $data, $sep)=@_; + + my @outarr; + my @arr = split( ($sep eq '|' ? '\|' : $sep) , $$data); + my %out; + + foreach my $v (@arr) { + my ($a, $b) = $v =~ m/^(\d*):?(.*)\s*$/; + if (!defined $out{$b}) { + push @outarr, $v; + $out{$b} = $v; + } + } + + $$data = join($sep, @outarr); + return; +} + +sub stripbilling($$$) +{ + # strip the billing from the names + # also strip the "(I)" etc suffix from names + + my ($self, $data, $sep)=@_; + + my @outarr; + my @arr = split( ($sep eq '|' ? '\|' : $sep) , $$data); + + foreach my $v (@arr) { + my ($a, $b) = $v =~ m/^(\d*):?(.*)\s*$/; + $b=~s/\s\([IVXL]+\)\[/\[/o; + $b=~s/\s\([IVXL]+\)$//o; + push @outarr, $b; + } + + $$data = join($sep, @outarr); + return; +} + +sub sortnames($$$) +{ + # basic sorting of names + + my ($self, $data, $sep)=@_; + + my @arr = split( ($sep eq '|' ? '\|' : $sep) , $$data); + + $$data = join($sep, sort(@arr) ); + return; +} + +sub stripprogtype($$) +{ + # strip the (TV) or (V) or (VG) suffix from title + + my ($self, $data)=@_; + + my ($midx, $mtitle, $mrest) = $$data =~ m/^(.*?)\t(.*?)\t(.*)$/; + + $mtitle =~ s/\s(\((TV|V|VG)\))//; + + $$data = $midx ."\t". $mtitle ."\t". $mrest; + return; +} + +sub readfilesbyidxid($$$$) +{ + # read lines from the data files 2..8 looking for matches on a passed idxid + # (don't use this for stage1 data - use a call to readdatafile to simply get the next record + + my ($self, $fhs, $fdat, $idxid)=@_; + + while (my ($stage, $fh) = each ( %$fhs )) { + + $fdat->{$stage} = { k=>0, v=>'' } if !defined $fdat->{$stage}{k}; + + if ($fdat->{$stage}{k} < $idxid) { + #print STDERR "fetching from $stage ".$fdat->{$stage}{k}." 
< $idxid \n"; + + my ($fstage, $fidxid, $fdata) = $self->readdatafile( $fhs->{$stage}, $stage, $idxid, -1); + + if ($self->{usefilesort}) { + # if we are using filesort then there will be multiple records with the same idxid + # we need to fetch all of these and combine them + my $_fidxid = $fidxid; + while ( $_fidxid == $fidxid && $_fidxid != 9999999 ) { + # read next record + (my $_fstage, $_fidxid, my $_fdata) = $self->readdatafile( $fhs->{$stage}, $stage, $idxid, $_fidxid ); + if ($_fidxid == $fidxid) { + $fdata .= '|' . $_fdata; + } + } + + # need to dedupe our merged data + ($fstage, $fidxid, $fdata) = $self->tidydatafile( $fstage, $fidxid, $fdata ); + + } + + # store the file record + $fdat->{$stage} = { k=>$fidxid, v=>$fdata }; + } + } + + + # here's a fudge: we need to merge the actors (stage 3) and actresses (stage 4) together + my @pnames; + push ( @pnames, $fdat->{3}{v} ) if ( $fdat->{3}{k} == $idxid ); + push ( @pnames, $fdat->{4}{v} ) if ( $fdat->{4}{k} == $idxid ); + + if (scalar @pnames) { + # join the two data values, sort, strip... 
+ my $pnames = join('|', @pnames); + + $self->sortnames(\$pnames, '|'); # sorts by "billing:name" + $self->stripbilling(\$pnames, '|'); # strip "billing:" and "(I)" on name + + ### ...and then store in one of the actors/actresses value while nulling the other + if ( $fdat->{3}{k} == $idxid ) { + $fdat->{3}{v} = $pnames; + $fdat->{4}{v} = ':::' if ( $fdat->{4}{k} == $idxid ); + } + elsif ( $fdat->{4}{k} == $idxid ) { + $fdat->{4}{v} = $pnames; + $fdat->{3}{v} = ':::' if ( $fdat->{3}{k} == $idxid ); + } } - if ( $val=~m/$tab/o ) { - $movies{$dbkey}=$val."|".$_; + # end fudge + + return; +} + +sub readdatafile($$$$$) +{ + my ($self, $fh, $stage, $idxid, $lidxid)=@_; + + # read a line from a file + + my $line; + + # if we have a parked record then use that one + if ( defined $self->{datafile}{$stage} ) { + $line = $self->{datafile}{$stage}; + undef $self->{datafile}{$stage}; + + } else { + if ( eof($fh) ) { + return ($stage, 9999999, ''); + } + defined( $line = readline $fh ) or die "readline failed on file for stage $stage : $!"; + } + + # extract the idxid from the start of each line + # 0000002:army%20of%20darkness%20%281992%29 Army of Darkness (1992) 1992 movie 0000002 + my ($midxid, $mdata) = $line =~ m/^(\d*):(.*)$/; + + if ($midxid) { + + # there should not be any records in datafile n which are not in datafile 1 + if ( $midxid < $idxid ) { + $self->error("unexpected record in stage $stage data file at $midxid (expected $idxid)"); } else { - $movies{$dbkey}=$val.$tab.$_; + # processing on the data for each interim file + ($stage, $midxid, $mdata) = $self->tidydatafile( $stage, $midxid, $mdata ); + } + + # if the incoming idxid has changed then park the record + if ( $lidxid != -1 && $midxid != $lidxid ) { + $self->{datafile}{$stage} = $line; } - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $. 
> $countEstimate ) { - $countEstimate = $progress->target($.+100); - $next_update=$progress->update($.); - } - elsif ( $. > $next_update ) { - $next_update=$progress->update($.); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(IN); - } - if ( 1 ) { - # fill in placeholder if no actors were found - for my $key (keys %movies) { - if ( !($movies{$key}=~m/$tab/o) ) { - $movies{$key}.=$tab."<>"; - } - } - } - - $self->status("merging in stage 5 data (genres).."); - if ( 1 ) { - my $countEstimate=$self->dbinfoGet("db_stat_genres_count", 1); # '1' prevents the spurious "(nothing to do)" msg - my $progress=Term::ProgressBar->new({name => "merging genres", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(IN, "< $self->{imdbDir}/stage5.data") || die "$self->{imdbDir}/stage5.data:$!"; - while() { - chop(); - s/^([^\t]+)\t//o; - my $dbkey=$1; - my $genres=$_; - my $val=$movies{$dbkey}; - if ( !defined($val) ) { - $self->error("genres list references unidentified title '$1'"); - next; - } - $movies{$dbkey}.=$tab.$genres; - - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $. > $countEstimate ) { - $countEstimate = $progress->target($.+100); - $next_update=$progress->update($.); - } - elsif ( $. 
> $next_update ) { - $next_update=$progress->update($.); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(IN); - } - - if ( 1 ) { - # fill in placeholder if no genres were found - for my $key (keys %movies) { - my $val=$movies{$key}; - my $t=index($val, $tab); - if ( $t == -1 ) { - die "corrupt entry '$key' '$val'"; - } - if ( index($val, $tab, $t+1) == -1 ) { - $movies{$key}.=$tab."<>"; - } - } - } - - $self->status("merging in stage 6 data (ratings).."); - if ( 1 ) { - my $countEstimate=$self->dbinfoGet("db_stat_ratings_count", 1); # '1' prevents the spurious "(nothing to do)" msg - my $progress=Term::ProgressBar->new({name => "merging ratings", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(IN, "< $self->{imdbDir}/stage6.data") || die "$self->{imdbDir}/stage6.data:$!"; - while() { - chop(); - s/^([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)$//o; - my $dbkey=$1; - my ($ratingDist, $ratingVotes, $ratingRank)=($2,$3,$4); - - my $val=$movies{$dbkey}; - if ( !defined($val) ) { - $self->error("ratings list references unidentified title '$1'"); - next; - } - $movies{$dbkey}.=$tab.$ratingDist.$tab.$ratingVotes.$tab.$ratingRank; - - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $. > $countEstimate ) { - $countEstimate = $progress->target($.+100); - $next_update=$progress->update($.); - } - elsif ( $. 
> $next_update ) { - $next_update=$progress->update($.); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(IN); - } - - if ( 1 ) { - # fill in placeholder if no genres were found - for my $key (keys %movies) { - my $val=$movies{$key}; - - my $t=index($val, $tab); - if ( $t == -1 ) { - die "corrupt entry '$key' '$val'"; - } - my $j=index($val, $tab, $t+1); - if ( $j == -1 ) { - die "corrupt entry '$key' '$val'"; - } - if ( index($val, $tab, $j+1) == -1 ) { - $movies{$key}.=$tab."<>".$tab."<>".$tab."<>"; - } - } - } - - $self->status("merging in stage 7 data (keywords).."); - #if ( 1 ) { # this stage is optional - if ( -f "$self->{imdbDir}/stage7.data" ) { - my $countEstimate=$self->dbinfoGet("db_stat_keywords_count", 1); # '1' prevents the spurious "(nothing to do)" msg - my $progress=Term::ProgressBar->new({name => "merging keywords", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(IN, "< $self->{imdbDir}/stage7.data") || die "$self->{imdbDir}/stage7.data:$!"; - while() { - chop(); - s/^([^\t]+)\t+//o; - my $dbkey=$1; - my $keywords=$_; - if ( !defined($movies{$dbkey}) ) { - $self->error("keywords list references unidentified title '$1'"); - next; - } - $movies{$dbkey}.=$tab.$keywords; - - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $. > $countEstimate ) { - $countEstimate = $progress->target($.+100); - $next_update=$progress->update($.); - } - elsif ( $. 
> $next_update ) { - $next_update=$progress->update($.); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(IN); - } - - if ( 1 ) { - # fill in default for movies we didn't have any keywords for - for my $key (keys %movies) { - my $val=$movies{$key}; - #keyword is 6th entry - my $t = 0; - for my $i (0..4) { - $t=index($val, $tab, $t); - if ( $t == -1 ) { - die "Corrupt entry '$key' '$val'"; - } - $t+=1; - } - if ( index($val, $tab, $t) == -1 ) { - $movies{$key}.=$tab."<>"; - } - } - } - - $self->status("merging in stage 8 data (plots).."); - #if ( 1 ) { # this stage is optional - if ( -f "$self->{imdbDir}/stage8.data" ) { - my $countEstimate=$self->dbinfoGet("db_stat_plots_count", 1); # '1' prevents the spurious "(nothing to do)" msg - my $progress=Term::ProgressBar->new({name => "merging plots", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - open(IN, "< $self->{imdbDir}/stage8.data") || die "$self->{imdbDir}/stage8.data:$!"; - while() { - chop(); - s/^([^\t]+)\t+//o; - my $dbkey=$1; - my $plot=$_; - if ( !defined($movies{$dbkey}) ) { - $self->error("plot list references unidentified title '$1'"); - next; - } - $movies{$dbkey}.=$tab.$plot; - - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $. > $countEstimate ) { - $countEstimate = $progress->target($.+100); - $next_update=$progress->update($.); - } - elsif ( $. 
> $next_update ) { - $next_update=$progress->update($.); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(IN); - } - if ( 1 ) { - # fill in default for movies we didn't have any plot for - for my $key (keys %movies) { - my $val=$movies{$key}; - #plot is 7th entry - my $t = 0; - for my $i (0..5) { - $t=index($val, $tab, $t); - if ( $t == -1 ) { - die "Corrupt entry '$key' '$val'"; - } - $t+=1; - } - if ( index($val, $tab, $t) == -1 ) { - $movies{$key}.=$tab."<>"; - } - } - } - - #unlink("$self->{imdbDir}/stage1.data"); - #unlink("$self->{imdbDir}/stage2.data"); - #unlink("$self->{imdbDir}/stage3.data"); - # --------------------------------------------------------------------------------------- + } + + return ($stage, $midxid, $mdata); +} + +sub tidydatafile($$$$) +{ + my ($self, $stage, $midxid, $mdata)=@_; + + # tidy/reformat the data from a stagex.data file + + if ($midxid) { + + # processing on the data for each interim file + + # movies #1 : strip the (TV) (V) markers from the movie title + # directors #2 : (i) dedupe (ii) sort into name order (not correct but there's no sequencing in the imdb data) + # actors/actresses #3,#4 : (i) dedeupe (ii) sort into billing order (iii) strip billing id Note: need to merge actors and actresses + # genres #5 : (i) dedupe + # ratings #6 : (i) split elements and separate by tabs + # keywords #7 : (i) dedupe, (ii) replace separator with comma + # plots #8 : + # + if ($stage == 1) { + $self->stripprogtype(\$mdata); + + } elsif ($stage == 2) { + $self->dedupe(\$mdata, '|'); + $self->stripbilling(\$mdata, '|'); + $self->sortnames(\$mdata, '|'); # sorts by "lastname, firstname" + + } elsif ($stage == 3 || $stage == 4) { + $self->dedupe(\$mdata, '|'); + # defer sorting and strip billing deferred until after we have joined actors + actresses + ## $self->sortnames(\$mdata, '|'); # sorts by "billing:name" + ## $self->stripbilling(\$mdata, '|'); + + } elsif ($stage == 5) { + $self->dedupe(\$mdata, 
'|'); + + } elsif ($stage == 6) { + $mdata =~ s/;/\t/g; # replace ";" separator with tabs + + } elsif ($stage == 7) { + $self->dedupe(\$mdata, '|'); + $mdata =~ s/\|/,/g; + + } elsif ($stage == 8) { + # noop + } + + } + + return ($stage, $midxid, $mdata); +} +sub invokeStage($$) +{ + my ($self, $stage)=@_; - # - # note: not all movies end up with a cast, but we include them anyway. - # + my $startTime=time(); + + #---------------------------------------------------------------------------- + if ( $stage == 1 ) { + + $self->status("parsing Movies list for stage $stage ..."); + my $countEstimate=$self->dbinfoCalcEstimate("movies", 45); - my %nmovies; - { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "computing index", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my $next_update=0; - - my $count=0; - for my $key (keys %movies) { - my $dbkey=$key; - - # drop episode information - ex: {Twelve Angry Men (1954)} - $dbkey=~s/\s*\{[^\}]+\}//go; - - # todo - this would make things easier - # change double-quotes around title to be (made-for-tv) suffix instead - if ( $dbkey=~m/^\"/o && #" - $dbkey=~m/\"\s*\(/o ) { #" - $dbkey.=" (tv_series)"; - } - # how rude, some entries have (TV) appearing more than once. 
- $dbkey=~s/\(TV\)\s*\(TV\)$/(TV)/o; - - my $qualifier; - if ( $dbkey=~s/\s+\(TV\)$//o ) { - $qualifier="tv_movie"; - } - elsif ( $dbkey=~s/\s+\(mini\) \(tv_series\)$// ) { - $qualifier="tv_mini_series"; - } - elsif ( $dbkey=~s/\s+\(tv_series\)$// ) { - $qualifier="tv_series"; - } - elsif ( $dbkey=~s/\s+\(mini\)$//o ) { - $qualifier="tv_mini_series"; - } - elsif ( $dbkey=~s/\s+\(V\)$//o ) { - $qualifier="video_movie"; - } - elsif ( $dbkey=~s/\s+\(VG\)$//o ) { - #$qualifier="video_game"; - delete($movies{$key}); - next; + # if we are using --filesort then write output file direct (and not use a hash) + if ($self->{usefilesort}) { + open($self->{fhdata}, ">", "$self->{imdbDir}/stage$stage.data.tmp") || die "$self->{imdbDir}/stage$stage.data.tmp:$!"; } - else { - $qualifier="movie"; + + my ($num, $numout) = $self->readMovies("Movies", $countEstimate, "$self->{imdbListFiles}->{movies}", $stage); + + if ($self->{usefilesort}) { + close($self->{fhdata}); } - #if ( $dbkey=~s/\s+\((tv_series|tv_mini_series|tv_movie|video_movie|video_game)\)$//o ) { - # $qualifier=$1; - #} - my $year; - my $title=$dbkey; - - if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #" - $title=~s/^\"//o; #" - $title=~s/\"(\s*\()/$1/o; #" - } - - if ( $title=~s/\s+\((\d\d\d\d)\)$//o || - $title=~s/\s+\((\d\d\d\d)\/[IVX]+\)$//o ) { - $year=$1; - } - elsif ( $title=~s/\s+\((\?\?\?\?)\)$//o || - $title=~s/\s+\((\?\?\?\?)\/[IVX]+\)$//o ) { - $year="0000"; + + if ( $num < 0 ) { + if ( $num == -2 ) { + $self->error("you need to download $self->{imdbListFiles}->{movies} from the ftp site, or use the --download option"); + } + return(1); } - else { - $self->error("movie list format failed to decode year from title '$title'"); - $year="0000"; + elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { + my $better=$self->dbinfoCalcBytesPerEntry("movies", $num); + ##not accurate: $self->status("ARG estimate of $countEstimate for movies needs updating, found $num ($better bytes/entry)"); + } + 
$self->dbinfoAdd("db_stat_movie_count", "$numout"); + + #use Data::Dumper;print STDERR Dumper($self->{movieshash}); + #use Data::Dumper;my $_h="stage${stage}hash";print STDERR Dumper( $self->{$_h} ); + + + #----------------------------------------------------------- + # sort the title keys and write the stage1.data file + # + # if we are using --filesort then write output file direct (and not use a hash) + if ($self->{usefilesort}) { + + $self->beginProgressBar("writing stage $stage data", $self->dbinfoGet("db_stat_movie_count", 0) ); + + # movies are in an interim file (stage1.data.tmp). + # We need to (1) sort the file, + # (2) translate to stage1.data (adding the idxid) + # (3) store in %titleshash + my $res; + + # (1) sort the file in situ + $res = $self->sortfile($stage, "$self->{imdbDir}/stage$stage.data.tmp"); + # if (!$res) { do something? } + + # (2) & (3) read the sorted file and create out stage1.data while building titleshash hash + undef $self->{titleshash}; + + open(IN, "< $self->{imdbDir}/stage$stage.data.tmp") || die "$self->{imdbDir}/stage$stage.data.tmp:$!"; + open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; + print OUT '0000000:version '.$VERSION."\n"; + + my $count=0; + while() { + my $line=$_; + + $count++; + my $idxid=sprintf("%07d", $count); + + my ($k, $k2, $v2) = $line =~ m/^(.*?)\t(.*?)\t(.*?)$/; + + # the following equates to + # print OUT $idxid.":".$dbkey."\t".$title."\t".$year."\t".$qualifier."\t".$lineno."\n"; + print OUT $idxid.':'.$k."\t".$k2."\t".$v2."\t".$idxid."\n"; + + # and create a shared hash of $title=>$lineno (i.e. 
IDX 'id') + $self->{titleshash}{$k2} = $count; # store the idx id for this title + + + $self->updateProgressBar('', $count); + } + $self->endProgressBar(); + + $self->{maxid} = $count; # remember the largest values of title id (for loop stop) + + close(OUT); + close(IN); + + unlink "$self->{imdbDir}/stage$stage.data.tmp"; + + + } else { + + # movies data are in a hash (%movieshash) to we need to write that to disc (stage1.data) + + $self->beginProgressBar("writing stage $stage data", $num); + + open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; + print OUT '0000000:version '.$VERSION."\n"; + + my $count=0; + foreach my $k (sort keys( %{$self->{movieshash}} )) { + + while ( my ($k2, $v2) = each %{$self->{movieshash}{$k}} ) { # movieshash is a hash of hashes + + $count++; + my $idxid=sprintf("%07d", $count); + + # the following equates to + # print OUT $idxid.":".$dbkey."\t".$title."\t".$year."\t".$qualifier."\t".$lineno."\n"; + print OUT $idxid.':'.$k."\t".$k2."\t".$v2."\t".$idxid."\n"; + + # and create a shared hash of $title=>$lineno (i.e. IDX 'id') + $self->{titleshash}{$k2} = $count; # store the int version of the id for this title + # (note multiple titles may have the same hashkey) + } + + delete( $self->{movieshash}{$k} ); + + $self->updateProgressBar('', $count); + } + + $self->endProgressBar(); + + $self->{maxid} = $count; # remember the largest values of title id (for loop stop) + + close(OUT); + } + + #use Data::Dumper;print STDERR Dumper( $self->{titleshash} );die; + + } + + + #---------------------------------------------------------------------------- + elsif ( $stage >= 2 && $stage < $self->{stageLast} ) { + + # these stages need the hash of film-title=>idxid + # if we have come from stage 1 (i.e. 
"prep-stage=all" then we will have that from stage=1 + # otherwise we will need to build *.e.g "prep-stage=2" + # + if (!defined( $self->{titleshash} ) ) { + my $countEstimate = $self->dbinfoGet("db_stat_movie_count", 0); + my ($titlecount, $maxid) = $self->gettitleshash($countEstimate); + if ($titlecount == -1) { + $self->error('could not make title list - quitting'); + return(1); + } + $self->{maxid} = $maxid; # remember the largest values of title id (for loop stop) + #use Data::Dumper;print STDERR Dumper( $self->{titleshash} ); + } + + # nb: {stages} = { 1=>'movies', 2=>'directors', 3=>'actors', 4=>'actresses', 5=>'genres', 6=>'ratings', 7=>'keywords', 8=>'plot' }; + my $stagename = $self->{stages}{$stage}; + my $stagenametext = ucfirst $self->{stages}{$stage}; + + $self->status("parsing $stagenametext list for stage $stage ..."); + + # skip optional stages + if ( ( !defined $self->{imdbListFiles}->{$stagename} ) && ( defined $self->{optionalStages}->{$stagename} ) ) { + return(0); + } + + # approx average record length for each incoming data file (used to guesstimate number of records in file) + my %countestimates = ( 1=>'45', 2=> '40', 3=> '55', 4=> '55', 5=> '35', 6=> '65', 7=> '20', 8=> '50' ); + my $countEstimate = $self->dbinfoCalcEstimate($stagename, $countestimates{$stage}); + + my %stagefunctions = ( 1=>\&readMovies, 2=>\&readCastOrDirectors, + 3=>\&readCastOrDirectors, 4=>\&readCastOrDirectors, + 5=>\&readGenres, 6=>\&readRatings, + 7=>\&readKeywords, 8=>\&readPlots + ); + + + # if we are using --filesort then write output file direct (and not use a hash) + if ($self->{usefilesort}) { + open($self->{fhdata}, ">", "$self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; + print {$self->{fhdata}} '0000000:version '.$VERSION."\n"; + } + + my $num=$stagefunctions{$stage}->($self, $stagenametext, $countEstimate, "$self->{imdbListFiles}->{$stagename}", $stage); + + if ($self->{usefilesort}) { + close($self->{fhdata}); + } + + if ( 
$num < 0 ) { + if ( $num == -2 ) { + $self->error("you need to download $self->{imdbListFiles}->{$stagename} from the ftp site, or use the --download option"); + } + return(1); + } + elsif ( $num > 0 && abs($num - $countEstimate) > $countEstimate*.10 ) { + my $better=$self->dbinfoCalcBytesPerEntry($stagename, $num); + $self->status("ARG estimate of $countEstimate for $stagename needs updating, found $num ($better bytes/entry)"); + } + $self->dbinfoAdd("db_stat_${stagename}_count", "$num"); + + + + #----------------------------------------------------------- + # print the title keys in IDX id order : write the stagex.data file + # + if ($self->{usefilesort}) { + + # file already written : just needs sorting (in situ) + my $f="$self->{imdbDir}/stage$stage.data"; + my $res = $self->sortfile($stage, $f); + # todo: check the reply? + + } else { + #use Data::Dumper;my $_h="stage${stage}hash";print STDERR Dumper( $self->{$_h} ); + + # write the stage.data file from the memory hash + + $self->beginProgressBar("writing stage $stage data", $num); + + open(OUT, "> $self->{imdbDir}/stage$stage.data") || die "$self->{imdbDir}/stage$stage.data:$!"; + print OUT '0000000:version '.$VERSION."\n"; + + # don't sort the hash keys - that will just cost memory. Just pull them out in numerical order. 
+ my $h = "stage${stage}hash"; + # + # read the stage data hash in idxid order + for (my $i = 0; $i <= $self->{maxid}; $i++){ + + # write the extracted imdb data to a temporary file, preceeded by the IDX id for each record + my $k = sprintf("%07d", $i); + + if ( $self->{$h}{$i} ) { + my $v = $self->{$h}{$i}; + delete ( $self->{$h}{$i} ); + # + print OUT $k.':'.$v."\n"; + } + + $self->updateProgressBar('', $i); + } + + $self->endProgressBar(); + + close(OUT); + + #use Data::Dumper;print STDERR "leftovers: $stage ".Dumper( $self->{$h} )."\n"; + + delete ( $self->{$h} ); + } + + #use Data::Dumper;print STDERR Dumper( $self->{titleshash} ); + } + + + #---------------------------------------------------------------------------- + elsif ( $stage == $self->{stageLast} ) { + + # delete existing IDX; trim stage1.data to IDX; merge stage 2-8.data into DAT + + # free up some memory + undef $self->{titleshash}; + + my $tab=sprintf("\t"); + + $self->status("indexing all previous stage's data for stage ".$self->{stageLast}."..."); + + + #---------------------------------------------------------------------- + # read all the parsed data files created in stages 1-8 and merges them + # read one record at a time from each file! 
+ + my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); + + $self->beginProgressBar('writing database', $countEstimate); + + open(IDX, "> $self->{moviedbIndex}") || die "$self->{moviedbIndex}:$!"; + open(DAT, "> $self->{moviedbData}") || die "$self->{moviedbData}:$!"; + + my $i; + my %fh; + for $i (1..($self->{stageLast}-1)) { + # skip optional files if they don't exist + if ( ($i == 7 && !( -f "$self->{imdbDir}/stage7.data" )) + || ($i == 8 && !( -f "$self->{imdbDir}/stage8.data" )) ) { + next; + } + # + open($fh{$i}, "< $self->{imdbDir}/stage$i.data") || die "$self->{imdbDir}/stage$i.data:$!"; + } + + # check the file version numbers + while (my ($k, $v) = each (%fh)) { + $_ = readline $v; + if ( m/^0000000:version ([\d\.]*)$/ ) { + if ($1 ne $VERSION) { + $self->error("incorrect database version in stage $k file"); + return(1); + } else { + next; + } + } else { + $self->error("missing database version in stage $k file"); + return(1); + } } - $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og; + + + #---------------------------------------------------------------------- + my %fdat; + + my $count=0; + my $go=1; + while ($go) { + + last if ( eof($fh{1}) ); # I suppose we ought to check if there any recs remaining in the other files (todo) + + # read a movie record + my ($fstage, $fidxid, $fdata) = $self->readdatafile($fh{1}, 1, -1, -1); + + $fdat{$fstage} = { k=>$fidxid, v=>$fdata }; + + if ($fidxid) { + $count++; + + # get matching records from other data files + $self->readfilesbyidxid(\%fh, \%fdat, $fidxid); + + # merge data from other records + my $mdata = $fidxid.':'; + + for $i (2..($self->{stageLast}-1)) { + + # we can join actors and actresses - only 1 of them will have data now + next if ( $fdat{$i}{k} == $fidxid && $fdat{$i}{v} eq ':::' ); + # only output either actors or actresses but not both (otherwise we'll get an extra marker in the output + next if ($i == 3) && ( $fdat{3}{k} != $fidxid ); + next if ($i == 4) && 
( $fdat{4}{k} != $fidxid ) && ( $fdat{3}{k} == $fidxid ); # don't output marker if we've just done it for actors + # drop through if actresses (#4) and no actors (#3) for this film + + + if ( $fdat{$i}{k} == $fidxid ) { + $mdata .= $fdat{$i}{v}; + } + else { + # don't data for this stage ($i) so just print the 'empty' marker + $mdata .= '<>'; + if ($i == 6) { $mdata .= "\t".'<>'."\t".'<>'; } # fudge to add extra spacers in ratings data + } + + $mdata .= "\t" unless $i == ($self->{stageLast}-1); + } + + #print STDERR "mdata ".$mdata."\n"; + + # write the DAT record + print DAT $mdata ."\n"; + + # write the IDX record + print IDX $fdata ."\n"; + } - my $hashkey=lc("$title ($year)"); - $hashkey=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/oeg; - if ( defined($movies{$hashkey}) ) { - die "unable to place moviedb key for $key, report to xmltv-devel\@lists.sf.net"; - } - die "title \"$title\" contains a tab" if ( $title=~m/\t/o ); - #print "key:$dbkey\n\ttitle=$title\n\tyear=$year\n\tqualifier=$qualifier\n"; - #print "key $key: value=\"$movies{$key}\"\n"; - - $nmovies{$hashkey}=$dbkey.$tab.$year.$tab.$qualifier.$tab.delete($movies{$key}); - $count++; - - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - - if ( scalar(keys %movies) != 0 ) { - die "what happened, we have keys left ?"; - } - undef(%movies); - } - - { - my $countEstimate=$self->dbinfoGet("db_stat_movie_count", 0); - my $progress=Term::ProgressBar->new({name => "writing database", - count => $countEstimate, - ETA => 'linear'}) - if ($self->{showProgressBar}); - $progress->minor(0) if ($self->{showProgressBar}); - $progress->max_update_rate(1) if ($self->{showProgressBar}); - my 
$next_update=0; - - open(IDX, "> $self->{moviedbIndex}") || die "$self->{moviedbIndex}:$!"; - open(DAT, "> $self->{moviedbData}") || die "$self->{moviedbData}:$!"; - my $count=0; - for my $key (sort {$a cmp $b} keys %nmovies) { - my $val=delete($nmovies{$key}); - #print "movie $key: $val\n"; - #$val=~s/^([^\t]+)\t([^\t]+)\t([^\t]+)\t//o || die "internal failure ($key:$val)"; - my ($dbkey, $year, $qualifier,$directors,$actors,@rest)=split('\t', $val); - #die ("no 1") if ( !defined($dbkey)); - #die ("no 2") if ( !defined($year)); - #die ("no 3") if ( !defined($qualifier)); - #die ("no 4") if ( !defined($directors)); - #die ("no 5") if ( !defined($actors)); - #print "key:$key\n\ttitle=$dbkey\n\tyear=$year\n\tqualifier=$qualifier\n"; + $self->updateProgressBar('', $count); + } + + $self->endProgressBar(); + + $self->status(sprintf("wrote ".withThousands($count)." titles in %d seconds",time()-$startTime)); - #my ($directors, $actors)=split('\t', $val); + close(IDX); + close(IN); + while (my ($k, $v) = each (%fh)) { + close($v); + } + - my $details=""; - if ( $directors eq "<>" ) { - $details.="<>"; - } - else { - # sort directors by last name, removing duplicates - my $last=''; - for my $name (sort {$a cmp $b} split('\|', $directors)) { - if ( $name ne $last ) { - $details.="$name|"; - $last=$name; - } - } - $details=~s/\|$//o; + # --------------------------------------------------------------------------------------- + + $self->dbinfoAdd("db_version", $XMLTV::IMDB::VERSION); + + if ( $self->dbinfoSave() ) { + $self->error("$self->{moviedbInfo}:$!"); + return(1); } - #print " $dbkey: $val\n"; - if ( $actors eq "<>" ) { - $details.=$tab."<>"; + $self->status("running quick sanity check on database indexes..."); + my $imdb=new XMLTV::IMDB('imdbDir' => $self->{imdbDir}, + 'verbose' => $self->{verbose}); + + if ( -e "$self->{moviedbOffline}" ) { + unlink("$self->{moviedbOffline}"); } - else { - $details.=$tab; - # sort actors by billing, removing repeated entries - # be 
warned, two actors may have the same billing level - my $last=''; - for my $c (sort {$a cmp $b} split('\|', $actors)) { - my ($billing, $name)=split(':', $c); - # remove Host/Narrators from end - # BUG - should remove (I)'s from actors/actresses names when details are generated - $name=~s/\s\([IVX]+\)\[/\[/o; - $name=~s/\s\([IVX]+\)$//o; - - if ( $name ne $last ) { - $details.="$name|"; - $last=$name; - } - #print " $c: split gives'$billing' and '$name'\n"; - } - $details=~s/\|$//o; - } - $count++; - my $lineno=sprintf("%07d", $count); - print IDX $key."\t".$dbkey."\t".$year."\t".$qualifier."\t".$lineno."\n"; - print DAT $lineno.":".$details."\t".join($tab, @rest)."\n"; - - if ($self->{showProgressBar}) { - # re-adjust target so progress bar doesn't seem too wonky - if ( $count > $countEstimate ) { - $countEstimate = $progress->target($count+100); - $next_update=$progress->update($count); - } - elsif ( $count > $next_update ) { - $next_update=$progress->update($count); - } - } - } - $progress->update($countEstimate) if ($self->{showProgressBar}); - close(DAT); - close(IDX); - } + if ( my $errline=$imdb->sanityCheckDatabase() ) { + open(OFF, "> $self->{moviedbOffline}") || die "$self->{moviedbOffline}:$!"; + print OFF $errline."\n"; + print OFF "one of the prep stages' must have produced corrupt data\n"; + print OFF "report the following details to xmltv-devel\@lists.sf.net\n"; - $self->dbinfoAdd("db_version", $XMLTV::IMDB::VERSION); + my $info=XMLTV::IMDB::loadDBInfo($self->{moviedbInfo}); + if ( ref $info eq 'SCALAR' ) { + print OFF "\tdbinfo file corrupt\n"; + print OFF "\t$info"; + } + else { + for my $key (sort keys %{$info}) { + print OFF "\t$key:$info->{$key}\n"; + } + } + print OFF "database taken offline\n"; + close(OFF); + open(OFF, "< $self->{moviedbOffline}") || die "$self->{moviedbOffline}:$!"; + while() { + chop(); + $self->error($_); + } + close(OFF); + return(1); + } + $self->status("sanity intact :)"); + } + else { + $self->error("tv_imdb: invalid 
stage $stage: only 1-".$self->{stageLast}." are valid"); + return(1); + } + $self->dbinfoAdd("seconds_to_complete_prep_stage_$stage", (time()-$startTime)); if ( $self->dbinfoSave() ) { - $self->error("$self->{moviedbInfo}:$!"); - return(1); + $self->error("$self->{moviedbInfo}:$!"); + return(1); } - - $self->status("running quick sanity check on database indexes..."); - my $imdb=new XMLTV::IMDB('imdbDir' => $self->{imdbDir}, - 'verbose' => $self->{verbose}); - - if ( -e "$self->{moviedbOffline}" ) { - unlink("$self->{moviedbOffline}"); - } - - if ( my $errline=$imdb->sanityCheckDatabase() ) { - open(OFF, "> $self->{moviedbOffline}") || die "$self->{moviedbOffline}:$!"; - print OFF $errline."\n"; - print OFF "one of the prep stages' must have produced corrupt data\n"; - print OFF "report the following details to xmltv-devel\@lists.sf.net\n"; - - my $info=XMLTV::IMDB::loadDBInfo($self->{moviedbInfo}); - if ( ref $info eq 'SCALAR' ) { - print OFF "\tdbinfo file corrupt\n"; - print OFF "\t$info"; - } - else { - for my $key (sort keys %{$info}) { - print OFF "\t$key:$info->{$key}\n"; - } - } - print OFF "database taken offline\n"; - close(OFF); - open(OFF, "< $self->{moviedbOffline}") || die "$self->{moviedbOffline}:$!"; - while() { - chop(); - $self->error($_); - } - close(OFF); - return(1); - } - $self->status("sanity intact :)"); - } - else { - $self->error("tv_imdb: invalid stage $stage: only 1-".$self->{stageLast}." 
are valid"); - return(1); - } - - $self->dbinfoAdd("seconds_to_complete_prep_stage_$stage", (time()-$startTime)); - if ( $self->dbinfoSave() ) { - $self->error("$self->{moviedbInfo}:$!"); - return(1); - } - return(0); + return(0); } sub crunchStage($$) { - my ($self, $stage)=@_; + my ($self, $stage)=@_; - if ( $stage == $self->{stageLast} ) { - # check all the pre-requisite stages have been run - for (my $st=1 ; $st < $self->{stageLast}; $st++ ) { - if ( !$self->stageComplete($st) ) { - #$self->error("prep stages must be run in sequence.."); - $self->error("prepStage $st either has never been run or failed"); - if ( grep { $_ == $st } values %{$self->{optionalStages}} ) { - $self->error("data for this stage will NOT be added"); - } else { - $self->error("rerun tv_imdb with --prepStage=$st"); - return(1); - } - } - } - } - - if ( -f "$self->{moviedbInfo}" && $stage != 1 ) { - my $ret=$self->dbinfoLoad(); - if ( $ret ) { - $self->error($ret); - return(1); - } - } - - $self->redirect("$self->{imdbDir}/stage$stage.log") || return(1); - my $ret=$self->invokeStage($stage); - $self->redirect(undef); - - if ( $ret == 0 ) { - if ( $self->{errorCountInLog} == 0 ) { - $self->status("prep stage $stage succeeded with no errors"); + if ( $stage == $self->{stageLast} ) { + # check all the pre-requisite stages have been run + for (my $st=1 ; $st < $self->{stageLast}; $st++ ) { + if ( !$self->stageComplete($st) ) { + #$self->error("prep stages must be run in sequence.."); + $self->error("prepStage $st either has never been run or failed"); + if ( grep { $_ == $st } values %{$self->{optionalStages}} ) { + $self->error("data for this stage will NOT be added"); ####### todo: unless flag present + } else { + $self->error("rerun tv_imdb with --prepStage=$st"); + return(1); + } + } + } } - else { - $self->status("prep stage $stage succeeded with $self->{errorCountInLog} errors in $self->{imdbDir}/stage$stage.log"); - if ( $stage == $self->{stageLast} && $self->{errorCountInLog} > 30 && 
$self->{errorCountInLog} < 80 ) { - $self->status("this stage commonly produces around 60 (or so) warnings because of imdb"); - $self->status("list file inconsistancies, they can usually be safely ignored"); - } - } - } - else { - if ( $self->{errorCountInLog} == 0 ) { - $self->status("prep stage $stage failed (with no logged errors)"); + + if ( -f "$self->{moviedbInfo}" && $stage != 1 ) { + my $ret=$self->dbinfoLoad(); + if ( $ret ) { + $self->error($ret); + return(1); + } + } + + # open stage logfile and run the requested stage + $self->redirect("$self->{imdbDir}/stage$stage.log") || return(1); + my $ret=$self->invokeStage($stage); + $self->redirect(undef); + + if ( $ret == 0 ) { + if ( $self->{errorCountInLog} == 0 ) { + $self->status("prep stage $stage succeeded with no errors"); + } + else { + $self->status("prep stage $stage succeeded with $self->{errorCountInLog} errors in $self->{imdbDir}/stage$stage.log"); + if ( $stage == $self->{stageLast} && $self->{errorCountInLog} > 30 && $self->{errorCountInLog} < 80 ) { + $self->status("this stage commonly produces around 60 (or so) warnings because of imdb"); + $self->status("list file inconsistancies, they can usually be safely ignored"); + } + } } else { - $self->status("prep stage $stage failed with $self->{errorCountInLog} errors in $self->{imdbDir}/stage$stage.log"); + if ( $self->{errorCountInLog} == 0 ) { + $self->status("prep stage $stage failed (with no logged errors)"); + } + else { + $self->status("prep stage $stage failed with $self->{errorCountInLog} errors in $self->{imdbDir}/stage$stage.log"); + } } - } - return($ret); + return($ret); } 1; diff -Nru xmltv-0.6.3/lib/xmltv.pl xmltv-1.0.0/lib/xmltv.pl --- xmltv-0.6.3/lib/xmltv.pl 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/lib/xmltv.pl 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,192 @@ +#!perl -w +# +# This is a quick XMLTV shell routing to use with the windows exe +# +# A single EXE is needed to allow sharing of modules and dlls of all the 
+# programs. +# +# Now uses PAR::Packer to build the exe. It takes a very long time on first run, which can +# appear to be a problem. +# +# There currently isn't a way for PAR::Packer to warn users about a first time run. +# I've modified the boot.c file in PAR::Packer to do that. It's not great as it also +# displays when building, but it's good enough. Here's what the change is (for documentation purposes) +# I'm trying to work with the PAR::Packer folks for a better fix. +# +# boot.c:188 +# rc = my_mkdir(stmpdir, 0700); +#// 2021-01-18 rmeden hack to print a message on first run +# if ( rc == 0 ) fprintf(stderr,"Note: This will take a while on first run\n"); +#// rmeden +# if ( rc == -1 && errno != EEXIST) { +# +# +# Robert Eden rmeden@yahoo.com +# + +use File::Basename; +use Carp; +use XMLTV; +use Date::Manip; +use DateTime; +use Params::Validate; +use Date::Language; +use Class::MethodMaker; +use Class::MethodMaker::Engine; + +$Carp::MaxEvalLen=40; # limit confess output + +# +# this check should not be done, at least not this way. it prevents some regular expressions! 
+# +## Check for error of running from 'Run' dialogue box with redirection, +## which Run doesn't understand, +## +#if (grep /[<>|]/, @ARGV) { +# warn < 12; + $tz += 24 if $tz < -12; + $tz= sprintf("%+03d00",$tz); + + $ENV{TZ}= $tz; + +} #timezone +print STDERR "Timezone is $ENV{TZ}\n" unless $opt_quiet; + + +$cmd = shift || ""; + +# --version (and abbreviations thereof) +if (index('--version', $cmd) == 0 and length $cmd >= 3) { + print "xmltv $XMLTV::VERSION\n"; + exit; +} + +# +# some programs use a "share" directory +# +if ($cmd eq 'tv_grab_na_dd', + or $cmd eq 'tv_grab_na_icons', + ) +{ + unless (grep(/^--share/i,@ARGV)) # don't add our --share if one supplied + { + my $dir = dirname($0); # get full program path + $dir =~ s!\\!/!g; # use / not \ + $dir .= "/share/xmltv"; + unless (-d $dir ) + { + die "directory $dir not found\n If not kept with the executable, specify with --share\n" + } + print STDERR "adding '--share=$dir'\n" unless $opt_quiet; + push @ARGV,"--share",$dir; + } +} + +# +# special hack, allow "exec" to execute an arbitrary script +# This will be used to allow XMLTV.EXE modules to be used on beta code w/o an alpha exe +# +# Note, no extra modules are included in the EXE. There is no guarantee this will work +# it is an unsupported hack. +# +# syntax XMLTV.EXE exec filename --options +# +if ($cmd eq 'exec') +{ + my $exe=shift; + $0=$exe; + print "doing $exe\n"; + print STDERR "STDERR doing $exe\n"; + do "./$exe"; + print STDERR $@ if length($@); + print "STDOUT $@" if length($@); + exit 1 if length($@); + exit 0; +} + +# +# scan through attached files and execute program if found +# + +#main thread! 
+ +$files=PAR::read_file("exe_files.txt"); +foreach my $exe (split(/ /,$files)) +{ + next unless length($exe)>3; #ignore trash + $_=$exe; + s!^.+/!!g; + $cmds{$_}=1; # build command list (just in case) + + next unless $cmd eq $_; + + $exe="script/$cmd"; + +# +# execute our command +# +# $0 = $_; # set $0 to our script +# print STDERR "STDERR about to execute $exe\n"; +# print STDOUT "STDOUT about to execute $exe\n"; + do $exe; +# print STDERR "STDERR got <$!> <$?> <$^E> <$@>\n"; +# print STDOUT "STDOUT got <$!> <$?> <$^E> <$@>\n"; + print STDERR $@ if length($@); + exit 1 if length($@); + exit 0; +} + +# +# command not found, print error +# +if ($cmd eq "" ) + { + print STDERR "you must specify the program to run\n for example: $0 tv_grab_fi --configure\n"; + } +else + { + print STDERR "$cmd is not a valid command.\n"; + } + +print STDERR "Valid commands are:\n"; +@cmds=sort keys %cmds; +$rows = int($#cmds / 3)+1; + +map {$_='' unless defined $_} @cmds[0..($rows*3+2)]; +unshift @cmds,undef; + +foreach (1..$rows) +{ + printf STDERR " %-20s %-20s %-20s\n",@cmds[$_,$rows+$_,2*$rows+$_]; +} +exit 1; + diff -Nru xmltv-0.6.3/lib/XMLTV.pm.in xmltv-1.0.0/lib/XMLTV.pm.in --- xmltv-0.6.3/lib/XMLTV.pm.in 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/lib/XMLTV.pm.in 2021-02-09 10:49:46.000000000 +0000 @@ -11,7 +11,7 @@ # the xmltv package as a whole. This number should be checked by the # mkdist tool. # -our $VERSION = '0.6.3'; +our $VERSION = '1.0.0'; # Work around changing behaviour of XML::Twig. On some systems (like # mine) it always returns UTF-8 data unless KeepEncoding is specified. diff -Nru xmltv-0.6.3/Makefile.PL xmltv-1.0.0/Makefile.PL --- xmltv-0.6.3/Makefile.PL 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/Makefile.PL 2021-02-09 10:49:46.000000000 +0000 @@ -57,7 +57,7 @@ ); our $VERSION; -$VERSION = '0.6.3'; +$VERSION = '1.0.0'; # Fragment of Makefile text to give the directory where files should # be installed. The extra '.' 
in the middle of the path is to avoid @@ -277,27 +277,27 @@ 'HTTP::Cookies' => 0, }, }, - # { name => 'tv_grab_ch_search', - # blurb => 'Grabber for Switzerland', - # exes => [ 'grab/ch_search/tv_grab_ch_search' ], - # deps => [ 'grab/ch_search/tv_grab_ch_search' => [ 'grab/ch_search/tv_grab_ch_search.in' ] ], - # pl_files => { 'grab/ch_search/tv_grab_ch_search.PL' => 'grab/ch_search/tv_grab_ch_search' }, - # to_clean => [ 'grab/ch_search/tv_grab_ch_search' ], - # grab_need_share => [ 'ch_search' ], - # prereqs => { 'HTML::Entities' => 1.27, - # 'HTML::TreeBuilder' => 0, - # 'HTTP::Cookies' => 0, - # 'URI::Escape' => 0, - # 'URI::URL' => 0, }, - # }, - - { name => 'tv_grab_dk_dr', - blurb => 'Grabber for Denmark (dr.dk)', - exes => [ 'grab/dk_dr/tv_grab_dk_dr' ], - prereqs => { 'DateTime' => 0, - 'IO::Scalar' => 0, }, + { name => 'tv_grab_ch_search', + blurb => 'Grabber for Switzerland', + exes => [ 'grab/ch_search/tv_grab_ch_search' ], + deps => [ 'grab/ch_search/tv_grab_ch_search' => [ 'grab/ch_search/tv_grab_ch_search.in' ] ], + pl_files => { 'grab/ch_search/tv_grab_ch_search.PL' => 'grab/ch_search/tv_grab_ch_search' }, + to_clean => [ 'grab/ch_search/tv_grab_ch_search' ], + grab_need_share => [ 'ch_search' ], + prereqs => { 'HTML::Entities' => 1.27, + 'HTML::TreeBuilder' => 0, + 'HTTP::Cookies' => 0, + 'URI::Escape' => 0, + 'URI::URL' => 0, }, }, + # { name => 'tv_grab_dk_dr', + # blurb => 'Grabber for Denmark (dr.dk)', + # exes => [ 'grab/dk_dr/tv_grab_dk_dr' ], + # prereqs => { 'DateTime' => 0, + # 'IO::Scalar' => 0, }, + # }, + { name => 'tv_grab_eu_epgdata', blurb => '$$ Grabber for some European countries (epgdata.com)', exes => [ 'grab/eu_epgdata/tv_grab_eu_epgdata' ], @@ -472,20 +472,6 @@ 'URI::Encode' => 0, }, }, - # { name => 'tv_grab_se_swedb', - # blurb => 'Grabber for Sweden', - # exes => [ 'grab/se_swedb/tv_grab_se_swedb' ], - # pl_files => { 'grab/se_swedb/tv_grab_se_swedb.PL' - # => 'grab/se_swedb/tv_grab_se_swedb' }, - # to_clean => [ 
'grab/se_swedb/tv_grab_se_swedb' ], - # deps => [ 'grab/se_swedb/tv_grab_se_swedb' - # => [ 'grab/se_swedb/tv_grab_se_swedb.in' ] ], - # prereqs => { 'Compress::Zlib' => 0, - # 'HTTP::Cache::Transparent' => 0, - # 'IO::Scalar' => 0, - # 'XML::LibXML' => 0, }, - # }, - { name => 'tv_grab_tr', blurb => 'Grabber for Turkey (Digiturk)', exes => [ 'grab/tr/tv_grab_tr' ], @@ -496,17 +482,17 @@ 'URI::Escape' => 0, }, }, - { name => 'tv_grab_uk_bleb', - blurb => 'Fast alternative grabber for the UK', - exes => [ 'grab/uk_bleb/tv_grab_uk_bleb' ], - pl_files => { 'grab/uk_bleb/tv_grab_uk_bleb.PL' => 'grab/uk_bleb/tv_grab_uk_bleb' }, - share_files => { 'grab/uk_bleb/icon_urls' => 'tv_grab_uk_bleb/icon_urls' }, - to_clean => [ 'grab/uk_bleb/tv_grab_uk_bleb' ], - deps => [ 'grab/uk_bleb/tv_grab_uk_bleb' => [ 'grab/uk_bleb/tv_grab_uk_bleb.in' ] ], - grab_need_share => [ 'uk_bleb' ], - prereqs => { 'Archive::Zip' => 0, - 'IO::Scalar' => 0, }, - }, + # { name => 'tv_grab_uk_bleb', + # blurb => 'Fast alternative grabber for the UK', + # exes => [ 'grab/uk_bleb/tv_grab_uk_bleb' ], + # pl_files => { 'grab/uk_bleb/tv_grab_uk_bleb.PL' => 'grab/uk_bleb/tv_grab_uk_bleb' }, + # share_files => { 'grab/uk_bleb/icon_urls' => 'tv_grab_uk_bleb/icon_urls' }, + # to_clean => [ 'grab/uk_bleb/tv_grab_uk_bleb' ], + # deps => [ 'grab/uk_bleb/tv_grab_uk_bleb' => [ 'grab/uk_bleb/tv_grab_uk_bleb.in' ] ], + # grab_need_share => [ 'uk_bleb' ], + # prereqs => { 'Archive::Zip' => 0, + # 'IO::Scalar' => 0, }, + # }, { name => 'tv_grab_uk_tvguide', blurb => 'Grabber for UK and Ireland using TV Guide website', @@ -929,14 +915,11 @@ # $inherited .= q{ -xmltv.exe :: $(EXE_FILES) lib/exe_wrap.pl lib/exe_opt.pl +xmltv.exe :: $(EXE_FILES) lib/xmltv.pl lib/exe_opt.pl echo $(EXE_FILES) >exe_files.txt perl lib/exe_opt.pl $(VERSION) >exe_opt.txt - echo -lib $(INST_ARCHLIB) --lib $(INST_LIB) >>exe_opt.txt - echo -add "$(EXE_FILES)" >>exe_opt.txt - echo -bind exe_files.txt >>exe_opt.txt - echo -exe xmltv.exe 
>>exe_opt.txt - perlapp @exe_opt.txt lib/exe_wrap.pl + echo -a exe_files.txt >>exe_opt.txt + pp_autolink -o xmltv.exe --cachedeps=pp.cache --reusable @exe_opt.txt lib/xmltv.pl $(EXE_FILES) $(RM_F) exe_files.txt $(RM_F) exe_opt.txt diff -Nru xmltv-0.6.3/MANIFEST xmltv-1.0.0/MANIFEST --- xmltv-0.6.3/MANIFEST 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/MANIFEST 2021-02-09 10:49:46.000000000 +0000 @@ -119,9 +119,6 @@ grab/pt_meo/tv_grab_pt_meo grab/pt_vodafone/test.conf grab/pt_vodafone/tv_grab_pt_vodafone -grab/se_swedb/test.conf -grab/se_swedb/tv_grab_se_swedb.PL -grab/se_swedb/tv_grab_se_swedb.in grab/test_grabbers grab/tr/test.conf grab/tr/tv_grab_tr @@ -166,7 +163,7 @@ lib/XMLTV.pm.PL lib/XMLTV.pm.in lib/exe_opt.pl -lib/exe_wrap.pl +lib/xmltv.pl lib/set_share_dir.pl t/README t/parallel_test @@ -914,20 +911,40 @@ t/data-tv_imdb/lists/movies.list t/data-tv_imdb/lists/plot.list t/data-tv_imdb/lists/ratings.list +t/data-tv_imdb/After-data-freeze.xml +t/data-tv_imdb/After-data-freeze.xml-expected +t/data-tv_imdb/Cast-actor-with-generation.xml +t/data-tv_imdb/Cast-actor-with-generation.xml-expected +t/data-tv_imdb/Cast-actors-and-actresses.xml +t/data-tv_imdb/Cast-actors-and-actresses.xml-expected +t/data-tv_imdb/Cast-billing.xml +t/data-tv_imdb/Cast-billing.xml-expected +t/data-tv_imdb/Cast-duplicate.xml +t/data-tv_imdb/Cast-duplicate.xml-expected +t/data-tv_imdb/Cast-host-or-narrator.xml +t/data-tv_imdb/Cast-host-or-narrator.xml-expected +t/data-tv_imdb/Cast-name-with-suffix.xml +t/data-tv_imdb/Cast-name-with-suffix.xml-expected +t/data-tv_imdb/Cast-role.xml +t/data-tv_imdb/Cast-role.xml-expected +t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml +t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected +t/data-tv_imdb/Director-name-with-suffix.xml +t/data-tv_imdb/Director-name-with-suffix.xml-expected +t/data-tv_imdb/Director-with-generation.xml +t/data-tv_imdb/Director-with-generation.xml-expected +t/data-tv_imdb/Genres-duplicate.xml 
+t/data-tv_imdb/Genres-duplicate.xml-expected +t/data-tv_imdb/Genres-multiple.xml +t/data-tv_imdb/Genres-multiple.xml-expected +t/data-tv_imdb/Genres-single.xml +t/data-tv_imdb/Genres-single.xml-expected +t/data-tv_imdb/Movie1.xml +t/data-tv_imdb/Movie1.xml-expected t/data-tv_imdb/Movie1-case-insensitive.xml t/data-tv_imdb/Movie1-case-insensitive.xml-expected t/data-tv_imdb/Movie1-movies-only.xml t/data-tv_imdb/Movie1-movies-only.xml-expected -t/data-tv_imdb/Movie1.xml -t/data-tv_imdb/Movie1.xml-expected -t/data-tv_imdb/Movie100-years.xml -t/data-tv_imdb/Movie100-years.xml-expected -t/data-tv_imdb/Movie101-movie-and-tv.xml -t/data-tv_imdb/Movie101-movie-and-tv.xml-expected -t/data-tv_imdb/Movie21-accents.xml -t/data-tv_imdb/Movie21-accents.xml-expected -t/data-tv_imdb/Movie22-dots.xml -t/data-tv_imdb/Movie22-dots.xml-expected t/data-tv_imdb/Movie3-and-amp.xml t/data-tv_imdb/Movie3-and-amp.xml-expected t/data-tv_imdb/Movie5-ignore-punc.xml @@ -936,10 +953,30 @@ t/data-tv_imdb/Movie5-with-punc.xml-expected t/data-tv_imdb/Movie6-articles.xml t/data-tv_imdb/Movie6-articles.xml-expected -t/data-tv_imdb/Show1-movies-only.xml -t/data-tv_imdb/Show1-movies-only.xml-expected +t/data-tv_imdb/Movie21-accents.xml +t/data-tv_imdb/Movie21-accents.xml-expected +t/data-tv_imdb/Movie22-dots.xml +t/data-tv_imdb/Movie22-dots.xml-expected +t/data-tv_imdb/Movie100-years.xml +t/data-tv_imdb/Movie100-years.xml-expected +t/data-tv_imdb/Movie101-movie-and-tv.xml +t/data-tv_imdb/Movie101-movie-and-tv.xml-expected +t/data-tv_imdb/Movie-same-year-movie-and-series.xml +t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected +t/data-tv_imdb/Movie-startswith-hyphen.xml +t/data-tv_imdb/Movie-startswith-hyphen.xml-expected +t/data-tv_imdb/Movie-two-in-same-year.xml +t/data-tv_imdb/Movie-two-in-same-year.xml-expected +t/data-tv_imdb/Movie-with-aka.xml +t/data-tv_imdb/Movie-with-aka.xml-expected +t/data-tv_imdb/Movie-with-unknown-year.xml +t/data-tv_imdb/Movie-with-unknown-year.xml-expected 
+t/data-tv_imdb/Ratings.xml +t/data-tv_imdb/Ratings.xml-expected t/data-tv_imdb/Show1.xml t/data-tv_imdb/Show1.xml-expected +t/data-tv_imdb/Show1-movies-only.xml +t/data-tv_imdb/Show1-movies-only.xml-expected t/test_tv_imdb.t t/data/tv_sort_all_UTF8.expected t/data/tv_sort_amp_xml_amp_xml.expected @@ -1377,6 +1414,47 @@ t/data/tv_grep_on_before_200302161330_UTC_test_remove_some_overlapping_xml.expected t/data/tv_grep_premiere_test_remove_some_overlapping_xml.expected t/data/tv_grep_previously_shown_test_remove_some_overlapping_xml.expected +t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected +t/data/tv_grep_channel_id_exp_sat_amp_xml.expected +t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected +t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected +t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected +t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected +t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected +t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected +t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected +t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected +t/data/tv_grep_channel_id_exp_sat_clump_xml.expected +t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected +t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected +t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected +t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected +t/data/tv_grep_channel_id_exp_sat_dups_xml.expected +t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected +t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected +t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected +t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected +t/data/tv_grep_channel_id_exp_sat_empty_xml.expected +t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected +t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected 
+t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected +t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected +t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected +t/data/tv_grep_channel_id_exp_sat_length_xml.expected +t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected +t/data/tv_grep_channel_id_exp_sat_simple_xml.expected +t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected +t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected +t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected +t/data/tv_grep_channel_id_exp_sat_sort_xml.expected +t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected +t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected +t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected +t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected +t/data/tv_grep_channel_id_exp_sat_test_xml.expected +t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected +t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected +t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected t/data/tv_remove_some_overlapping_all_UTF8.expected t/data/tv_remove_some_overlapping_amp_xml.expected t/data/tv_remove_some_overlapping_amp_xml_amp_xml.expected diff -Nru xmltv-0.6.3/README.md xmltv-1.0.0/README.md --- xmltv-0.6.3/README.md 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/README.md 2021-02-09 10:49:46.000000000 +0000 @@ -4,10 +4,11 @@

    -# XMLTV 0.6.3 +# XMLTV 1.0.0 ## Table of Contents -- [XMLTV 0.6.2](#xmltv-063) + +- [XMLTV](#xmltv) * [Description](#description) * [Changes](#changes) * [Installation (Package)](#installation-package) @@ -165,6 +166,7 @@ Tk (tv_check) Tk::TableMatrix (tv_check) URI (for some of the grabbers, part of URI) +URI::Encode (tv_grab_pt_vodafone) URI::Escape (for some of the grabbers, part of URI) XML::DOM (tv_grab_is) XML::LibXSLT (tv_grab_is) @@ -184,6 +186,7 @@ PerlIO::gzip (can make tv_imdb a bit faster) Term::ProgressBar (displays pretty progress bars) Unicode::String (improved character handling in tv_to_latex) +URI::Escape::XS (faster URI handling) ``` ### JSON libraries @@ -256,4 +259,4 @@ Finally, we run an IRC channel #xmltv on Freenode. Please join us! --- Nick Morrott, knowledgejunkie@gmail.com, 2020-08-22 +-- Nick Morrott, knowledgejunkie@gmail.com, 2021-02-07 diff -Nru xmltv-0.6.3/t/data/clump_extract.xml xmltv-1.0.0/t/data/clump_extract.xml --- xmltv-0.6.3/t/data/clump_extract.xml 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data/clump_extract.xml 2021-02-09 10:49:46.000000000 +0000 @@ -1,3 +1,6 @@ + + + Kilroy diff -Nru xmltv-0.6.3/t/data/test_empty.xml xmltv-1.0.0/t/data/test_empty.xml --- xmltv-0.6.3/t/data/test_empty.xml 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data/test_empty.xml 2021-02-09 10:49:46.000000000 +0000 @@ -1,3 +1,6 @@ + + + A programme with empty stuff that should not be written out again diff -Nru xmltv-0.6.3/t/data/test_remove_some_overlapping.xml xmltv-1.0.0/t/data/test_remove_some_overlapping.xml --- xmltv-0.6.3/t/data/test_remove_some_overlapping.xml 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data/test_remove_some_overlapping.xml 2021-02-09 10:49:46.000000000 +0000 @@ -1,3 +1,6 @@ + + + Container A diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected --- 
xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_all_UTF8.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,35 @@ + + + + + + 3SAT + + + blah + blah + Blah Blah Blah. + + blah + a + b + + 19901011 + ES + 2 . 9 . 0/1 + + + + English + + + PG + + + 3/3 + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_amp_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_clump_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_dups_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml_empty_xml_clump_xml.expected 
2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml_empty_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_amp_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_amp_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_attrs_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_extract_1_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_extract_xml.expected 2021-02-09 10:49:46.000000000 
+0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml_amp_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml_clump_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml_dups_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml_empty_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_clump_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_clump_xml.expected 2021-02-09 
10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml_amp_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml_clump_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml_dups_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml_empty_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_dups_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_dups_xml.expected 2021-02-09 
10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml_amp_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml_clump_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml_dups_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml_empty_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_empty_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ 
xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_empty_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_intervals_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_length_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_length_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_length_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_length_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_overlap_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_simple_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_simple_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_simple_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_simple_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ 
xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_simple_xml_x_whatever_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_sort1_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_sort2_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_sort_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_sort_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_sort_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_sort_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_empty_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_livre_xml.expected 2021-02-09 
10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_remove_some_overlapping_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_sort_by_channel_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,35 @@ + + + + + + 3SAT + + + blah + blah + Blah Blah Blah. + + blah + a + b + + 19901011 + ES + 2 . 9 . 0/1 + + + + English + + + PG + + + 3/3 + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_test_xml_test_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,62 @@ + + + + + + 3SAT + + + blah + blah + Blah Blah Blah. + + blah + a + b + + 19901011 + ES + 2 . 9 . 
0/1 + + + + English + + + PG + + + 3/3 + + + + + blah + blah + Blah Blah Blah. + + blah + a + b + + 19901011 + ES + 2 . 9 . 0/1 + + + + English + + + PG + + + 3/3 + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_whitespace_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected --- xmltv-0.6.3/t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data/tv_grep_channel_id_exp_sat_x_whatever_xml.expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/After-data-freeze.xml xmltv-1.0.0/t/data-tv_imdb/After-data-freeze.xml --- xmltv-0.6.3/t/data-tv_imdb/After-data-freeze.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/After-data-freeze.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + Unarmed Man + 2019 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/After-data-freeze.xml-expected xmltv-1.0.0/t/data-tv_imdb/After-data-freeze.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/After-data-freeze.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/After-data-freeze.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,9 @@ + + + + + + Unarmed Man + 2019 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-actors-and-actresses.xml xmltv-1.0.0/t/data-tv_imdb/Cast-actors-and-actresses.xml --- xmltv-0.6.3/t/data-tv_imdb/Cast-actors-and-actresses.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-actors-and-actresses.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 
@@ + + + + + Titanic + 1997 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected xmltv-1.0.0/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-actors-and-actresses.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,15 @@ + + + + + + Titanic + + Leonardo DiCaprio + Kate Winslet + + 1997 + Movie + https://www.imdb.com/find?q=Titanic%20%281997%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-actor-with-generation.xml xmltv-1.0.0/t/data-tv_imdb/Cast-actor-with-generation.xml --- xmltv-0.6.3/t/data-tv_imdb/Cast-actor-with-generation.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-actor-with-generation.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + Murder101 + 2014 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-actor-with-generation.xml-expected xmltv-1.0.0/t/data-tv_imdb/Cast-actor-with-generation.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Cast-actor-with-generation.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-actor-with-generation.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + + Murder101 + + Percy Daggs III + + 2014 + Movie + https://www.imdb.com/find?q=Murder101%20%282014%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-billing.xml xmltv-1.0.0/t/data-tv_imdb/Cast-billing.xml --- xmltv-0.6.3/t/data-tv_imdb/Cast-billing.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-billing.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + #Rip + 2013 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-billing.xml-expected xmltv-1.0.0/t/data-tv_imdb/Cast-billing.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Cast-billing.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-billing.xml-expected 
2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,16 @@ + + + + + + #Rip + + Marilyn Ghigliotti + Missi Pyle + Naomi Grossman + + 2013 + Movie + https://www.imdb.com/find?q=%23Rip%20%282013%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-duplicate.xml xmltv-1.0.0/t/data-tv_imdb/Cast-duplicate.xml --- xmltv-0.6.3/t/data-tv_imdb/Cast-duplicate.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-duplicate.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + #SketchPack + 2015 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-duplicate.xml-expected xmltv-1.0.0/t/data-tv_imdb/Cast-duplicate.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Cast-duplicate.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-duplicate.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + + #SketchPack + + Lucy Scott-Smith + + 2015 + TV Series + https://www.imdb.com/find?q=%22%23SketchPack%22%20%282015%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-host-or-narrator.xml xmltv-1.0.0/t/data-tv_imdb/Cast-host-or-narrator.xml --- xmltv-0.6.3/t/data-tv_imdb/Cast-host-or-narrator.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-host-or-narrator.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,28 @@ + + + + + Bookclub + 2015 + + + LolliLove + 2004 + + + Breaking Genres + 2015 + + + The Jean Bowring Show + 1957 + + + New Now Next Awards + 2008 + + + 3 Weeks in Yerevan + 2016 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-host-or-narrator.xml-expected xmltv-1.0.0/t/data-tv_imdb/Cast-host-or-narrator.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Cast-host-or-narrator.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-host-or-narrator.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,59 @@ + + + + + + Bookclub + + Fabio Huwyler + + 2015 + TV Series + https://www.imdb.com/find?q=%22Bookclub%22%20%282015%29&s=tt&exact=true 
+ + + LolliLove + + Peter Alton + + 2004 + Movie + https://www.imdb.com/find?q=LolliLove%20%282004%29&s=tt&exact=true + + + Breaking Genres + + Amrit Singh + + 2015 + TV Movie + https://www.imdb.com/find?q=Breaking%20Genres%20%282015%29&s=tt&exact=true + + + The Jean Bowring Show + + Jean Bowring + + 1957 + TV Series + https://www.imdb.com/find?q=%22The%20Jean%20Bowring%20Show%22%20%281957%29&s=tt&exact=true + + + New Now Next Awards + + Gloria Bigelow + + 2008 + TV Movie + https://www.imdb.com/find?q=New%20Now%20Next%20Awards%20%282008%29&s=tt&exact=true + + + 3 Weeks in Yerevan + + Mary Asatryan + + 2016 + Movie + https://www.imdb.com/find?q=3%20Weeks%20in%20Yerevan%20%282016%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-name-with-suffix.xml xmltv-1.0.0/t/data-tv_imdb/Cast-name-with-suffix.xml --- xmltv-0.6.3/t/data-tv_imdb/Cast-name-with-suffix.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-name-with-suffix.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,9 @@ + + + + + #Selfie + cast: Elizabeth Kent should appear twice + 2015 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-name-with-suffix.xml-expected xmltv-1.0.0/t/data-tv_imdb/Cast-name-with-suffix.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Cast-name-with-suffix.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-name-with-suffix.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,17 @@ + + + + + + #Selfie + cast: Elizabeth Kent should appear twice + + Karina Cornwell + Elizabeth Kent + Elizabeth Kent + + 2015 + Movie + https://www.imdb.com/find?q=%23Selfie%20%282015%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Cast-role.xml xmltv-1.0.0/t/data-tv_imdb/Cast-role.xml --- xmltv-0.6.3/t/data-tv_imdb/Cast-role.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-role.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,12 @@ + + + + + #REV + 2015 + + + Titanic + 1997 + + diff -Nru 
xmltv-0.6.3/t/data-tv_imdb/Cast-role.xml-expected xmltv-1.0.0/t/data-tv_imdb/Cast-role.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Cast-role.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Cast-role.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,25 @@ + + + + + + #REV + + Poroma Banerjee + Sharon Zachariah + + 2015 + Movie + https://www.imdb.com/find?q=%23REV%20%282015%29&s=tt&exact=true + + + Titanic + + Leonardo DiCaprio + Kate Winslet + + 1997 + Movie + https://www.imdb.com/find?q=Titanic%20%281997%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml xmltv-1.0.0/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml --- xmltv-0.6.3/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,12 @@ + + + + + #Illusion + 2014 + + + #iScream + 2014 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected xmltv-1.0.0/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Director-multiple-and-duplicate-directors.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,24 @@ + + + + + + #Illusion + + Teodora Berglund + Alexandra Jousset + + 2014 + Movie + https://www.imdb.com/find?q=%23Illusion%20%282014%29&s=tt&exact=true + + + #iScream + + Gibran Tanwir + + 2014 + Movie + https://www.imdb.com/find?q=%23iScream%20%282014%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Director-name-with-suffix.xml xmltv-1.0.0/t/data-tv_imdb/Director-name-with-suffix.xml --- xmltv-0.6.3/t/data-tv_imdb/Director-name-with-suffix.xml 1970-01-01 00:00:00.000000000 +0000 +++ 
xmltv-1.0.0/t/data-tv_imdb/Director-name-with-suffix.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + Grease Monkeys + 1979 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Director-name-with-suffix.xml-expected xmltv-1.0.0/t/data-tv_imdb/Director-name-with-suffix.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Director-name-with-suffix.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Director-name-with-suffix.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + + Grease Monkeys + + Mark Aaron + + 1979 + Movie + https://www.imdb.com/find?q=Grease%20Monkeys%20%281979%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Director-with-generation.xml xmltv-1.0.0/t/data-tv_imdb/Director-with-generation.xml --- xmltv-0.6.3/t/data-tv_imdb/Director-with-generation.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Director-with-generation.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + The Meek + 2017 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Director-with-generation.xml-expected xmltv-1.0.0/t/data-tv_imdb/Director-with-generation.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Director-with-generation.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Director-with-generation.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + + The Meek + + Harold Jackson III + + 2017 + Movie + https://www.imdb.com/find?q=The%20Meek%20%282017%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Genres-duplicate.xml xmltv-1.0.0/t/data-tv_imdb/Genres-duplicate.xml --- xmltv-0.6.3/t/data-tv_imdb/Genres-duplicate.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Genres-duplicate.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + 'C'-Man + 1949 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Genres-duplicate.xml-expected xmltv-1.0.0/t/data-tv_imdb/Genres-duplicate.xml-expected --- 
xmltv-0.6.3/t/data-tv_imdb/Genres-duplicate.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Genres-duplicate.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + + 'C'-Man + 1949 + Movie + Crime + Drama + Film-Noir + https://www.imdb.com/find?q=%27C%27-Man%20%281949%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Genres-multiple.xml xmltv-1.0.0/t/data-tv_imdb/Genres-multiple.xml --- xmltv-0.6.3/t/data-tv_imdb/Genres-multiple.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Genres-multiple.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + [Film #9 Title] + 2015 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Genres-multiple.xml-expected xmltv-1.0.0/t/data-tv_imdb/Genres-multiple.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Genres-multiple.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Genres-multiple.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + + [Film #9 Title] + 2015 + Movie + Comedy + Fantasy + Short + https://www.imdb.com/find?q=%5BFilm%20%239%20Title%5D%20%282015%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Genres-single.xml xmltv-1.0.0/t/data-tv_imdb/Genres-single.xml --- xmltv-0.6.3/t/data-tv_imdb/Genres-single.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Genres-single.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + (Mon) Jour de chance + 2004 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Genres-single.xml-expected xmltv-1.0.0/t/data-tv_imdb/Genres-single.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Genres-single.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Genres-single.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,12 @@ + + + + + + (Mon) Jour de chance + 2004 + Movie + Short + https://www.imdb.com/find?q=%28Mon%29%20Jour%20de%20chance%20%282004%29&s=tt&exact=true + + diff -Nru 
xmltv-0.6.3/t/data-tv_imdb/lists/actors.list xmltv-1.0.0/t/data-tv_imdb/lists/actors.list --- xmltv-0.6.3/t/data-tv_imdb/lists/actors.list 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/lists/actors.list 2021-02-09 10:49:46.000000000 +0000 @@ -11,3 +11,17 @@ Campbell, Bruce (I) Army of Darkness (1992) [Ash] <1> Actor, Bruce (I) Movie1 (1990) [Ash] <1> Movie2 (1991) [Ash] <1> +Dibnah, Fred A Tribute to Fred Dibnah (2004) (TV) (archive footage) [Himself] <2> + Dig with Dibnah (2004) (TV) [Himself - Presenter] <1> + Fred Dibnah: Steeplejack (1979) (TV) [Himself] <1> +DiCaprio, Leonardo 'Catch Me If You Can': Behind the Camera (2003) (V) [Himself] <6> + Titanic (1997) [Jack Dawson] <1> +Huwyler, Fabio "Bookclub" (2015) [Himself - Host] +Daggs III, Percy Murder101 (2014) [Carlyle] <9> +Alton, Peter LolliLove (2004) (voice) [Narrator] <3> +Singh, Amrit (I) 2016 Winter Film Awards (2016) (TV) [Presenter] + A Social Conversation with Bernie (2016) (TV) [Himself - Host] + Breaking Genres (2015) (TV) [Himself - Host] +Singh, Amit (I) Corporate (2006) + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/lists/actresses.list xmltv-1.0.0/t/data-tv_imdb/lists/actresses.list --- xmltv-0.6.3/t/data-tv_imdb/lists/actresses.list 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/lists/actresses.list 2021-02-09 10:49:46.000000000 +0000 @@ -10,3 +10,24 @@ ---- ------ Actor, Betty (I) Movie1 (1990) [Betty] <1> Movie2 (1991) [Betty] <1> +Banerjee, Poroma (II) #REV (2015) [Cinematographer] +Zachariah, Sharon #REV (2015) [Interviewee] +Ghigliotti, Marilyn #Rip (2013) (voice) [Lydia Walters] <1> +Griffin, Martina #Rip (2013) [Juanita] <10> +Grossman, Naomi (II) #Rip (2013) [Bella Tiavas] <3> +Lee, Michelle (XXXVI) #Rip (2013) [Female News Anchor] <11> +Leonards, Ammie #Rip (2013) [CourtNay] <6> +Pyle, Missi #Rip (2013) [Lydia Walters] <2> +Shea, Beth #Rip (2013) [Liz Tanner] <4> +Cornwell, Karina (II) #Selfie (2015) (as Karina Cornell) [Robot Girl] +Kent, Elizabeth 
(V) #Selfie (2015) [The Woman] +Kent, Elizabeth (VI) #Selfie (2015) [The Woman] +Winslet, Kate 11th Annual Screen Actors Guild Awards (2005) (TV) [Herself - Nominee & Presenter] + Titanic (1997) [Rose Dewitt Bukater] <2> + Reflections on Titanic (2012) [Herself] <3> +Asatryan, Mary 3 Weeks in Yerevan (2016) [Radio Host #2] +Bowring, Jean "The Jean Bowring Show" (1957) [Herself - Hostess] +Bigelow, Gloria New Now Next Awards (2008) (TV) [Herself - Host] +Haze, Roxxy "#BedTimeBitchin" (2014) [Herself - Host] +Scott-Smith, Lucy "#SketchPack" (2015) +Scott-Smith, Lucy "#SketchPack" (2015) [Various (2015)] diff -Nru xmltv-0.6.3/t/data-tv_imdb/lists/directors.list xmltv-1.0.0/t/data-tv_imdb/lists/directors.list --- xmltv-0.6.3/t/data-tv_imdb/lists/directors.list 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/lists/directors.list 2021-02-09 10:49:46.000000000 +0000 @@ -14,3 +14,15 @@ Director,In1915 Movie100 (1915) Director,In1943 Movie100 (1943) Director,In1953 Movie100 (1953) +Aaron, Mark (I) Grease Monkeys (1979) + The Rivermen (1980) +Berglund, Teodora (II) #Illusion (2014) + #Illusion (2014) (co-director) +Jousset, Alexandra #Illusion (2014) + #Illusion (2014) (co-director) +Tanwir, Gibran #iScream (2014) (segment "Beauty Boarding") + #iScream (2014) (segment "Caller ID") + #iScream (2014) (segment "Nightmare") + #iScream (2014) (segment "The Anniversary") + #iScream (2014) (segment "VooDoo") +Jackson III, Harold The Meek (2017) diff -Nru xmltv-0.6.3/t/data-tv_imdb/lists/genres.list xmltv-1.0.0/t/data-tv_imdb/lists/genres.list --- xmltv-0.6.3/t/data-tv_imdb/lists/genres.list 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/lists/genres.list 2021-02-09 10:49:46.000000000 +0000 @@ -9,5 +9,13 @@ Army of Darkness (1992) Horror Movie1 (1990) Horror Movie2 (1991) Mystery +[Film #9 Title] (2015) Comedy +[Film #9 Title] (2015) Fantasy +[Film #9 Title] (2015) Short +(Mon) Jour de chance (2004) Short +'C'-Man (1949) Crime +'C'-Man (1949) 
Drama +'C'-Man (1949) Film-Noir +'C'-Man (1949) Crime diff -Nru xmltv-0.6.3/t/data-tv_imdb/lists/movies.list xmltv-1.0.0/t/data-tv_imdb/lists/movies.list --- xmltv-0.6.3/t/data-tv_imdb/lists/movies.list 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/lists/movies.list 2021-02-09 10:49:46.000000000 +0000 @@ -30,6 +30,7 @@ Een Movie20 (1991) 1991 Movie21 aeiouaecnssy (1991) 1991 Movie22 dots (1991) 1991 +"The Show1" (2002) 1991 "The Show1" (2002) {Episode title1 (#1.1)} 1991 "The Show1" (2002) {Episode title2 (#1.2)} 1991 "The Show1" (2002) {Episode title1 (#2.1)} 1991 @@ -45,3 +46,44 @@ Movie101 (1993) (V) 1993 "Movie101" (1988) 1988 "Movie101" (1988) {Episode1 Part 1 (#8.1)} 1992 +'83 (2017/I) 2017 +'83 (2017/II) 2017 +Journey to the Center of the Earth (2008) 2008 +Journey to the Center of the Earth (2008) (TV) 2008 +Journey to the Center of the Earth (2008) (V) 2008 +"Ashes to Ashes" (2008) 2008 +Ashes to Ashes (2008) 2008 +California Cornflakes (????) ???? +Zed (????/II) ???? +Family Prayers (aka Karim & Suha) (2010) 2010 +"Grease Monkeys" (2003) 2003-???? +"Grease Monkeys" (2003) {Almost Blue (#1.4)} 2003 +Grease Monkeys (1979) 1979 +#Illusion (2014) 2014 +#iScream (2014) 2014 +#REV (2015) 2015 +#Rip (2013) 2013 +#Selfie (2015) 2015 +Titanic (1997) 1997 +Titanic (2012) 2012 +Fred Dibnah: Steeplejack (1979) (TV) 1979 +"Bookclub" (2015) 2015-???? +Murder101 (2014) 2014 +LolliLove (2004) 2004 +Breaking Genres (2015) (TV) 2015 +Corporate (2006) 2006 +3 Weeks in Yerevan (2016) 2016 +"The Jean Bowring Show" (1957) 1957-1960 +New Now Next Awards (2008) (TV) 2008 +"#BedTimeBitchin" (2014) 2014-???? +#ClivesClub: The Somers Solstice (2015) 2015 +"#SketchPack" (2015) 2015-???? +[Film #9 Title] (2015) 2015 +(Mon) Jour de chance (2004) 2004 +'C'-Man (1949) 1949 +"#nitTWITS" (2011) 2011-???? 
+The Meek (2015) 2015 +The Meek (2017) 2017 +-1: Minus One (2016) 2016 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/lists/ratings.list xmltv-1.0.0/t/data-tv_imdb/lists/ratings.list --- xmltv-0.6.3/t/data-tv_imdb/lists/ratings.list 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/lists/ratings.list 2021-02-09 10:49:46.000000000 +0000 @@ -9,3 +9,4 @@ 0000002211 000001 9.9 Army of Darkness (1992) 0000002211 000001 1.0 Movie1 (1990) 0000002211 000002 1.1 Movie2 (1991) + 1.1..2...5 8 7.0 "#nitTWITS" (2011) diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie100-years.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie100-years.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie100-years.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie100-years.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -9,7 +9,7 @@ 1915 Movie - http://us.imdb.com/M/title-exact?Movie100%20%281915%29 + https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&exact=true Movie100 @@ -18,7 +18,7 @@ 1914 Movie - http://us.imdb.com/M/title-exact?Movie100%20%281915%29 + https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&exact=true Movie100 @@ -27,7 +27,7 @@ 1913 Movie - http://us.imdb.com/M/title-exact?Movie100%20%281915%29 + https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&exact=true Movie100 @@ -40,7 +40,7 @@ 1916 Movie - http://us.imdb.com/M/title-exact?Movie100%20%281915%29 + https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&exact=true Movie100 @@ -49,7 +49,7 @@ 1917 Movie - http://us.imdb.com/M/title-exact?Movie100%20%281915%29 + https://www.imdb.com/find?q=Movie100%20%281915%29&s=tt&exact=true Movie100 @@ -62,7 +62,7 @@ 1943 Movie - http://us.imdb.com/M/title-exact?Movie100%20%281943%29 + https://www.imdb.com/find?q=Movie100%20%281943%29&s=tt&exact=true Movie100 @@ -71,7 +71,7 @@ 1953 Movie - http://us.imdb.com/M/title-exact?Movie100%20%281953%29 + https://www.imdb.com/find?q=Movie100%20%281953%29&s=tt&exact=true Movie100 @@ -81,6 +81,6 @@ Movie100 
1993 Video Movie - http://us.imdb.com/M/title-exact?Movie100%20%281993%29 + https://www.imdb.com/find?q=Movie100%20%281993%29&s=tt&exact=true diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie101-movie-and-tv.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie101-movie-and-tv.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie101-movie-and-tv.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie101-movie-and-tv.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -6,23 +6,23 @@ Movie101 1992 Movie - http://us.imdb.com/M/title-exact?Movie101%20%281992%29 + https://www.imdb.com/find?q=Movie101%20%281992%29&s=tt&exact=true Movie101 1993 Movie - http://us.imdb.com/M/title-exact?Movie101%20%281993%29 + https://www.imdb.com/find?q=Movie101%20%281993%29&s=tt&exact=true Movie101 1988 TV Series - http://us.imdb.com/M/title-exact?%22Movie101%22%20%281988%29 + https://www.imdb.com/find?q=%22Movie101%22%20%281988%29&s=tt&exact=true Movie101 TV Series - http://us.imdb.com/M/title-exact?%22Movie101%22%20%281988%29 + https://www.imdb.com/find?q=%22Movie101%22%20%281988%29&s=tt&exact=true diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie1-case-insensitive.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie1-case-insensitive.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie1-case-insensitive.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie1-case-insensitive.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -15,7 +15,7 @@ Horror Horror Mystery - http://us.imdb.com/M/title-exact?Movie1%20%281990%29 + https://www.imdb.com/find?q=Movie1%20%281990%29&s=tt&exact=true 1.0/10 diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie1-movies-only.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie1-movies-only.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie1-movies-only.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie1-movies-only.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -15,7 +15,7 @@ Horror Horror Mystery - 
http://us.imdb.com/M/title-exact?Movie1%20%281990%29 + https://www.imdb.com/find?q=Movie1%20%281990%29&s=tt&exact=true 1.0/10 diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie1.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie1.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie1.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie1.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -15,7 +15,7 @@ Horror Horror Mystery - http://us.imdb.com/M/title-exact?Movie1%20%281990%29 + https://www.imdb.com/find?q=Movie1%20%281990%29&s=tt&exact=true 1.0/10 diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie21-accents.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie21-accents.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie21-accents.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie21-accents.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -6,90 +6,90 @@ Movie21 aeiouaecnssy 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 Àeiouaecnssy 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 aÈiouaecnssy 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 aeÌouaecnssy 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 aeiÒuaecnssy 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 aeioÙaecnssy 1991 Movie - 
http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 aeiouÆcnssy 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 aeiouaeÇnssy 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 aeiouaecÑssy 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 aeiouaecnßy 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 aeiouaecnssÝ 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 ÀÈÌÒÙæÇÑßÝ 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true Movie21 aeiouaecnssy Movie21 ÀÈÌÒÙæÇÑßÝ¿ 1991 Movie - http://us.imdb.com/M/title-exact?Movie21%20aeiouaecnssy%20%281991%29 + https://www.imdb.com/find?q=Movie21%20aeiouaecnssy%20%281991%29&s=tt&exact=true diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie22-dots.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie22-dots.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie22-dots.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie22-dots.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -6,20 +6,20 @@ Movie22 dots 1991 Movie - http://us.imdb.com/M/title-exact?Movie22%20dots%20%281991%29 + 
https://www.imdb.com/find?q=Movie22%20dots%20%281991%29&s=tt&exact=true Movie22 dots M.o.v.i.e.2.2. dots 1991 Movie - http://us.imdb.com/M/title-exact?Movie22%20dots%20%281991%29 + https://www.imdb.com/find?q=Movie22%20dots%20%281991%29&s=tt&exact=true Movie22 dots Movie22 d.o.t.s. 1991 Movie - http://us.imdb.com/M/title-exact?Movie22%20dots%20%281991%29 + https://www.imdb.com/find?q=Movie22%20dots%20%281991%29&s=tt&exact=true diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie3-and-amp.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie3-and-amp.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie3-and-amp.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie3-and-amp.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -6,26 +6,26 @@ Movie3 and more 1991 Movie - http://us.imdb.com/M/title-exact?Movie3%20and%20more%20%281991%29 + https://www.imdb.com/find?q=Movie3%20and%20more%20%281991%29&s=tt&exact=true Movie3 and more Movie3 & more 1991 Movie - http://us.imdb.com/M/title-exact?Movie3%20and%20more%20%281991%29 + https://www.imdb.com/find?q=Movie3%20and%20more%20%281991%29&s=tt&exact=true Movie4 & more 1991 Movie - http://us.imdb.com/M/title-exact?Movie4%20%26%20more%20%281991%29 + https://www.imdb.com/find?q=Movie4%20%26%20more%20%281991%29&s=tt&exact=true Movie4 & more Movie4 and more 1991 Movie - http://us.imdb.com/M/title-exact?Movie4%20%26%20more%20%281991%29 + https://www.imdb.com/find?q=Movie4%20%26%20more%20%281991%29&s=tt&exact=true diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie5-ignore-punc.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie5-ignore-punc.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie5-ignore-punc.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie5-ignore-punc.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -6,20 +6,20 @@ Movie5 no punctuation 1991 Movie - http://us.imdb.com/M/title-exact?Movie5%20no%20punctuation%20%281991%29 + 
https://www.imdb.com/find?q=Movie5%20no%20punctuation%20%281991%29&s=tt&exact=true Movie5 no punctuation Movie5 no .....punctuation 1991 Movie - http://us.imdb.com/M/title-exact?Movie5%20no%20punctuation%20%281991%29 + https://www.imdb.com/find?q=Movie5%20no%20punctuation%20%281991%29&s=tt&exact=true Movie5 no punctuation Movie5:Movie5 no punctuation 1991 Movie - http://us.imdb.com/M/title-exact?Movie5%20no%20punctuation%20%281991%29 + https://www.imdb.com/find?q=Movie5%20no%20punctuation%20%281991%29&s=tt&exact=true diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie5-with-punc.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie5-with-punc.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie5-with-punc.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie5-with-punc.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -6,6 +6,6 @@ Movie5's with punctuation 1992 Movie - http://us.imdb.com/M/title-exact?Movie5%27s%20with%20punctuation%20%281992%29 + https://www.imdb.com/find?q=Movie5%27s%20with%20punctuation%20%281992%29&s=tt&exact=true diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie6-articles.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie6-articles.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie6-articles.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie6-articles.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -6,188 +6,188 @@ The Movie6 1991 Movie - http://us.imdb.com/M/title-exact?The%20Movie6%20%281991%29 + https://www.imdb.com/find?q=The%20Movie6%20%281991%29&s=tt&exact=true The Movie7 1991 Movie - http://us.imdb.com/M/title-exact?The%20Movie7%20%281991%29 + https://www.imdb.com/find?q=The%20Movie7%20%281991%29&s=tt&exact=true The Movie7 Movie7, The 1991 Movie - http://us.imdb.com/M/title-exact?The%20Movie7%20%281991%29 + https://www.imdb.com/find?q=The%20Movie7%20%281991%29&s=tt&exact=true A Movie8 1991 Movie - http://us.imdb.com/M/title-exact?A%20Movie8%20%281991%29 + 
https://www.imdb.com/find?q=A%20Movie8%20%281991%29&s=tt&exact=true A Movie8 Movie8, A 1991 Movie - http://us.imdb.com/M/title-exact?A%20Movie8%20%281991%29 + https://www.imdb.com/find?q=A%20Movie8%20%281991%29&s=tt&exact=true Une Movie9 1991 Movie - http://us.imdb.com/M/title-exact?Une%20Movie9%20%281991%29 + https://www.imdb.com/find?q=Une%20Movie9%20%281991%29&s=tt&exact=true Une Movie9 Movie9, Une 1991 Movie - http://us.imdb.com/M/title-exact?Une%20Movie9%20%281991%29 + https://www.imdb.com/find?q=Une%20Movie9%20%281991%29&s=tt&exact=true Les Movie10 1991 Movie - http://us.imdb.com/M/title-exact?Les%20Movie10%20%281991%29 + https://www.imdb.com/find?q=Les%20Movie10%20%281991%29&s=tt&exact=true Les Movie10 Movie10, Les 1991 Movie - http://us.imdb.com/M/title-exact?Les%20Movie10%20%281991%29 + https://www.imdb.com/find?q=Les%20Movie10%20%281991%29&s=tt&exact=true Los Movie11 1991 Movie - http://us.imdb.com/M/title-exact?Los%20Movie11%20%281991%29 + https://www.imdb.com/find?q=Los%20Movie11%20%281991%29&s=tt&exact=true Los Movie11 Movie11, Los 1991 Movie - http://us.imdb.com/M/title-exact?Los%20Movie11%20%281991%29 + https://www.imdb.com/find?q=Los%20Movie11%20%281991%29&s=tt&exact=true Las Movie12 1991 Movie - http://us.imdb.com/M/title-exact?Las%20Movie12%20%281991%29 + https://www.imdb.com/find?q=Las%20Movie12%20%281991%29&s=tt&exact=true Las Movie12 Movie12, Las 1991 Movie - http://us.imdb.com/M/title-exact?Las%20Movie12%20%281991%29 + https://www.imdb.com/find?q=Las%20Movie12%20%281991%29&s=tt&exact=true L' Movie13 1991 Movie - http://us.imdb.com/M/title-exact?L%27%20Movie13%20%281991%29 + https://www.imdb.com/find?q=L%27%20Movie13%20%281991%29&s=tt&exact=true L' Movie13 Movie13, L' 1991 Movie - http://us.imdb.com/M/title-exact?L%27%20Movie13%20%281991%29 + https://www.imdb.com/find?q=L%27%20Movie13%20%281991%29&s=tt&exact=true Le Movie14 1991 Movie - http://us.imdb.com/M/title-exact?Le%20Movie14%20%281991%29 + 
https://www.imdb.com/find?q=Le%20Movie14%20%281991%29&s=tt&exact=true Le Movie14 Movie14, Le 1991 Movie - http://us.imdb.com/M/title-exact?Le%20Movie14%20%281991%29 + https://www.imdb.com/find?q=Le%20Movie14%20%281991%29&s=tt&exact=true La Movie15 1991 Movie - http://us.imdb.com/M/title-exact?La%20Movie15%20%281991%29 + https://www.imdb.com/find?q=La%20Movie15%20%281991%29&s=tt&exact=true La Movie15 Movie15, La 1991 Movie - http://us.imdb.com/M/title-exact?La%20Movie15%20%281991%29 + https://www.imdb.com/find?q=La%20Movie15%20%281991%29&s=tt&exact=true El Movie16 1991 Movie - http://us.imdb.com/M/title-exact?El%20Movie16%20%281991%29 + https://www.imdb.com/find?q=El%20Movie16%20%281991%29&s=tt&exact=true El Movie16 Movie16, El 1991 Movie - http://us.imdb.com/M/title-exact?El%20Movie16%20%281991%29 + https://www.imdb.com/find?q=El%20Movie16%20%281991%29&s=tt&exact=true Das Movie17 1991 Movie - http://us.imdb.com/M/title-exact?Das%20Movie17%20%281991%29 + https://www.imdb.com/find?q=Das%20Movie17%20%281991%29&s=tt&exact=true Das Movie17 Movie17, Das 1991 Movie - http://us.imdb.com/M/title-exact?Das%20Movie17%20%281991%29 + https://www.imdb.com/find?q=Das%20Movie17%20%281991%29&s=tt&exact=true De Movie18 1991 Movie - http://us.imdb.com/M/title-exact?De%20Movie18%20%281991%29 + https://www.imdb.com/find?q=De%20Movie18%20%281991%29&s=tt&exact=true De Movie18 Movie18, De 1991 Movie - http://us.imdb.com/M/title-exact?De%20Movie18%20%281991%29 + https://www.imdb.com/find?q=De%20Movie18%20%281991%29&s=tt&exact=true Het Movie19 1991 Movie - http://us.imdb.com/M/title-exact?Het%20Movie19%20%281991%29 + https://www.imdb.com/find?q=Het%20Movie19%20%281991%29&s=tt&exact=true Het Movie19 Movie19, Het 1991 Movie - http://us.imdb.com/M/title-exact?Het%20Movie19%20%281991%29 + https://www.imdb.com/find?q=Het%20Movie19%20%281991%29&s=tt&exact=true Een Movie20 1991 Movie - http://us.imdb.com/M/title-exact?Een%20Movie20%20%281991%29 + 
https://www.imdb.com/find?q=Een%20Movie20%20%281991%29&s=tt&exact=true Een Movie20 Movie20, Een 1991 Movie - http://us.imdb.com/M/title-exact?Een%20Movie20%20%281991%29 + https://www.imdb.com/find?q=Een%20Movie20%20%281991%29&s=tt&exact=true diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-same-year-movie-and-series.xml xmltv-1.0.0/t/data-tv_imdb/Movie-same-year-movie-and-series.xml --- xmltv-0.6.3/t/data-tv_imdb/Movie-same-year-movie-and-series.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie-same-year-movie-and-series.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + Journey to the Center of the Earth + Multiple titles (movie,video,tv) with same title+year + 2008 + + + Ashes to Ashes + Movie and tv-series with same title+year + 2008 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie-same-year-movie-and-series.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,15 @@ + + + + + + Journey to the Center of the Earth + Multiple titles (movie,video,tv) with same title+year + 2008 + + + Ashes to Ashes + Movie and tv-series with same title+year + 2008 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-startswith-hyphen.xml xmltv-1.0.0/t/data-tv_imdb/Movie-startswith-hyphen.xml --- xmltv-0.6.3/t/data-tv_imdb/Movie-startswith-hyphen.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie-startswith-hyphen.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + -1: Minus One + 2016 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ 
xmltv-1.0.0/t/data-tv_imdb/Movie-startswith-hyphen.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,11 @@ + + + + + + -1: Minus One + 2016 + Movie + https://www.imdb.com/find?q=-1%3A%20Minus%20One%20%282016%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-two-in-same-year.xml xmltv-1.0.0/t/data-tv_imdb/Movie-two-in-same-year.xml --- xmltv-0.6.3/t/data-tv_imdb/Movie-two-in-same-year.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie-two-in-same-year.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,9 @@ + + + + + '83 + tv_imdb cannot identify a sole hit - two films in same year with this title + 2017 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-two-in-same-year.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie-two-in-same-year.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie-two-in-same-year.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie-two-in-same-year.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,10 @@ + + + + + + '83 + tv_imdb cannot identify a sole hit - two films in same year with this title + 2017 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-with-aka.xml xmltv-1.0.0/t/data-tv_imdb/Movie-with-aka.xml --- xmltv-0.6.3/t/data-tv_imdb/Movie-with-aka.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie-with-aka.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + Family Prayers + 2010 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-with-aka.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie-with-aka.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie-with-aka.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie-with-aka.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,11 @@ + + + + + + Family Prayers + 2010 + Movie + https://www.imdb.com/find?q=Family%20Prayers%20%282010%29&s=tt&exact=true + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-with-unknown-year.xml 
xmltv-1.0.0/t/data-tv_imdb/Movie-with-unknown-year.xml --- xmltv-0.6.3/t/data-tv_imdb/Movie-with-unknown-year.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie-with-unknown-year.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + Zed + + + Zed + 2010 + + + California Cornflakes + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Movie-with-unknown-year.xml-expected xmltv-1.0.0/t/data-tv_imdb/Movie-with-unknown-year.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Movie-with-unknown-year.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Movie-with-unknown-year.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,15 @@ + + + + + + Zed + + + Zed + 2010 + + + California Cornflakes + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Ratings.xml xmltv-1.0.0/t/data-tv_imdb/Ratings.xml --- xmltv-0.6.3/t/data-tv_imdb/Ratings.xml 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Ratings.xml 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + #nitTWITS + 2011 + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Ratings.xml-expected xmltv-1.0.0/t/data-tv_imdb/Ratings.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Ratings.xml-expected 1970-01-01 00:00:00.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Ratings.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + + #nitTWITS + 2011 + TV Series + https://www.imdb.com/find?q=%22%23nitTWITS%22%20%282011%29&s=tt&exact=true + + 7.0/10 + + + diff -Nru xmltv-0.6.3/t/data-tv_imdb/Show1.xml-expected xmltv-1.0.0/t/data-tv_imdb/Show1.xml-expected --- xmltv-0.6.3/t/data-tv_imdb/Show1.xml-expected 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/data-tv_imdb/Show1.xml-expected 2021-02-09 10:49:46.000000000 +0000 @@ -5,12 +5,12 @@ The Show1 TV Series - http://us.imdb.com/M/title-exact?%22The%20Show1%22%20%282002%29 + https://www.imdb.com/find?q=%22The%20Show1%22%20%282002%29&s=tt&exact=true The Show1 1990 TV Series - 
http://us.imdb.com/M/title-exact?%22The%20Show1%22%20%282002%29 + https://www.imdb.com/find?q=%22The%20Show1%22%20%282002%29&s=tt&exact=true diff -Nru xmltv-0.6.3/t/test_filters.t xmltv-1.0.0/t/test_filters.t --- xmltv-0.6.3/t/test_filters.t 2020-09-07 15:02:53.000000000 +0000 +++ xmltv-1.0.0/t/test_filters.t 2021-02-09 10:49:46.000000000 +0000 @@ -86,6 +86,7 @@ [ [ 'tv_grep', '--category', 'g', '--or', '--title', 'h' ], 1 ], [ [ 'tv_grep', '-i', '--category', 'i', '--title', 'j' ], 1 ], [ [ 'tv_grep', '-i', '--category', 'i', '--title', 'h' ], 1 ], + [ [ 'tv_grep', '--channel-id-exp', 'sat' ], 1 ], ); }