
markusb at netspace
Nov 18, 2004, 3:43 PM
Post #4 of 30
(10355 views)
Permalink
|
# Okay, this looks good but I can't get it to work for me... I am in
# Adelaide, get all sorts of compile errors when it runs. Had thread errors
# etc initially, but recompiled Perl with threads so that is fixed, but I
# guess I have stuffed something when editing the script, not being a
# programming/scripting person, I am tearing my hair out. Does anyone have
# an already edited script for Adelaide?
#
# thanks
# markus
#
# Michael Cheshire (Mailing Lists) wrote:
# > Actually there were several more spelling mistakes..
# >
# > Woops :)
# >
# > ----- Original Message ----- From: "Eyal Lebedinsky" <eyal [at] eyal>
# > To: "Discussion about mythtv" <mythtv-users [at] mythtv>
# > Sent: Wednesday, November 17, 2004 11:03 PM
# > Subject: Re: [mythtv-users] New Australian XMLTV grabber
# >
# >> Michael Cheshire (Mailing Lists) wrote:
# >>
# >>> The great script, updated with foxtel channels.
# >>
# >> If I did not break anything else then this is the same thing, as an
# >> attachment so that whitespace is not lost, and with one misspelling
# >> fixed...
# >>
# >> --
# >> Eyal Lebedinsky (eyal [at] eyal) <http://samba.org/eyal/>
# >
# > ----------------------------------------------------------------------
#
# The quoted script follows, un-quoted and re-indented.  Comment lines that
# the mail client had wrapped onto their own (uncommented) lines have been
# re-joined; those stray fragments were syntax errors in the quoted copy.

#!/usr/bin/perl -w
# Australian TV Guide XMLTV grabber by Damon Searle
# Derived from a yahoo XMLTV grabber by Ron Kellam which was itself...
# Derived from original code by Justin Hawkins
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# 30 Oct 2004
# Damon Searle <djsearle [at] netspace>
# - wrote first version
# - gets data from NineMSN as a backup. Its not that fancy,
# 31 Oct 2004
# Fred Donelly <fdonelly [at] hotmail>
# - added an option so that the output file can be specified on the
#   command line and from the quick test I gave it, it now works with
#   mythfilldatabase.
# - $offset set to +1000 at the top and then had "+1000" set in a
#   output string further down rather than the variable
# 4 Nov 2004
# Paul Andreassen <paulx [at] andreassen>
# - learned some perl and now wants to go back to python
# - added and then reduced status info
# - retry on failure to getstore
# - changed cache to '/var/local/tv_grab_au'
# - added threading for each day
# 5 Nov 2004
# - improved threading with use of queue
# Eyal Lebedinsky <eyal [at] eyal>
# - easier location selection
# 8 Nov 2004
# Paul
# - fixed pid=0 bug
# - did some merging, I hate merging
# 9 Nov 2004
# Rob Hill <rob [at] dot>
# - added Sydney
# 10 Nov 2004
# Mary Wright <mwright [at] taz-devil>
# - digital info for Sydney
# Paul
# - more cleanup and improved error checking
# - used mirror instead of getstore to get any updates
# - mirror didn't work replaced with own smarts to check for updates to times
# 11 Nov 2004
# - added program name in check
# 13 Nov 2004
# - added freesd for Brisbane
# 14 Nov 2004
# - --configure to exit nicely
# - if no program data then skip program nicely, mainly for foxtel data
# - added foxtel channels
# 17 Nov 2004
# - added remaining foxtel channels
# Eyal Lebedinsky <eyal [at] eyal>
# - Fix misspelling Unknows -> Unknown
# - Note: is Sydney now is on summer time +1100?

use strict;
use warnings;    # the "-w" on the #! line above only applies when it is line 1
use Getopt::Long;
use XMLTV;
use LWP::Simple;
use Date::Manip;
use File::Path;
use IO::File;    # used for the output handle; previously never loaded explicitly
use threads;
use Thread::Queue;

# Instructions:
# Select your region and source.
# If your location isn't listed below, go to
# http://tvguide.ninemsn.com.au/guide/ select your area
# look at the last number in the URL before ".asp" and set
# the region variable below. Then put the channel names as listed
# on the tv guide site into the variables below.
# Then set your XMLTV ids from the database in the XMLTVID_URL variable.
#
# If it doesn't work with mythfilldatabase, try:
# ./tv_grab_au
# mythfilldatabase --file 1 -1 /var/local/tv_grab_au/guide.xml

# pick your region
#
my $location = "Canberra";
#my $location = "Brisbane";
#my $location = "Sydney";
#my $location = "Adelaide";
#my $location = "Australia";

# pick your source
#
my $source = "free";
#my $source = "freesd";
#my $source = "freehd";
#my $source = "foxtel";

# choose the XMLID URL suffix that mythtv knows
#
my $XMLTVID_URL = "d1.com.au";

# change to how you think it should work
my $days_to_grab       = 7;
my $threads            = 5;    # number of download worker threads
my $retrys             = 3;    # extra attempts after a failed/truncated fetch
my $secondsbeforeretry = 2;

# Variables
my $guide_url   = "http://tvguide.ninemsn.com.au/guide/";
my $details_url = "http://tvguide.ninemsn.com.au/closeup/default.asp?pid=";
my $cache_dir   = "/var/local/tv_grab_au";

# XMLTV channel ids are built as <source>.<location>.<short id>.<XMLTVID_URL>
my $XMLTV_prefix = $source . "." . $location . ".";
my $XMLTV_suffix = "." . $XMLTVID_URL;

my $region;      # number used in the guide page URL for the chosen location
my $offset;      # UTC offset appended to every start/stop time
my %channels;    # channel name as shown on the guide site => short id

# NOTE(review): the offsets below are hard-coded (the changelog above already
# questions the summer-time values) -- confirm them for your location/season.
if ("Canberra" eq $location) {
    $region = "126";
    $offset = "+1100";
    if ("free" eq $source) {
        $channels{"ABC NSW"}                    = "2";
        $channels{"Prime Southern"}             = "PrimS";
        $channels{"SBS Sydney"}                 = "SBS";
        $channels{"Southern Cross TEN Capital"} = "10Cap";
        $channels{"WIN Television NSW"}         = "WIN";
    }
    elsif ("freesd" eq $source or "freehd" eq $source) {
        $channels{"ABC NSW"}                    = "2";
        $channels{"Prime Southern"}             = "7";
        $channels{"SBS Sydney"}                 = "SBS";
        $channels{"Southern Cross TEN Capital"} = "10";
        $channels{"WIN Television NSW"}         = "9";
    }
    else {
        print "Unknown source '$source' for $location\n";
        exit(1);
    }
}
elsif ("Brisbane" eq $location) {
    $region = "79";
    $offset = "+1000";
    if (("free" eq $source) || ("freesd" eq $source)) {
        $channels{"ABC QLD"}                       = "2";
        $channels{"Channel Seven Queensland"}      = "7";
        $channels{"SBS Queensland"}                = "SBS";
        $channels{"Southern Cross TEN Queensland"} = "10";
        $channels{"WIN Television QLD"}            = "9";
    }
    else {
        print "Unknown source '$source' for $location\n";
        exit(1);
    }
}
elsif ("Sydney" eq $location) {
    $region = "73";
    $offset = "+1100";
    if (("free" eq $source) || ("freesd" eq $source)) {
        $channels{"ABC NSW"}              = "2";
        $channels{"Channel Seven Sydney"} = "7";
        $channels{"SBS Sydney"}           = "SBS";
        $channels{"Network TEN Sydney"}   = "10";
        $channels{"Channel Nine Sydney"}  = "9";
    }
    else {
        print "Unknown source '$source' for $location\n";
        exit(1);
    }
}
elsif ("Adelaide" eq $location) {
    $region = "81";
    $offset = "+0930";
    if (("free" eq $source) || ("freesd" eq $source)) {
        # The last two keys were misspelled "Adekaude", so they could never
        # equal a channel name from the guide site.
        # NOTE(review): confirm the exact names against the Adelaide guide page.
        $channels{"ABC SA"}                 = "2";
        $channels{"Channel Seven Adelaide"} = "7";
        $channels{"SBS"}                    = "SBS";
        $channels{"Network TEN Adelaide"}   = "10";
        $channels{"Channel Nine Adelaide"}  = "9";
    }
    else {
        print "Unknown source '$source' for $location\n";
        exit(1);
    }
}
elsif ("Australia" eq $location) {
    $region = "123";
    $offset = "+0930";
    if ("foxtel" eq $source) {
        $channels{"Arena TV"}                    = "Arena";
        $channels{"BBC World"}                   = "BBC";
        $channels{"Cartoon Network"}             = "Cartoon";
        $channels{"Channel [V]"}                 = "Red";
        $channels{"CNBC"}                        = "CNBC";
        $channels{"CNN"}                         = "CNN";
        $channels{"Discovery Channel"}           = "Disc";
        $channels{"FOX News"}                    = "FoxFNC";
        $channels{"FOX8"}                        = "FOX";
        $channels{"MAX"}                         = "FoxMMX";
        $channels{"National Geographic Channel"} = "NatGe";
        $channels{"Nickelodeon"}                 = "Nick";
        $channels{"Showtime"}                    = "Show";
        $channels{"Showtime 2"}                  = "FoxSH2";
        $channels{"Sky News"}                    = "SkyNews";
        $channels{"TV1"}                         = "TV1";
        $channels{"UKTV"}                        = "UKTV";
        $channels{"Showtime Greats"}             = "ShowGreats";
        $channels{"World Movies"}                = "wmov";
        $channels{"WCH"}                         = "WCH";
        $channels{"TVSN"}                        = "TVSN";
        $channels{"Sky Racing"}                  = "SkyRa";
        $channels{"Ovation"}                     = "Ovation";
        $channels{"Disney Channel"}              = "Disney";
        $channels{"Animal Planet"}               = "Animal";
        $channels{"The Comedy Channel"}          = "Com";
        $channels{"The LifeStyle Channel"}       = "Lifes";
        $channels{"FOX Sports 1"}                = "FoxFS1";
        $channels{"Movie One"}                   = "Movie1";
        $channels{"TCM"}                         = "TCM";
        $channels{"MTV"}                         = "MTV";
        $channels{"FOX Sports 2"}                = "FoxSP2";
        $channels{"FOX Footy Channel"}           = "FFC";
        $channels{"Movie Extra"}                 = "MovieEx";
        $channels{"Hallmark Channel"}            = "Hall";
        $channels{"The History Channel"}         = "FoxHST";
        $channels{"ESPN"}                        = "ESPN";
        $channels{"FOX Classics"}                = "FoxCLA";
        $channels{"Movie Greats"}                = "MovieGr";
    }
    else {
        print "Unknown source '$source' for $location\n";
        exit(1);
    }
}
else {
    print "Unknown location '$location'\n";
    exit(1);
}

my $prog_ref;    # array ref of programme hashes handed to XMLTV::write_data
my $chan_ref;    # hash ref of channel hashes handed to XMLTV::write_data

foreach my $channel (keys %channels) {
    $$chan_ref{$channel} = {
        'id'           => $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix,
        'display-name' => [ [ $channel, undef ] ]
    };
}

# Options
my $opt_days;
my $opt_output;
my $opt_configfile;
my $opt_configure = 0;

GetOptions(
    'days=i'        => \$opt_days,
    'output=s'      => \$opt_output,
    'config-file=s' => \$opt_configfile,
    'configure'     => \$opt_configure,
);

if ($opt_days) {
    $days_to_grab = $opt_days;
}

if (!($opt_output)) {
    $opt_output = $cache_dir . "/guide.xml";
}

# $opt_configfile should probably do something ('/home/mythtv/.mythtv/tv_grab_au.xmltv')

if ($opt_configure == 1) {
    print "configuration must be done in this script $0\n";
    exit(0);
}

print "grabing $days_to_grab days into $opt_output\n";

print "starting $threads threads\n";

# Workers block on this queue; each item is "<date>-<pid>" and the item
# "0-0" tells one worker to finish.
my @thrlist;
my $datepids = Thread::Queue->new;

for (my $thread = 0; $thread < $threads; $thread++) {
    push @thrlist, threads->new(\&fetch_details);
}

print "loading queue\n";

# Pass 1: fetch each day's guide page, pull the programme ids out of it and
# queue the detail pages of every day whose listing changed since last run.
my $currentday  = ParseDate("today");
my $day_counter = 1;
while ($day_counter <= $days_to_grab) {
    my $date      = UnixDate($currentday, "%d%m%Y");
    my @day_lines = get_day($date, 1);
    if (@day_lines == 0) {
        $currentday = DateCalc($currentday, "+ 1 day");
        $day_counter++;
        next;
    }

    my @pids;
    my @rowspans;
    my @names;
    foreach my $line (@day_lines) {
        # (the original pattern listed the "tr" alternative twice)
        foreach my $link (split /\n|tr|TR|TD/, $line) {
            if ($link =~ /closeup\/default\.asp/) {
                my $rowspan = $link;
                $rowspan =~ s/.+rowspan=//g;
                $rowspan =~ s/ .+//g;

                my $name = $link;
                $name =~ s/.+target=new>(<P>|)//g;
                $name =~ s/<\/a>.+//g;

                $link =~ s/.+pid=//g;
                $link =~ s/".+//g;    #"
                if (($rowspan =~ /\d+/) and ($link =~ /\d\d+/)) {
                    push @pids,     $link;
                    push @rowspans, $rowspan;
                    push @names,    $name;
                }
            }
        }
    }

    if (changed_guide($date, @pids, @rowspans, @names)) {
        for (my $count = 0; $count <= $#pids; $count++) {
            $datepids->enqueue($date . "-" . $pids[$count]);
        }
    }

    $day_counter++;
    $currentday = DateCalc($currentday, "+ 1 day");
}

# one "0-0" terminator per worker
for (my $thread = 0; $thread < $threads; $thread++) {
    $datepids->enqueue(0 . "-" . 0);
}

print "queue is complete\n";

foreach my $thr (@thrlist) {
    $thr->join;
}

print "all threads done\n";
print "building xml structure\n";

# Pass 2: for each day, re-read the pid list saved by changed_guide() and
# turn every cached detail page into one XMLTV programme entry.
$currentday  = ParseDate("today");
$day_counter = 1;
while ($day_counter <= $days_to_grab) {
    my @pids;
    my $date = UnixDate($currentday, "%d%m%Y");

    my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";
    if (open(my $prn_fh, '<', $guide_prn_file)) {
        my $saved = <$prn_fh>;
        close($prn_fh);
        my @prn = defined $saved ? split(/>/, $saved) : ();

        if ($#prn > 1) {
            # the file holds pids, then rowspans, then names: keep the first third
            my $pidlast = ($#prn + 1) / 3 - 1;
            @pids = @prn[ 0 .. $pidlast ];
        }
        else {
            print "no pids in $guide_prn_file\n";
            @pids = ();
        }
    }
    else {
        print "can't read $guide_prn_file\n";
        @pids = ();
    }

    # Truncated pages are re-queued at the end of the list.  This used to be
    # done by pushing onto @pids inside "foreach (@pids)", which perlsyn
    # warns against, so an explicit work queue is used instead.
    my @pending = @pids;
    my $retry   = 0;    # consecutive truncated pages seen so far
    while (defined(my $pid = shift @pending)) {
        my @details = get_details($date, $pid);
        if (@details == 0) {
            next;
        }

        # Collect the lines between the bgcolor=#ffffff marker and the
        # bgColor=#f7f3e8 marker, and note whether the page is complete.
        my $show_details_table = "";
        my $use_line           = 0;
        my $close_html         = 0;
        foreach my $line (@details) {
            if ($line =~ /bgColor=#f7f3e8/) {
                $use_line = 0;
            }
            if ($use_line == 1) {
                $show_details_table .= $line;
            }
            if ($line =~ /bgcolor=#ffffff/) {
                $use_line = 1;
            }
            if ($line =~ /<\/HTML>/) {
                $close_html = 1;
            }
        }

        if ($close_html == 0) {
            # no closing </HTML>: treat the cached page as truncated
            my $name = $cache_dir . "/" . $date . "/" . $pid . ".html";
            if ($retry++ >= $retrys) {
                print "giving up on truncated $name\n";
                $retry = 0;
                next;
            }
            unlink $name;    # so that get_details() downloads it again
            push @pending, $pid;
            print "t";       # truncated
            sleep($secondsbeforeretry);
            next;
        }

        if ((length $show_details_table) == 0) {
            print "m";       # missing: can't do anything about this
            $retry = 0;
            next;
        }

        # Replace every tag by a newline; the fields are then picked by
        # their line number in the resulting text.
        $show_details_table =~ s/<[^>]*>/\n/g;
        $show_details_table =~ s/\&nbsp\;//g;

        my $count      = 0;
        my $channel    = "";
        my $start_date = UnixDate($currentday, "%Y-%m-%d");
        my $time;
        my $title1 = "";
        my $title2 = "";
        my $genre  = "";
        my $descr  = "";
        my $rating = "";    # parsed but not written to the output
        my $duration;

        foreach my $line (split /\n/, $show_details_table) {
            if ($count == 4) {
                $time = $line;
            }
            elsif ($count == 7) {
                $channel = $line;
            }
            elsif ($count == 19) {
                $title1 = $line;
            }
            elsif ($count == 20) {
                $line =~ s/ - //g;
                $title2 = $line;
            }
            elsif ($count == 21) {
                $line =~ s/\D//g;    # run time in minutes: keep digits only
                $duration = $line;
            }
            elsif ($count == 22) {
                $line =~ s/[^A-Z]//g;
                $rating = $line;
            }
            elsif ($count == 26) {
                $line =~ s/ //g;
                $genre = $line;
            }
            elsif ($count == 28 && $line =~ /[a-zA-Z]/) {
                $descr = $line;
            }
            ++$count;
        }

        # A page without a usable time or run time cannot be scheduled;
        # previously undef went straight into the date arithmetic below.
        my $start_time;
        if (defined $time and defined $duration and $duration ne "") {
            $start_time = UnixDate($time, "%H:%M");
        }
        if (!defined $start_time) {
            print "m";
            $retry = 0;
            next;
        }

        # The site's day runs from 6:00 to 6:00, so programmes listed with an
        # early-morning time belong to the next calendar day.
        if ($start_time =~ /00:|01:|02:|03:|04:|05:/) {
            $start_date = DateCalc($start_date, "+ 1 day");
        }
        $start_date = UnixDate($start_date, "%Y%m%d");
        my $end_time = DateCalc($start_time, " + " . $duration . "minutes");
        $end_time = UnixDate($end_time, "%H:%M");

        my $end_date;
        if (Date_Cmp($start_time, $end_time) <= 0) {
            $end_date = $start_date;
        }
        else {
            # end time is earlier than start time: the programme crosses midnight
            my $err;
            my $edate = DateCalc($start_date, "+ 1 day", \$err);
            $end_date = UnixDate($edate, "%Y%m%d");
        }

        if (defined $channels{$channel}) {
            $channel = $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix;
        }
        else {
            print "unknown channel $channel\n";
            $retry = 0;
            next;
        }

        my $start = $start_date . UnixDate($start_time, "%H%M") . "00 " . $offset;
        my $stop  = $end_date . UnixDate($end_time, "%H%M") . "00 " . $offset;

        my $a_prog = {
            channel => $channel,
            start   => $start,
            stop    => $stop,
            title   => [ [ $title1, undef ] ]
        };

        $descr =~ s/^\s+//;
        $descr =~ s/\s+$//;

        if ($title2) { $$a_prog{'sub-title'} = [ [ $title2, undef ] ]; }
        if ($descr)  { $$a_prog{desc}        = [ [ $descr,  undef ] ]; }
        if ($genre)  { $$a_prog{category}    = [ [ $genre,  undef ] ]; }

        push @$prog_ref, $a_prog;
        $retry = 0;
    }

    $currentday = DateCalc($currentday, "+ 1 day");
    $day_counter++;
}

# 'generator-info-name' used to appear twice in this hash; only the later
# value took effect, so that is the one kept.
my $data = [
    'ISO-8859-1',
    {
        'source-info-name'    => 'http://tvguide.ninemsn.com.au/',
        'generator-info-url'  => '',
        'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.2"
    },
    $chan_ref,
    $prog_ref
];

my $hour = UnixDate(ParseDate("now"), "%H");
if ($hour < 6) {
    print "can't update between 0:00 and 6:00\n";
    # If we update between these hours we lose any data we had up to 6:00.
    # This is because the web site starts a day at 6:00 and ends at 6:00 the next day
    # This could be fixed by read the previous days info and adding the needed shows.
    # I did try adding the whole previous day but got lots of mythfilldatabase errors.
    exit(1);
}

print "writing file\n";

# Was: new IO::File ">$opt_output" (indirect-object syntax, unchecked).
my $fh = IO::File->new($opt_output, "w")
    or die "can't open $opt_output for writing: $!\n";
XMLTV::write_data($data, OUTPUT => $fh);
$fh->close;

print "done\n";

# subroutines

# read_cached_lines($file)
# Returns all lines of $file, or prints "giving up on $file" and returns an
# empty list when the file cannot be opened.
sub read_cached_lines {
    my $file = shift;

    if (open(my $in, '<', $file)) {
        my @lines = <$in>;
        close($in);
        return @lines;
    }

    print "giving up on $file\n";
    return ();
}

# get_day($date, $force)
# Stores the guide page for $date (ddmmyyyy) of the configured region as
# <cache_dir>/<date>/guide.html and returns its lines (empty list if it
# cannot be read).  The page is downloaded when $force is 1 or no cached copy
# exists; a failed download is repeated up to $retrys more times, printing
# "." and sleeping before each repeat.
sub get_day {
    my $date  = shift;
    my $force = shift;
    my $url   = $guide_url . $date . "_" . $region . ".asp";

    my $guide_dir  = $cache_dir . "/" . $date;
    my $guide_file = $guide_dir . "/guide.html";
    mkpath($guide_dir);

    for (
        my $retry = 0;
        (($force == 1) || (!(-e $guide_file)))
            && is_error(getstore($url, $guide_file))
            && ($retry < $retrys);
        $retry++
      )
    {
        print ".";
        sleep($secondsbeforeretry);
    }

    return read_cached_lines($guide_file);
}

# get_details($date, $program_id)
# Returns the lines of the cached detail page
# <cache_dir>/<date>/<program_id>.html, downloading it first (with the same
# retry behaviour as get_day) when no cached copy exists.
sub get_details {
    my $date       = shift;
    my $program_id = shift;

    my $url          = $details_url . $program_id;
    my $guide_dir    = $cache_dir . "/" . $date;
    my $details_file = $guide_dir . "/" . $program_id . ".html";
    mkpath($guide_dir);

    for (
        my $retry = 0;
        (!(-e $details_file))
            && is_error(getstore($url, $details_file))
            && ($retry < $retrys);
        $retry++
      )
    {
        print ".";
        sleep($secondsbeforeretry);
    }

    return read_cached_lines($details_file);
}

# fetch_details()
# Worker thread body: takes "<date>-<pid>" items from $datepids and stores
# each detail page in the cache, retrying failed downloads, until it receives
# an item whose date or pid is 0 (the "0-0" terminator).
sub fetch_details {
    while (1) {
        my ($date, $pid) = split /-/, $datepids->dequeue;
        last unless (($date != 0) and ($pid != 0));

        my $guide_dir = $cache_dir . "/" . $date;
        mkpath($guide_dir);

        my $url          = $details_url . $pid;
        my $details_file = $guide_dir . "/" . $pid . ".html";

        for (
            my $retry = 0;
            is_error(getstore($url, $details_file)) && ($retry < $retrys);
            $retry++
          )
        {
            sleep($secondsbeforeretry);
        }
    }
}

# changed_guide($date, @pids, @rowspans, @names)
# Compares the flattened listing (all pids, then all rowspans, then all
# names) with the copy saved in <cache_dir>/<date>/guide.prn.
# Returns 0 when the saved copy is identical; otherwise rewrites guide.prn
# with the new listing (items joined by ">") and returns 1.
sub changed_guide {
    my $date              = shift;
    my @pidsrowspansnames = @_;

    my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";
    if (open(my $in, '<', $guide_prn_file)) {
        my $saved = <$in>;
        close($in);
        my @prn = defined $saved ? split(/>/, $saved) : ();

        if (($#prn > 1) and ($#prn == $#pidsrowspansnames)) {
            # first two thirds (pids and rowspans) are compared as numbers ...
            my $count;
            my $diff = ((($#prn + 1) * 2) / 3) - 1;
            for (
                $count = 0;
                ($count <= $diff)
                    && ($prn[$count] == $pidsrowspansnames[$count]);
                $count++
              )
            {
            }

            if ($count == ($diff + 1)) {
                # ... and the last third (names) as strings
                for (
                    ;
                    ($count <= $#prn)
                        && ($prn[$count] eq $pidsrowspansnames[$count]);
                    $count++
                  )
                {
                }

                if ($count == ($#prn + 1)) {
                    print "$date unchanged\n";
                    return 0;
                }
            }
        }
    }

    print "$date downloading\n";

    if (open(my $out, ">", $guide_prn_file)) {
        # join() writes the same text as the former element-by-element loop
        # and does not touch $list[-1] when the list is empty
        print $out join(">", @pidsrowspansnames);
        close($out);
    }
    else {
        print "can't open for writing $guide_prn_file\n";
    }

    return 1;
}

# _______________________________________________
# mythtv-users mailing list
# mythtv-users [at] mythtv
# http://mythtv.org/cgi-bin/mailman/listinfo/mythtv-users
|