#!/usr/bin/perl

use strict;
use warnings;

use POSIX qw(strftime);
use XML::RSS;

#
# Little script to parse the tabbed movie sections off of hollywoodvideo.com
# (Now Playing, Coming Soon, Top 10, MVP).
#
# Requires:
#     XML::RSS
#     wget
#
# Output:
#     For each section listed above, this script will produce <section>.xml
#     (nowplaying.xml, mvp.xml, top10.xml, comingsoon.xml)
#
# Original author: Cameron Mallory
#
# Released under http://creativecommons.org/licenses/by/2.0/
# You are free:
#     * to copy, distribute, display, and perform the work
#     * to make derivative works
#     * to make commercial use of the work
#

my $wget              = "/usr/local/bin/wget";
my $CHANNEL_WEBMASTER = 'your@email.com';
my $ROOT_DIR          = "/full/path/to/where/you/want/your/rss/to/go/";

##
## Shouldn't need to change anything below here
##

#
# Fetch the front page with wget, split it into movie sections and write
# one RSS file per section.
#
sub main {
    my @data = `$wget -q -O - http://www.hollywoodvideo.com`;

    # Keep going on failure (whatever was downloaded is still parsed), but
    # say so instead of silently producing nothing.
    warn "$wget failed (wait status $?); page may be missing or incomplete\n"
        if $? != 0;

    my %movies = getMovieSections(@data);
    generateRSS(%movies);

    return;
}

#
# Use some RE's to parse out the movie sections
#
# Takes the lines of the page. Returns a hash mapping section name
# (top10, comingsoon, nowplaying, mvp) to a string of concatenated
#     href="http://www.hollywoodvideo.com/movies/movie.aspx...">Title</a>::
# fragments, one per movie link found while that section was current.
# Only the first movie link on a line is recorded.
#
sub getMovieSections {
    my (@data) = @_;

    # A line matching one of these widget initialisers switches the
    # current section. Order is preserved: the first matching pattern wins.
    my @section_markers = (
        [ qr/InitWidget\('condensed','uMLC4_rpt__ctl0_mc_rtc_139543',3, 3, 0\)/,     'top10' ],
        [ qr/InitWidget\('condensed','uMLC2_rpt__ctl0_mc_rtc_139465',3, 3, 0\)/,     'comingsoon' ],
        [ qr/InitWidget\('condensed','uMLC1_rpt__ctl0_mc_rtc_139446',2\.75, 3, 0\)/, 'nowplaying' ],
        [ qr/InitWidget\('condensed','uMLC3_rpt__ctl0_mc_rtc_140389',2\.5, 3, 0\)/,  'mvp' ],
    );

    # Lexical state, so repeated calls do not accumulate into each other.
    my %movies;
    my $cs = '';    # current section; empty until the first marker is seen

    LINE:
    foreach my $line (@data) {
        chomp $line;

        foreach my $marker (@section_markers) {
            my ( $pattern, $section ) = @{$marker};
            if ( $line =~ $pattern ) {
                $cs = $section;
                last;
            }
        }

        #
        # Don't parse the whole file, so stop when we reach a magic line.
        # The check comes before link extraction, so a movie link on the
        # magic line itself is not recorded.
        #
        last LINE if $line =~ /More Top 10 Movies/;

        next LINE if $cs eq '';

        if ( $line =~ m{href="/movies/movie\.aspx(.*?)</a>} ) {
            $movies{$cs} .= qq{href="http://www.hollywoodvideo.com/movies/movie.aspx$1</a>::};
        }
    }

    return %movies;
}

#
# Escape the three characters that are special in XML text content.
# '&' is handled first so the entities added for '<' and '>' are not
# themselves re-escaped.
#
sub escapeXML {
    my ($text) = @_;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    return $text;
}

#
# Generate RSS files for our movie sections
#
# Takes the hash produced by getMovieSections and writes
# $ROOT_DIR/<section>.xml for every section in it. A file that cannot be
# written is reported with a warning and the remaining sections are still
# processed.
#
sub generateRSS {
    my (%movies) = @_;

    # Same layout the script always produced, but with %Y (calendar year);
    # %G is the ISO week-based year and differs around New Year.
    my $currDate = strftime( '%a, %d %b %Y %H:%M:%S %Z', localtime );

    foreach my $section ( sort keys %movies ) {
        my $rss = XML::RSS->new( version => '2.0' );
        $rss->channel(
            title       => 'Hollywood Video - ' . $section,
            link        => 'http://hollywoodvideo.com',
            language    => 'en',
            description => 'Hollywood Video : RSS feeds',

            #pubDate      => $currDate,
            lastBuildDate => $currDate,
            webMaster     => $CHANNEL_WEBMASTER,
        );

        foreach my $showData ( split /::/, $movies{$section} ) {

            # Skip fragments that do not look like a link, so captures
            # from the previous item are never reused.
            next unless $showData =~ m{href="(.*?)">(.*?)</a>};
            my ( $link, $name ) = ( $1, $2 );

            # NOTE(review): XML::RSS may already entity-encode its output
            # (see its encode_output option); if the installed version
            # does, this manual escaping double-encodes -- confirm.
            $name = escapeXML($name);

            $rss->add_item(
                title       => $name,
                link        => $link,
                description => $name,
                pubDate     => $currDate,
            );
        }

        my $path = "$ROOT_DIR/$section.xml";
        my $out;
        if ( !open( $out, '>', $path ) ) {
            warn "Cannot write $path: $!\n";
            next;
        }
        print {$out} $rss->as_string;
        close($out) or warn "Error closing $path: $!\n";
    }

    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. from a test) without triggering a network fetch.
main() unless caller;

1;