#!/usr/bin/env python
"""Generate RSS feeds for Technorati pages that do not offer one.

Technorati.com doesn't seem to want to provide RSS feeds for their site.
Currently this script will create RSS for:

    http://technorati.com/pop/news/
    http://technorati.com/pop/books/
    http://technorati.com/pop/movies/
    http://technorati.com/pop/blogs/

and for the tags listed on http://technorati.com/tags.

Author: Cameron Mallory
http://YOURSITE

You may use this code below as you see fit, in any form whatsoever.
"""

import contextlib
import os
import re
import sys
import time
import urllib

import rsslib

try:  # Python 3 keeps urlopen in urllib.request.
    from urllib.request import urlopen
except ImportError:  # Python 2, which this script was originally written for.
    from urllib import urlopen

# Fully qualified path to where the RSS files will be created.
outputDir = "/home/username/your/rss/files/"

# NOTE(review): the copy of this script that was reviewed had the HTML markup
# stripped out of its string literals. Every scraping pattern in this file
# must be checked against the live pages before the output can be trusted.

# Our RSS files we will create.
# "regex" should capture the region of the page that holds the item list.
# TODO: the value below is exactly what survived in the reviewed copy; on its
# own it only ever matches the empty string, so each feed will be empty until
# the real pattern is restored.
Popular = (
    {
        "url": "http://technorati.com/pop/movies/",
        "title": "Popular Movies",
        "desc": "Technorati Popular Movies RSS",
        "file": "popmovies.xml",
        "regex": "(.*?)",
    },
    {
        "url": "http://technorati.com/pop/news/",
        "title": "Popular News",
        "desc": "Technorati Popular News RSS",
        "file": "popnews.xml",
        "regex": "(.*?)",
    },
    {
        "url": "http://technorati.com/pop/books/",
        "title": "Popular Books",
        "desc": "Technorati Popular Books RSS",
        "file": "popbooks.xml",
        "regex": "(.*?)",
    },
    {
        "url": "http://technorati.com/pop/blogs/",
        "title": "Popular Blogs",
        "desc": "Technorati Popular Blogs RSS",
        "file": "popblogs.xml",
        "regex": "(.*?)",
    },
)

TITLE = "Technorati RSS by YOURSITE"

# Item-level patterns used by process().
# NOTE(review): only the tails `<\/a><\/h2>` and `<\/blockquote>` survived in
# the reviewed copy. The opening tags, the URL pattern and the "View All"
# pattern are reconstructions -- confirm all four against the page markup.
_ITEM_URL_RE = re.compile(r'<h2><a href="(.*?)">')
_ITEM_TITLE_RE = re.compile(r'<h2><a href=".*?">(.*?)<\/a><\/h2>')
_ITEM_VIEW_ALL_RE = re.compile(r'<a href="([^"]*)">View All<\/a>')
_ITEM_DESCRIPTION_RE = re.compile(r'<blockquote>(.*?)<\/blockquote>')

# TODO: the tag pattern was reduced to an empty string in the reviewed copy.
# An empty pattern only yields empty matches, which the tag feed skips, so
# that feed stays empty until the real pattern is restored.
_TAG_RE = re.compile('')


def process(d, p):
    """Build the RSS document for one of the "popular" pages.

    d -- HTML fragment holding the page's item list.
    p -- the page's entry from Popular; "url", "title" and "desc" are read.

    Returns the result of rss.write() for the populated feed.
    """
    rss = rsslib.RSS()
    rss.channel.link = p["url"]
    rss.channel.title = TITLE + " - " + p["title"]
    rss.channel.description = p["desc"]

    urls = _ITEM_URL_RE.findall(d)
    titles = _ITEM_TITLE_RE.findall(d)
    view_alls = _ITEM_VIEW_ALL_RE.findall(d)
    descriptions = _ITEM_DESCRIPTION_RE.findall(d)

    # zip() stops at the shortest list, so an item missing one of its parts
    # shortens the feed instead of raising IndexError.
    for link, title, view_all, description in zip(
            urls, titles, view_alls, descriptions):
        item = rsslib.Item()
        item.link = link
        # NOTE(review): the reviewed copy only kept the text 'View All' here;
        # wrapping it in a link to the captured address is a reconstruction.
        va = '<a href="' + view_all + '">View All</a>'
        item.description = description + "\n" + va
        item.title = title
        rss.addItem(item)

    return rss.write()


def _fetch(address):
    """Return the body of address as a single string without newlines."""
    with contextlib.closing(urlopen(address)) as response:
        data = response.read()
    if not isinstance(data, str):
        # Python 3 hands back bytes.
        # NOTE(review): UTF-8 is an assumption -- confirm the site's encoding.
        data = data.decode("utf-8", "replace")
    # The patterns are compiled without re.DOTALL, so "." cannot cross a line
    # break; dropping the breaks lets one match span several source lines.
    return data.replace("\n", "")


def _write_feed(filename, content):
    """Write content to filename inside outputDir, closing the file."""
    with open(os.path.join(outputDir, filename), "w") as out:
        out.write(content)


def _write_popular_feeds():
    """Fetch every page listed in Popular and write its RSS file."""
    for p in Popular:
        regions = re.compile(p["regex"]).findall(_fetch(p["url"]))
        if not regions:
            # Report and move on so one changed page does not stop the rest.
            sys.stderr.write("No item list found at %s\n" % p["url"])
            continue
        _write_feed(p["file"], process(regions[0], p))


def _write_tag_feed():
    """Fetch the tag page and write toptags.xml."""
    tags = _TAG_RE.findall(_fetch("http://technorati.com/tags"))

    rss = rsslib.RSS()
    rss.channel.link = "http://technorati.com/tags/"
    rss.channel.title = "Technorati Top Tags RSS by YOURSITE"
    rss.channel.description = "The top 200 tags from technorati.com"

    for t in tags:
        if not t:
            # An empty match carries no tag name; skip it.
            continue
        item = rsslib.Item()
        item.link = "http://technorati.com/tag/" + t
        item.title = "Technorati tag: " + t
        item.description = ""
        rss.addItem(item)

    _write_feed("toptags.xml", rss.write())


def main():
    """Let's get this party started: write every feed."""
    _write_popular_feeds()
    _write_tag_feed()


if __name__ == "__main__":
    main()