Technorati RSS Feeds
Quite a few of you probably know about Technorati.
If you do, chances are you know about RSS. For some reason, Technorati doesn't RSSify all of their site. I felt cheated by this. So why not create my own?
Well, I did. If you'd like, you can subscribe directly below, or just get the source and run it yourself. (The source code
requires RSSlib.)
Enjoy.
#!/usr/bin/env python
import urllib, re, time, rsslib
"""
Technorati.com doesn't seem to want to provide RSS feeds for their site.
Currently will create RSS for:
http://technorati.com/pop/news/
http://technorati.com/pop/books/
http://technorati.com/pop/movies/
http://technorati.com/pop/blogs/
Author: Cameron Mallory http://berserk.org
You may use this code below as you see fit, in any form whatsoever.
"""
# Directory the generated RSS files are written into. File names are appended
# to it by plain string concatenation, so keep the trailing slash.
outputDir = "/home/username/full/path/to/rss/files/"

# Names of the keys used in every feed definition below.
url, title, desc, file, regex = "url", "title", "desc", "file", "regex"

# One definition per RSS file to generate, keyed by position (0, 1, 2, ...).
# Each entry gives the page to fetch, the channel title/description, the
# output file name, and the regex whose first capture isolates the part of
# the page that holds the listing.
Popular = dict(enumerate([
    {
        url: 'http://technorati.com/pop/movies/',
        title: 'Popular Movies',
        desc: 'Technorati Popular Movies RSS',
        file: 'popmovies.xml',
        regex: '<!-- google_ad_section_start -->(.*?)<!-- google_ad_section_end -->',
    },
    {
        url: 'http://technorati.com/pop/news/',
        title: 'Popular News',
        desc: 'Technorati Popular News RSS',
        file: 'popnews.xml',
        regex: '<!-- google_ad_section_start -->(.*?)<!-- google_ad_section_end -->',
    },
    {
        url: 'http://technorati.com/pop/books/',
        title: 'Popular Books',
        desc: 'Technorati Popular Books RSS',
        file: 'popbooks.xml',
        regex: '<!-- google_ad_section_start -->(.*?)<!-- google_ad_section_end -->',
    },
    {
        url: 'http://technorati.com/pop/blogs/',
        title: 'Popular Blogs',
        desc: 'Technorati Popular Blogs RSS',
        file: 'popblogs.xml',
        regex: '<!-- google_ad_section_start -->(.*?)<!-- google_ad_section_end -->',
    },
]))

# Prefix of every "popular" channel title.
TITLE = "Technorati RSS by YOURSITE"
def process(d, p):
    """Build the RSS document for one scraped Technorati listing.

    d -- HTML text to scrape for entries (headline links, "more links"
         search anchors and blockquote excerpts).
    p -- feed definition; its url, title and desc entries become the
         channel link, title suffix and description.

    Returns the result of rss.write() for the populated feed.
    """
    rss = rsslib.RSS()
    rss.channel.link = p[url]
    rss.channel.title = TITLE + " - " + p[title]
    rss.channel.description = p[desc]

    # Raw strings keep the "\/" in the patterns from being read as an
    # (invalid) string escape; the patterns themselves are unchanged.
    urls = re.findall(r'<h2><a href="(.*?)"', d)
    titles = re.findall(r'<h2><a href=.*?title=.*?">(.*?)<\/a><\/h2>', d)
    viewAlls = re.findall(r'<a href="\/search\/(.*?)" class="more links"', d)
    descriptions = re.findall(r'<blockquote.*?>(.*?)<\/blockquote>', d)

    # The four lists are paired purely by position. zip() stops at the
    # shortest one, so a page where some entry lacks a part no longer raises
    # IndexError halfway through the feed.
    for link, headline, search, excerpt in zip(urls, titles, viewAlls, descriptions):
        item = rsslib.Item()
        item.link = link
        va = '<a href="http://technorati.com/search/' + search + '">View All</a>'
        item.description = excerpt + "\n" + va
        item.title = headline
        rss.addItem(item)
    return rss.write()
"""
Let's get this party started
"""
# Generate one RSS file per feed definition in Popular.
for i in sorted(Popular):
    p = Popular[i]
    r = re.compile(p[regex])
    # Newlines are removed before matching so the feed's regex (whose "."
    # does not match a newline) can capture a section spanning several lines.
    d = urllib.urlopen(p[url]).read().replace("\n", "")
    # Only the first captured section is scraped.
    d = r.findall(d)[0]
    # "with" closes the output file even if process() or the write fails.
    with open(outputDir + p[file], 'w') as out:
        out.write(process(d, p))
# Build a feed with one item per /tag/ link found on Technorati's tags page.
# Newlines are stripped first so the pattern sees the page as a single line.
tags = re.compile('<a href="/tag/(.*?)".*?>').findall(
    urllib.urlopen("http://technorati.com/tags").read().replace("\n", ""))

rss = rsslib.RSS()
rss.channel.link = "http://technorati.com/tags/"
rss.channel.title = "Technorati Top Tags RSS by YOURSITE"
rss.channel.description = "The top 200 tags from technorati.com"

for t in tags:
    item = rsslib.Item()
    item.link = "http://technorati.com/tag/" + t
    item.title = "Technorati tag: " + t
    item.description = ""
    rss.addItem(item)

# "with" guarantees the file is flushed and closed once the feed is written.
with open(outputDir + "toptags.xml", "w") as out:
    out.write(rss.write())
|



|