Technorati RSS Feeds

Quite a few of you probably know about Technorati.

If you do, chances are you also know about RSS. For some reason, Technorati doesn't provide RSS feeds for all of its site. I felt cheated by this, so why not create my own?

Well, I did. If you'd like, you can subscribe directly below, or just get the source and run it yourself. (The source code requires RSSlib.)

Enjoy.

Popular News (RSS)
Popular Blogs (RSS)
Popular Movies (RSS)
Popular Books (RSS)
Popular Tags (RSS)


#!/usr/bin/env python
import urllib, re, time, rsslib
"""
Technorati.com doesn't seem to want to provide RSS feeds for all of their site.
This script currently creates RSS for:
    http://technorati.com/pop/news/
    http://technorati.com/pop/books/
    http://technorati.com/pop/movies/
    http://technorati.com/pop/blogs/
    http://technorati.com/tags

Author: Cameron Mallory  http://berserk.org

You may use this code below as you see fit, in any form whatsoever.
"""
# Absolute directory that receives the generated RSS files.  File names are
# appended to it by plain string concatenation, so keep the trailing slash.
outputDir = "/home/username/full/path/to/rss/files/"

# Names of the keys used in each feed's settings dict.
url, title, desc, file, regex = "url", "title", "desc", "file", "regex"

# Every "popular" page is scraped with the same pattern: the markup between
# the Google ad-section comments.
_AD_SECTION = '<!-- google_ad_section_start -->(.*?)<!-- google_ad_section_end -->'

# Feeds to generate, keyed by consecutive integers starting at 0.
Popular = dict(enumerate([
    {
        url: 'http://technorati.com/pop/movies/',
        title: 'Popular Movies',
        desc: 'Technorati Popular Movies RSS',
        file: 'popmovies.xml',
        regex: _AD_SECTION,
    },
    {
        url: 'http://technorati.com/pop/news/',
        title: 'Popular News',
        desc: 'Technorati Popular News RSS',
        file: 'popnews.xml',
        regex: _AD_SECTION,
    },
    {
        url: 'http://technorati.com/pop/books/',
        title: 'Popular Books',
        desc: 'Technorati Popular Books RSS',
        file: 'popbooks.xml',
        regex: _AD_SECTION,
    },
    {
        url: 'http://technorati.com/pop/blogs/',
        title: 'Popular Blogs',
        desc: 'Technorati Popular Blogs RSS',
        file: 'popblogs.xml',
        regex: _AD_SECTION,
    },
]))

# Prefix for the channel title of every "popular" feed.
TITLE = "Technorati RSS by YOURSITE"

def process(d, p):
    """Build the RSS document for one "popular" page.

    Args:
        d: HTML fragment to scrape.  The caller passes the page's ad-section
            markup with newlines already removed.
        p: Feed settings dict; its `url`, `title` and `desc` entries are
            read to fill in the channel.

    Returns:
        The result of ``rss.write()`` -- presumably the serialized feed,
        since the caller writes it straight to a file (confirm against
        rsslib).
    """
    rss = rsslib.RSS()
    rss.channel.link = p[url]
    rss.channel.title = TITLE + " - " + p[title]
    rss.channel.description = p[desc]

    # Raw strings keep the patterns byte-identical while avoiding the invalid
    # "\/" string escape that newer interpreters warn about.
    urls = re.findall(r'<h2><a href="(.*?)"', d)
    titles = re.findall(r'<h2><a href=.*?title=.*?">(.*?)<\/a><\/h2>', d)
    view_alls = re.findall(r'<a href="\/search\/(.*?)" class="more links"', d)
    descriptions = re.findall(r'<blockquote.*?>(.*?)<\/blockquote>', d)

    # The four lists are matched up by position.  zip() stops at the shortest
    # one, so an entry that lacks a blockquote or a "view all" link can no
    # longer raise IndexError halfway through the feed.
    for link, headline, search_path, summary in zip(
            urls, titles, view_alls, descriptions):
        item = rsslib.Item()
        item.link = link
        view_all = ('<a href="http://technorati.com/search/' + search_path +
                    '">View All</a>')
        item.description = summary + "\n" + view_all
        item.title = headline
        rss.addItem(item)

    return rss.write()

"""
Let's get this party started
"""
# Generate one RSS file per "popular" page, then the top-tags feed.
import sys


def _fetch(address):
    """Return the body of `address` with newlines removed."""
    response = urllib.urlopen(address)
    try:
        # The scraping patterns are compiled without re.DOTALL, so newlines
        # are dropped to let ".*?" match across what were separate lines.
        return response.read().replace("\n", "")
    finally:
        # Release the connection explicitly instead of waiting for GC.
        response.close()


def _save(name, text):
    """Write `text` to the file `name` inside outputDir and close it."""
    with open(outputDir + name, 'w') as handle:
        handle.write(text)


for key in sorted(Popular):
    p = Popular[key]
    sections = re.findall(p[regex], _fetch(p[url]))
    if not sections:
        # The page layout no longer matches: keep any previously generated
        # file and carry on with the remaining feeds instead of crashing.
        sys.stderr.write("No match for " + p[file] + " at " + p[url] +
                         "; skipping\n")
        continue
    _save(p[file], process(sections[0], p))

# The tags feed: one item per tag link found on the tags page.
tags = re.findall('<a href="/tag/(.*?)".*?>',
                  _fetch("http://technorati.com/tags"))
rss = rsslib.RSS()
rss.channel.link = "http://technorati.com/tags/"
rss.channel.title = "Technorati Top Tags RSS by YOURSITE"
rss.channel.description = "The top 200 tags from technorati.com"
for t in tags:
    item = rsslib.Item()
    item.link = "http://technorati.com/tag/" + t
    item.title = "Technorati tag: " + t
    item.description = ""
    rss.addItem(item)
_save("toptags.xml", rss.write())