#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# rss_parser.cgi   www.braggtown.com   1/2006
# XML/RSS parser to grab and display feeds on a web page.
#
# To add or remove feeds, add a unique name and the RSS URL to url_dict in the
# Global class. Don't forget to also add an option with that same feed name to
# your selector form.
#
# The feed display works by using Python's string.replace(): the CGI replaces
# the placeholder $FEEDS with the gathered feed information. Make sure that
# you place $FEEDS in your web page at the location where you'd like the RSS
# feeds displayed.
#
# Thanks to GazaM and Van_Gogh from ubuntuforums.org for their contributions.

### Import Python modules
try:
    import cgi
    import cgitb
except ImportError:
    # cgi and cgitb were removed from the standard library in Python 3.13.
    # Keep the module importable there; main() reports the problem when run.
    cgi = cgitb = None
else:
    cgitb.enable()  # show tracebacks in the browser instead of a bare 500

import io
import sys
from xml.dom import minidom
from xml.parsers.expat import ExpatError
from xml.sax.saxutils import escape, quoteattr

try:
    from urllib import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

# Character encoding of the generated page (matches the coding line above).
_ENCODING = 'iso-8859-1'


# Global. Class to hold global variables. Alleviates having to pass
# references between functions.
class Global:
    """Process-wide state shared by the functions of this script."""

    feeds = []                   # feed names selected in the form
    super_item_object_list = []  # one list of [title, link, description] per fetched feed
    feed_list = []               # one [title, link] per fetched feed (same order as above)
    feed_urls = []               # URLs resolved from the selected feed names
    errors = []                  # plain-text problems to report on the page
    # NOTE(review): "distorwatch" looks like a typo for "distrowatch"; it is
    # left unchanged because the selector form must use the very same key.
    url_dict = {
        "slashdot": "http://rss.slashdot.org/Slashdot/slashdot",
        "osnews": "http://osnews.com/files/recent.rdf",
        "distorwatch": "http://distrowatch.com/news/dw.xml",
        "zdnet": "http://news.zdnet.com/2509-1_22-0-20.xml",
        "bbc": "http://fxfeeds.mozilla.org/rss20.xml",
        "thinkprogress": "http://thinkprogress.org/feed/",
        "asiatimes": "http://www.atimes.com/atimes/atol.xml",
        "ucimc": "http://indymedia.us/main-features-content.rss",
        "crooks": "http://www.crooksandliars.com/rss.xml",
        "guardian": "http://www.guardian.co.uk/rssfeed/0,,12,00.xml",
        "npr": "http://www.npr.org/rss/rss.php?id=1012",
        "bbctech": "http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml#",
        "google": "http://news.google.com/?output=rss",
        "freepress": "http://www.freepress.net/news/rss/news_all.rss",
    }


def _to_text(value):
    """Return *value* as text, decoding byte strings with the page encoding."""
    if isinstance(value, bytes):
        return value.decode(_ENCODING, 'replace')
    return value


def _read_template(path):
    """Return the content of the template file *path* as text.

    The file is read as bytes and decoded as ISO-8859-1, which maps every
    byte to one character, so the template is emitted again byte for byte.
    """
    with io.open(path, 'rb') as handle:
        return handle.read().decode(_ENCODING)


def _write_response(body):
    """Write the CGI header followed by *body* (text) to standard output.

    Characters that do not exist in ISO-8859-1 are emitted as numeric
    character references so they still display correctly in the browser.
    """
    payload = u"Content-Type: text/html\n\n%s\n" % body
    # Python 3 needs the underlying byte stream; Python 2 writes bytes directly.
    stream = getattr(sys.stdout, 'buffer', sys.stdout)
    stream.write(payload.encode(_ENCODING, 'xmlcharrefreplace'))
    stream.flush()


def _first_text(node, tag):
    """Return the data of the first child of the first *tag* element under *node*.

    Raises IndexError when the element is missing or empty and AttributeError
    when its first child carries no character data.
    """
    return node.getElementsByTagName(tag)[0].childNodes[0].data


# populateFeeds. Parse out variables passed from form
def populateFeeds(form):
    """Append the feed names selected in *form* to Global.feeds.

    form['feed'] is a list of fields when several feeds were selected and a
    single field otherwise; each field exposes its text through `.value`.
    """
    feed = form['feed']
    if isinstance(feed, list):
        Global.feeds.extend(item.value for item in feed)
    else:
        Global.feeds.append(feed.value)


# printForm. Print selector form
def printForm(form_template):
    """Send the selector form stored in the file *form_template* to the client."""
    _write_response(_read_template(form_template))


# nameToUrl. Take feed name, return feed url
def nameToUrl():
    """Resolve every name in Global.feeds to its URL in Global.feed_urls.

    Names missing from Global.url_dict come straight from the request, so
    they are reported in Global.errors instead of raising KeyError.
    """
    for name in Global.feeds:
        try:
            Global.feed_urls.append(Global.url_dict[name])
        except KeyError:
            Global.errors.append(u"Unknown feed: %s" % _to_text(name))


# getAnchors. Get list of names for named anchors for page navigation
def getAnchors():
    """Return the navigation bar: one link per fetched feed, '|' separated.

    Each link targets the named anchor that carries the feed title.
    """
    links = [
        u' <a href=%s>%s</a> | ' % (quoteattr(u'#' + feed[0]), escape(feed[0]))
        for feed in Global.feed_list
    ]
    return u'| ' + u''.join(links)


def _fetch_feed(url):
    """Download and parse the feed at *url* into the Global lists.

    A feed that cannot be fetched, is not well-formed XML or lacks a channel
    title/link is recorded in Global.errors and otherwise skipped, so one
    broken feed does not take the whole page down.
    """
    try:
        stream = urlopen(url)
        try:
            content = stream.read()
        finally:
            stream.close()
        rss_xml = minidom.parseString(content)
        rss_xml.normalize()
        getFeedTitle(rss_xml)
    except (IOError, ExpatError, IndexError, AttributeError):
        Global.errors.append(u"No valid feed at %s" % _to_text(url))
        return
    # Only reached when getFeedTitle() added its entry, which keeps
    # Global.feed_list and Global.super_item_object_list aligned.
    getChildNodes(rss_xml.getElementsByTagName('item'))


def _render_feeds(rss_icon):
    """Return the HTML that replaces $FEEDS in the display template.

    NOTE(review): the format strings in the reviewed copy of this script had
    fewer placeholders than arguments (their markup was missing), so the tags
    below are a reconstruction based on the surviving comments - confirm them
    against the deployed page.
    """
    parts = [u'<a name="top"></a>\n']
    parts.extend(u'<p>%s</p>\n' % escape(_to_text(msg)) for msg in Global.errors)
    parts.append(u'<p>%s</p>\n' % getAnchors())
    icon = u''
    if rss_icon:
        icon = u'<img src=%s alt="" /> ' % quoteattr(_to_text(rss_icon))
    for feed, stories in zip(Global.feed_list, Global.super_item_object_list):
        title, link = feed[0], feed[1]
        parts.append(u'<h2><a name=%s href=%s>%s</a></h2>\n'
                     % (quoteattr(title), quoteattr(link), escape(title)))
        for story in stories:
            # story is [title, link, description]. The description is inserted
            # as-is because RSS descriptions commonly carry HTML markup.
            parts.append(u'%s<a href=%s>%s</a><br />\n%s<br /><br />\n'
                         % (icon, quoteattr(story[1]), escape(story[0]), story[2]))
        parts.append(u'<p><a href="#top">Back to top</a></p>\n<hr />\n')
    return u''.join(parts)


# getFeeds. parser logic
def getFeeds(rss_icon, display_template):
    """Fetch every URL in Global.feed_urls and send the filled-in page.

    rss_icon is the image URL shown next to each story, or a false value for
    no icon. display_template is the path of the page whose $FEEDS
    placeholder is replaced with the rendered feeds. Fetch problems are
    listed inside the page; nothing is written before the HTTP header.
    """
    for url in Global.feed_urls:
        _fetch_feed(url)
    page = _read_template(display_template)
    _write_response(page.replace(u'$FEEDS', _render_feeds(rss_icon)))


# getFeedTitle. Acquire the title of each rss feed
def getFeedTitle(rss_xml):
    """Append [title, link] of the first channel in *rss_xml* to Global.feed_list.

    Raises IndexError or AttributeError when the document has no channel or
    the channel has no usable title/link.
    """
    channel = rss_xml.getElementsByTagName('channel')[0]
    feed_title = _first_text(channel, 'title')
    feed_link = _first_text(channel, 'link')
    Global.feed_list.append([feed_title, feed_link])


# getChildNodes takes list of items and append child nodes of each item to list
def getChildNodes(item_list):
    """Append the list of [title, link, description] built from *item_list*
    to Global.super_item_object_list.

    Items lacking any of the three elements are skipped (best effort). Text
    is kept as Unicode; it is encoded only when the page is written, so
    characters outside ISO-8859-1 no longer cause a story to be dropped.
    """
    item_object_list = []
    for item in item_list:
        try:
            item_object = [
                _first_text(item, 'title'),
                _first_text(item, 'link'),
                _first_text(item, 'description'),
            ]
        except (IndexError, AttributeError):
            continue
        item_object_list.append(item_object)
    Global.super_item_object_list.append(item_object_list)


# main application logic
def main():
    """Show the selected feeds, or the selector form when none was submitted."""
    ### Set the following variables
    form_template = "../includes/select_form.html"  # Set to location of your selection form
    # Set to location of icon displayed next to titles.
    # Change to False (no quotes) if no icon desired.
    rss_icon = "../images/rss.png"
    display_template = "../includes/display_feeds.html"  # Set to location of page feeds displayed in
    ###
    if cgi is None:
        raise ImportError("the 'cgi' module is required to read the submitted form")
    form = cgi.FieldStorage()
    if 'feed' in form:
        # Selector has been called previously. Show feeds.
        populateFeeds(form)
        nameToUrl()
        getFeeds(rss_icon, display_template)
    else:
        # Selector has not been called. Print form.
        printForm(form_template)


### Call main function
if __name__ == "__main__":
    main()