-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrss_reader.py
More file actions
58 lines (41 loc) · 1.73 KB
/
rss_reader.py
File metadata and controls
58 lines (41 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""
Aberdeen Python dojo of 26th November 2014
@author: Daniel Blasco
"""
import sys
import json
import urllib2
from xml.etree import ElementTree
def fetch_rss_file(url):
    """Download the RSS document at *url* and return its raw body.

    The URL being fetched is announced on stdout first.  Any exception raised
    while opening the URL or reading the response propagates to the caller.
    """
    print("Retrieving RSS feed file from {}".format(url))
    # https://docs.python.org/2/howto/urllib2.html
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Close the connection explicitly rather than relying on garbage
        # collection.  try/finally is used because urllib2 responses in
        # Python 2 do not support the ``with`` statement.
        response.close()
def parse_xml(text):
    """Parse RSS XML *text* and return a list with one dictionary per item.

    Every ``<item>`` element found anywhere in the document becomes a
    dictionary mapping the tag of each direct child to its value:

    * plain elements such as ``<title>`` map to their text (``None`` when
      the element is empty);
    * elements containing sub-elements, such as ``<image>``, map to a nested
      dictionary built the same way;
    * elements with no text but with attributes, such as
      ``<enclosure url="..."/>``, map to a dictionary of those attributes.

    When a tag is repeated inside one item, the last occurrence wins.
    Malformed XML makes ``ElementTree.fromstring`` raise its parse error.
    """
    # With ElementTree parse XML from text format to a tree of python elements
    # https://docs.python.org/2/library/xml.etree.elementtree.html#tutorial
    root = ElementTree.fromstring(text)

    # Use XPath expressions to find all the items in the XML tree
    # https://docs.python.org/2/library/xml.etree.elementtree.html#elementtree-xpath
    #
    # All RSS 2.0 items have title, description and link, but they may carry
    # more properties we don't know in advance (dates, images, ...), so every
    # child element is extracted rather than a fixed set of tags.
    return [
        {child.tag: _element_value(child) for child in xml_item}
        for xml_item in root.findall('.//item')
    ]


def _element_value(element):
    """Return a JSON-friendly value describing a single XML *element*."""
    children = list(element)
    if children:
        # Structured element (e.g. <image>): recurse so the nested data is
        # kept instead of the whitespace that sits between its children.
        return {child.tag: _element_value(child) for child in children}
    has_text = bool((element.text or "").strip())
    if not has_text and element.attrib:
        # Attribute-only element (e.g. <enclosure url="..." type="..."/>):
        # the attributes are the only information it carries.
        return dict(element.attrib)
    return element.text
def main(url):
    """Fetch the RSS feed at *url* and pretty-print its items as JSON.

    The items are written to stdout.  If the feed cannot be retrieved, or its
    content is not well-formed XML, a message is written to stderr and the
    process exits with status 1.
    """
    try:
        text = fetch_rss_file(url)
    except Exception as e:
        # Top-level boundary: any failure while fetching is reported to the
        # user instead of surfacing as a traceback.  The message goes to
        # stderr so it never mixes with the JSON written to stdout.
        sys.stderr.write("Error retrieving url: {}\n".format(e))
        sys.exit(1)  # Exit with error

    # Extract data structures from the retrieved text
    try:
        items = parse_xml(text)
    except ElementTree.ParseError as e:
        # The server answered, but not with well-formed XML.
        sys.stderr.write("Error parsing RSS feed: {}\n".format(e))
        sys.exit(1)  # Exit with error

    # Just using JSON here to pretty print the result
    # We could store the items in a database, generate HTML, create an API, etc...
    print(json.dumps(items, indent=2))
if __name__ == "__main__":
    # An optional first command-line argument selects the feed to read; with
    # no argument the Aberdeen City Council events feed is used, as before.
    DEFAULT_FEED_URL = "http://www.aberdeencity.gov.uk/accapps/rss/EventRSS.aspx"
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FEED_URL)