1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
26 from os.path import isfile, isdir
27 from shutil import rmtree
28 from os import mkdir, remove
34 from BeautifulSoup import BeautifulSoup
35 from urlparse import urljoin
37 #CONFIGDIR="/home/user/.feedingit/"
# Fragment of getId(): the `def getId(string):` header (original line 39) is
# missing from this excerpt. Hashes an arbitrary string to a hex digest; used
# throughout as a filesystem-safe key for feeds, entries and cached images.
# NOTE(review): md5.new() is the deprecated Python 2 `md5` module API.
40 return md5.new(string).hexdigest()
44 # if gconf.client_get_default().get_bool('/system/http_proxy/use_http_proxy'):
45 # port = gconf.client_get_default().get_int('/system/http_proxy/port')
46 # http = gconf.client_get_default().get_string('/system/http_proxy/host')
47 # proxy = proxy = urllib2.ProxyHandler( {"http":"http://%s:%s/"% (http,port)} )
48 # return (True, proxy)
49 # return (False, None)
51 # Enable proxy support for images and ArchivedArticles
52 #(proxy_support, proxy) = getProxy()
54 # opener = urllib2.build_opener(proxy)
55 # urllib2.install_opener(opener)
57 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
# ImageHandler.__init__ fragment — the class header and the initialisation of
# self.images (the refcount dict used by addImage/removeImage below) are
# missing from this excerpt.
60 def __init__(self, configdir):
# Root of the per-feed image caches; paths are built as configdir+key+".d/".
61 self.configdir = configdir
# Download one article image into the feed's cache dir and reference-count it
# (several articles may share the same image file).
# NOTE(review): original lines 67, 70, 73-76, 78, 82 and 84 are missing from
# this excerpt — the try/except, the f/outf write+close calls, the else:
# keyword and (judging by callers, which do `filename = ...addImage(...)`)
# a trailing `return filename`. Confirm against the full source.
64 def addImage(self, key, baseurl, url):
# Cache path: <configdir>/<feed-key>.d/<md5-of-url>.
65 filename = self.configdir+key+".d/"+getId(url)
66 if not isfile(filename):
68 #if url.startswith("http"):
69 # f = urllib2.urlopen(url)
# Resolve a possibly-relative <img src> against the article's base URL.
71 f = urllib2.urlopen(urljoin(baseurl,url))
72 outf = open(filename, "w")
77 print "Could not download " + url
# On failure an empty placeholder file is still created so later lookups
# find a file and do not retry forever.
79 open(filename,"a").close() # "Touch" the file
# assumes self.images is a {filename: refcount} dict — initialised outside
# this excerpt; TODO confirm.
80 if filename in self.images:
81 self.images[filename] += 1
83 self.images[filename] = 1
# Drop one reference to a cached image; when the count reaches zero the file
# is deleted and the bookkeeping entry removed.
# NOTE(review): original lines 88, 90, 92, 96 are missing from this excerpt —
# likely the try:/except KeyError guard around the decrement and the
# except around remove(). Code kept verbatim.
86 def removeImage(self, key, filename):
87 #filename = self.configdir+key+".d/"+getId(url)
89 self.images[filename] -= 1
# Fallback path: force the count to zero so the file gets deleted.
91 self.images[filename] = 0 #Delete image
93 if self.images[filename] == 0:
94 remove(filename) #os.remove
95 del self.images[filename]
# Best-effort: failure to unlink is only reported, never raised.
97 print "Could not remove image %s" % filename
# Feed.__init__ fragment — the `class Feed` header and the initialisation of
# self.entries/self.ids/self.readItems/self.countUnread/self.name/self.url
# (all referenced by the methods below) are missing from this excerpt.
100 def __init__(self, uniqueId, name, url, imageHandler):
# "Never" is the sentinel shown in the UI before the first update.
108 self.updateTime = "Never"
# uniqueId doubles as the on-disk directory name (<configdir>/<uniqueId>.d).
109 self.uniqueId = uniqueId
# Shared ImageHandler used to cache/refcount article images.
110 self.imageHandler = imageHandler
# editFeed fragment — its body (original line 113, presumably `self.url = url`)
# is missing from this excerpt; TODO confirm against the full source.
112 def editFeed(self, url):
# Pickle the whole Feed object to <configdir>/<uniqueId>.d/feed, creating the
# directory on first use, then persist the read/unread map alongside it.
115 def saveFeed(self, configdir):
116 if not isdir(configdir+self.uniqueId+".d"):
117 mkdir(configdir+self.uniqueId+".d")
118 file = open(configdir+self.uniqueId+".d/feed", "w")
119 pickle.dump(self, file )
# NOTE(review): original line 120 (likely file.close()) is missing from this
# excerpt.
121 self.saveUnread(configdir)
# Pickle only the read/unread map to <configdir>/<uniqueId>.d/unread so read
# state survives even if the feed pickle is rebuilt.
123 def saveUnread(self, configdir):
124 if not isdir(configdir+self.uniqueId+".d"):
125 mkdir(configdir+self.uniqueId+".d")
126 file = open(configdir+self.uniqueId+".d/unread", "w")
127 pickle.dump(self.readItems, file )
# NOTE(review): the trailing file.close() (original line 128) is not shown in
# this excerpt.
# Reload the read/unread map from disk and recompute countUnread from it.
130 def reloadUnread(self, configdir):
132 file = open(configdir+self.uniqueId+".d/unread", "r")
133 self.readItems = pickle.load( file )
# NOTE(review): original lines 131 and 134-135 are missing from this excerpt —
# presumably a try:, file.close() and a reset of self.countUnread to 0 before
# the recount below. Code kept verbatim.
136 for id in self.getIds():
137 if self.readItems[id]==False:
138 self.countUnread = self.countUnread + 1
141 return self.countUnread
# Re-fetch and reparse the feed, download articles (and optionally images)
# that were not seen before, expire old entries that dropped out of the feed,
# recount unread items, and persist everything to disk.
# NOTE(review): this excerpt is missing many original lines (145, 147,
# 152-161, 163-164, 170-171, 174-181, 185, 189-197 partially, 202, 204, 207,
# 209-211, 214, 218, 222, 225, 228, 230-235, 241, 244) — the try/except
# scaffolding, the favicon write/close, the <img> loop header, the else
# branches and the tmpEntries/tmpIds/oldIds initialisation are not shown.
# Code kept verbatim; confirm structure against the full source.
143 def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
144 # Expiry time is in hours
146 tmp=feedparser.parse(self.url)
# With a proxy supplied, feedparser fetches through it instead.
148 tmp=feedparser.parse(self.url, handlers = [proxy])
# Convert hours to seconds for the age comparisons below.
149 expiry = float(expiryTime) * 3600.
150 # Check if the parse was successful (number of entries > 0, else do nothing)
151 if len(tmp["entries"])>0:
# Best-effort favicon download for the feed's site.
153 f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
156 outf = open(configdir+self.uniqueId+".d/favicon.ico", "w")
162 traceback.print_exc()
165 #reversedEntries = self.getEntries()
166 #reversedEntries.reverse()
167 if not isdir(configdir+self.uniqueId+".d"):
168 mkdir(configdir+self.uniqueId+".d")
169 currentTime = time.time()
172 for entry in tmp["entries"]:
173 (dateTuple, date) = self.extractDate(entry)
# Fallback title for entries lacking one (guard condition not shown).
177 entry["title"] = "No Title"
182 tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
183 "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
184 id = self.generateUniqueId(tmpEntry)
186 #articleTime = time.mktime(self.entries[id]["dateTuple"])
# Only download content/images for articles not seen before.
187 if not id in self.ids:
188 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
190 baseurl = tmpEntry["link"]
# Cache each <img> via the shared ImageHandler (loop header not shown).
194 filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
196 tmpEntry["images"].append(filename)
198 print "Error downloading image %s" % img
# The rendered article HTML is cached on disk next to the feed pickle.
199 tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
200 file = open(tmpEntry["contentLink"], "w")
201 file.write(soup.prettify())
203 tmpEntries[id] = tmpEntry
# New articles start out unread.
205 if id not in self.readItems:
206 self.readItems[id] = False
# Already-known article: carry the existing entry over unchanged.
208 tmpEntries[id] = self.entries[id]
# Expire entries that are no longer present in the feed: unread entries
# after 2x expiry, read entries after 1x expiry.
212 for entryId in oldIds:
213 if not entryId in tmpIds:
215 articleTime = time.mktime(self.entries[entryId]["dateTuple"])
216 if (currentTime - articleTime > 2*expiry):
217 self.removeEntry(entryId)
219 if (currentTime - articleTime > expiry) and (self.isEntryRead(entryId)):
220 # Entry is over 24 hours, and already read
221 self.removeEntry(entryId)
# Not expired yet: keep it in the new entry map.
223 tmpEntries[entryId] = self.entries[entryId]
224 tmpIds.append(entryId)
226 print "Error purging old articles %s" % entryId
227 self.removeEntry(entryId)
229 self.entries = tmpEntries
# Recount unread items and drop readItems keys for removed entries.
236 if not self.readItems.has_key(id):
237 self.readItems[id] = False
238 if self.readItems[id]==False:
239 tmpUnread = tmpUnread + 1
240 keys = self.readItems.keys()
242 if not id in self.ids:
243 del self.readItems[id]
245 self.countUnread = tmpUnread
246 self.updateTime = time.asctime()
247 self.saveFeed(configdir)
# Pick the richest available text for a feedparser entry: prefer the full
# 'content' over 'summary', falling back to 'description'.
249 def extractContent(self, entry):
# NOTE(review): original lines 250, 256 and 258 are missing from this
# excerpt — presumably the initialisation of `content`, the guard before
# the description fallback, and `return content`.
251 if entry.has_key('summary'):
252 content = entry.get('summary', '')
253 if entry.has_key('content'):
# Keep whichever candidate is longer (feedparser may supply both).
254 if len(entry.content[0].value) > len(content):
255 content = entry.content[0].value
257 content = entry.get('description', '')
# Return the entry's timestamp, preferring feedparser's 'updated_parsed'
# over 'published_parsed'. date1 is the raw time tuple; date the display
# string.
260 def extractDate(self, entry):
261 if entry.has_key("updated_parsed"):
262 date1 = entry["updated_parsed"]
263 date = time.strftime("%a, %d %b %Y %H:%M:%S",entry["updated_parsed"])
264 elif entry.has_key("published_parsed"):
265 date1 = entry["published_parsed"]
266 date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
# NOTE(review): the final else branch and the return of (date1, date)
# (original lines 267-271) are missing from this excerpt — callers use it
# as `(dateTuple, date) = self.extractDate(entry)`.
def setEntryRead(self, id):
    """Mark entry *id* as read, shrinking the unread counter if it was
    previously unread."""
    if not self.readItems[id]:
        self.countUnread -= 1
    self.readItems[id] = True
def setEntryUnread(self, id):
    """Mark entry *id* as unread, growing the unread counter if it was
    previously read."""
    if self.readItems[id]:
        self.countUnread += 1
    self.readItems[id] = False
def isEntryRead(self, id):
    """Return the stored read (True) / unread (False) flag of entry *id*."""
    state = self.readItems[id]
    return state
def getTitle(self, id):
    """Return the title string of entry *id*."""
    entry = self.entries[id]
    return entry["title"]
def getContentLink(self, id):
    """Return where entry *id*'s content lives.

    Prefers the locally cached HTML file path ("contentLink", written by
    updateFeed) when present; otherwise falls back to the entry's external
    "link" URL.
    """
    # dict.has_key() is deprecated and gone in Python 3; the `in` operator
    # is the equivalent membership test and works on Python 2 as well.
    if "contentLink" in self.entries[id]:
        return self.entries[id]["contentLink"]
    return self.entries[id]["link"]
def getExternalLink(self, id):
    """Return the remote article URL stored for entry *id*."""
    entry = self.entries[id]
    return entry["link"]
def getDate(self, id):
    """Return the pre-formatted date string of entry *id*."""
    entry = self.entries[id]
    return entry["date"]
def getDateTuple(self, id):
    """Return the parsed time tuple of entry *id*."""
    entry = self.entries[id]
    return entry["dateTuple"]
def getUniqueId(self, index):
    """Map a positional *index* in the display order to its entry id."""
    ids = self.ids
    return ids[index]
def generateUniqueId(self, entry):
    """Derive a stable id for *entry* by hashing its date plus title via
    the module-level getId() helper."""
    key = entry["date"] + entry["title"]
    return getId(key)
def getUpdateTime(self):
    """Return the asctime string of the last successful update ("Never"
    before the first one)."""
    return self.updateTime
# getEntries fragment — its body (original lines 313-316) is missing from
# this excerpt, so the return value cannot be confirmed here.
312 def getEntries(self):
def getNextId(self, id):
    """Return the id that follows *id* in display order, wrapping from the
    last entry back to the first."""
    position = self.ids.index(id) + 1
    return self.ids[position % self.getNumberOfEntries()]
def getPreviousId(self, id):
    """Return the id preceding *id* in display order, wrapping from the
    first entry to the last."""
    position = self.ids.index(id) - 1
    return self.ids[position % self.getNumberOfEntries()]
def getNumberOfUnreadItems(self):
    """Return the cached count of unread entries."""
    return self.countUnread
# getNumberOfEntries fragment — the body (original lines 328-329) is missing
# from this excerpt; getNextId/getPreviousId use its result as the modulus
# for wrap-around, so it presumably returns len(self.ids) — TODO confirm.
327 def getNumberOfEntries(self):
# Return the raw entry dict stored under *id*.
330 def getItem(self, id):
# NOTE(review): original line 331 (and any matching except, 333-334) is
# missing from this excerpt.
332 return self.entries[id]
# Return an entry's HTML: read the cached on-disk file when "contentLink"
# exists, otherwise fall back to the in-memory "content" field.
336 def getContent(self, id):
337 if self.entries[id].has_key("contentLink"):
338 file = open(self.entries[id]["contentLink"])
339 content = file.read()
# NOTE(review): original lines 340-341 (likely file.close() / return content
# and the else keyword) are missing from this excerpt.
342 return self.entries[id]["content"]
# Delete an entry: release its cached images, remove its on-disk HTML, drop
# it from entries/ids/readItems, and fix up the unread counter.
# NOTE(review): original lines 345, 351, 353, 355, 357-358, 360-362, 368 and
# 370 are missing from this excerpt — the try/except scaffolding, else
# branches and the self.ids removal are not shown. Code kept verbatim.
344 def removeEntry(self, id):
346 if self.entries.has_key(id):
347 entry = self.entries[id]
348 if entry.has_key("images"):
349 for img in entry["images"]:
# Decrement the shared refcount; ImageHandler unlinks the file at zero.
350 self.imageHandler.removeImage(self.uniqueId, img)
352 if entry.has_key("contentLink"):
354 remove(entry["contentLink"]) #os.remove
356 print "File not found for deletion: %s" % entry["contentLink"]
359 print "Entries has no %s key" % id
363 print "Ids has no %s key" % id
364 if self.readItems.has_key(id):
# Removing an unread entry must also shrink the unread counter.
365 if self.readItems[id]==False:
366 self.countUnread = self.countUnread - 1
367 del self.readItems[id]
369 print "ReadItems has no %s key" % id
371 # print "Error removing entry %s" %id
# Render one entry dict as a standalone XHTML page (title, link, date, body).
373 def getArticle(self, entry):
374 #self.setEntryRead(id)
375 #entry = self.entries[id]
376 title = entry['title']
377 #content = entry.get('content', entry.get('summary_detail', {}))
378 content = entry["content"]
# NOTE(review): original lines 379-382, 391 and 393 are missing from this
# excerpt — the assignments of `link` and `date` used below, the line
# appending `content` to the page, and the final `return text`.
383 #text = '''<div style="color: black; background-color: white;">'''
384 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
385 text += "<html><head><title>" + title + "</title>"
386 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
387 #text += '<style> body {-webkit-user-select: none;} </style>'
388 text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
389 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
390 text += "<BR /><BR />"
392 text += "</body></html>"
# Feed subclass holding user-archived articles. Content is fetched on demand
# by its updateFeed override rather than parsed from an RSS document.
# NOTE(review): this class body is fragmentary in this excerpt (original
# lines 397, 399, 403, 409, 419, 422-423, 425, 427, 429, 434, 439-440,
# 453-454, 457-458, 460-461, 465 missing) — try/except scaffolding, loop
# headers and several returns are not shown. Code kept verbatim.
395 class ArchivedArticles(Feed):
# Create a synthetic entry for an article the user chose to archive; it
# starts unread and not yet downloaded.
396 def addArchivedArticle(self, title, link, updated_parsed, configdir):
398 entry["title"] = title
400 entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
401 entry["updated_parsed"] = updated_parsed
402 entry["time"] = time.time()
404 (dateTuple, date) = self.extractDate(entry)
405 tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
406 "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
407 id = self.generateUniqueId(tmpEntry)
408 self.entries[id] = tmpEntry
410 self.readItems[id] = False
411 self.countUnread = self.countUnread + 1
412 self.saveFeed(configdir)
413 self.saveUnread(configdir)
# Download the page (and, when enabled, its images) for each archived
# article not yet marked "downloaded". The expiry parameters are accepted
# only for interface compatibility — the expiry logic below is commented
# out.
415 def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
416 for id in self.getIds():
417 entry = self.entries[id]
418 if not entry["downloaded"]:
420 f = urllib2.urlopen(entry["link"])
421 #entry["content"] = f.read()
424 soup = BeautifulSoup(html)
426 baseurl = entry["link"]
428 filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
430 entry["images"].append(filename)
# The fetched page is cached on disk like a normal feed article.
431 entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
432 file = open(entry["contentLink"], "w")
433 file.write(soup.prettify())
435 if len(entry["content"]) > 0:
436 entry["downloaded"] = True
437 entry["time"] = time.time()
# A freshly downloaded article is surfaced as unread again.
438 self.setEntryUnread(id)
441 #currentTime = time.time()
442 #expiry = float(expiryTime) * 3600
443 #if currentTime - entry["time"] > expiry:
444 # if self.isEntryRead(id):
445 # self.removeEntry(id)
447 # if currentTime - entry["time"] > 2*expiry:
448 # self.removeEntry(id)
449 self.updateTime = time.asctime()
450 self.saveFeed(configdir)
# Drop every archived article already marked as read (loop header and the
# removal call are not shown in this excerpt).
452 def purgeReadArticles(self):
455 entry = self.entries[id]
456 if self.isEntryRead(id):
# removeArticle fragment — its body (original line 460) is missing here.
459 def removeArticle(self, id):
# Return the rendered content and mark the article read in one step.
462 def getArticle(self, index):
463 self.setEntryRead(index)
464 content = self.getContent(index)
# NOTE(review): the final `return content` (original line 465) is not shown.
469 # Lists all the feeds in a dictionary, and expose the data
# Listing.__init__ fragment — the `class Listing` header and several lines
# (original 472, 476-477, 479, 482-483, 489, 496) are missing from this
# excerpt, including the try/except pairs around both pickle loads. Loads
# the feed registry and shared image handler from pickles, migrates legacy
# keys, and establishes the user-visible feed ordering.
470 def __init__(self, configdir):
471 self.configdir = configdir
473 if isfile(self.configdir+"feeds.pickle"):
474 file = open(self.configdir+"feeds.pickle")
475 self.listOfFeeds = pickle.load(file)
# First run: seed the registry with a default Slashdot feed.
478 self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
480 file = open(self.configdir+"images.pickle")
481 self.imageHandler = pickle.load(file)
# No saved image state: start with a fresh handler.
484 self.imageHandler = ImageHandler(self.configdir)
# "font" is a legacy settings key that must not be treated as a feed.
485 if self.listOfFeeds.has_key("font"):
486 del self.listOfFeeds["font"]
# "feedingit-order" stores the user's manual feed ordering.
487 if self.listOfFeeds.has_key("feedingit-order"):
488 self.sortedKeys = self.listOfFeeds["feedingit-order"]
# No saved order: fall back to alphabetical by feed title.
490 self.sortedKeys = self.listOfFeeds.keys()
491 if "font" in self.sortedKeys:
492 self.sortedKeys.remove("font")
493 self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
# NOTE(review): `list` shadows the builtin and this copy appears unused.
494 list = self.sortedKeys[:]
495 #self.closeCurrentlyDisplayedFeed()
# Copy one article out of feed *key* into the special "ArchivedArticles"
# pseudo-feed, creating that feed on first use.
497 def addArchivedArticle(self, key, index):
498 feed = self.getFeed(key)
499 title = feed.getTitle(index)
500 link = feed.getExternalLink(index)
501 date = feed.getDateTuple(index)
502 if not self.listOfFeeds.has_key("ArchivedArticles"):
503 self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
504 self.sortedKeys.append("ArchivedArticles")
505 #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
# NOTE(review): original line 506 is missing from this excerpt.
507 archFeed = self.getFeed("ArchivedArticles")
508 archFeed.addArchivedArticle(title, link, date, self.configdir)
# NOTE(review): this writes the archive's unread count into the *source*
# feed's registry entry (key) rather than "ArchivedArticles" — looks
# suspicious; confirm against upstream before changing.
509 self.listOfFeeds[key]["unread"] = archFeed.getNumberOfUnreadItems()
# Unpickle a Feed from <configdir>/<key>.d/feed, back-filling attributes
# added in newer versions; build a fresh Feed (or ArchivedArticles) object
# when no pickle exists yet.
# NOTE(review): original lines 515-518, 523-524, 529 and 531-532 are missing
# from this excerpt — file.close(), the try that pairs with the except below,
# the else keyword and the return statements are not shown.
511 def loadFeed(self, key):
512 if isfile(self.configdir+key+".d/feed"):
513 file = open(self.configdir+key+".d/feed")
514 feed = pickle.load(file)
519 except AttributeError:
# Older pickles predate uniqueId/imageHandler; synthesise them here.
520 feed.uniqueId = getId(feed.name)
521 feed.imageHandler = self.imageHandler
522 #feed.reloadUnread(self.configdir)
525 title = self.listOfFeeds[key]["title"]
526 url = self.listOfFeeds[key]["url"]
# The archive pseudo-feed gets its dedicated subclass.
527 if key == "ArchivedArticles":
528 feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
530 feed = Feed(getId(title), title, url, self.imageHandler)
def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False):
    """Refresh every known feed, then record each feed's new unread count
    and update time in the registry."""
    for key in self.getListOfFeeds():
        feed = self.loadFeed(key)
        feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
        info = self.listOfFeeds[key]
        info["unread"] = feed.getNumberOfUnreadItems()
        info["updateTime"] = feed.getUpdateTime()
def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
    """Refresh the single feed *key* and sync its unread count and update
    time into the registry."""
    feed = self.getFeed(key)
    feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
    info = self.listOfFeeds[key]
    info["unread"] = feed.getNumberOfUnreadItems()
    info["updateTime"] = feed.getUpdateTime()
# Update a feed's title/url in the registry, then reload the feed object.
546 def editFeed(self, key, title, url):
547 self.listOfFeeds[key]["title"] = title
548 self.listOfFeeds[key]["url"] = url
549 feed = self.loadFeed(key)
# NOTE(review): original lines 550-551 are missing from this excerpt —
# presumably feed.editFeed(url) and a save; confirm against the full source.
# Load a feed plus its unread state; when loading fails (corrupted pickle),
# notify the user and rebuild the feed from scratch.
# NOTE(review): original lines 553, 556, 558, 562 and 568-569 are missing
# from this excerpt — the try:, the except that routes into the recovery
# path below, and the final return are not shown. The
# SystemNoteInfoprint D-Bus call is a platform-specific notification
# mechanism; behaviour outside that platform is not shown here.
552 def getFeed(self, key):
554 feed = self.loadFeed(key)
555 feed.reloadUnread(self.configdir)
557 # If the feed file gets corrupted, we need to reset the feed.
559 bus = dbus.SessionBus()
560 remote_object = bus.get_object("org.freedesktop.Notifications", # Connection name
561 "/org/freedesktop/Notifications" # Object's path
563 iface = dbus.Interface(remote_object, 'org.freedesktop.Notifications')
564 iface.SystemNoteInfoprint("Error opening feed %s, it has been reset." % self.getFeedTitle(key))
# Wipe the on-disk cache so loadFeed builds a fresh Feed object.
565 if isdir(self.configdir+key+".d/"):
566 rmtree(self.configdir+key+".d/")
567 feed = self.loadFeed(key)
def getFeedUpdateTime(self, key):
    """Return the last-update timestamp recorded for feed *key*.

    Registry entries created before the "updateTime" field existed are
    migrated in place by initialising the field to "Never".
    """
    # dict.has_key() is deprecated (removed in Python 3); `in` is the
    # equivalent membership test and also works on Python 2.
    if "updateTime" not in self.listOfFeeds[key]:
        self.listOfFeeds[key]["updateTime"] = "Never"
    return self.listOfFeeds[key]["updateTime"]
def getFeedNumberOfUnreadItems(self, key):
    """Return the unread-article count recorded for feed *key*.

    Registry entries created before the "unread" field existed are
    migrated in place by initialising the field to 0.
    """
    # dict.has_key() is deprecated (removed in Python 3); use `in` instead.
    if "unread" not in self.listOfFeeds[key]:
        self.listOfFeeds[key]["unread"] = 0
    return self.listOfFeeds[key]["unread"]
def updateUnread(self, key, unreadItems):
    """Store *unreadItems* as the unread count for feed *key*."""
    info = self.listOfFeeds[key]
    info["unread"] = unreadItems
def getFeedTitle(self, key):
    """Return the display title configured for feed *key*."""
    info = self.listOfFeeds[key]
    return info["title"]
def getFeedUrl(self, key):
    """Return the subscription URL configured for feed *key*."""
    info = self.listOfFeeds[key]
    return info["url"]
def getListOfFeeds(self):
    """Return the feed keys in the user's chosen display order."""
    return self.sortedKeys
# Return the path of the feed's cached favicon (downloaded by
# Feed.updateFeed).
593 def getFavicon(self, key):
594 filename = self.configdir+key+".d/favicon.ico"
# NOTE(review): original lines 595-598 (presumably an isfile() check and the
# return statements) are missing from this excerpt.
# Register a new feed keyed by the md5 of its title; no-op when a feed with
# the same title already exists.
600 def addFeed(self, title, url):
601 if not self.listOfFeeds.has_key(getId(title)):
602 self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
# Newly added feeds go to the end of the display order.
603 self.sortedKeys.append(getId(title))
# NOTE(review): original lines 604 and 606-608 are missing from this excerpt
# — possibly a success/failure return pair; confirm against the full source.
605 #self.feeds[getId(title)] = Feed(title, url)
# Delete a feed from the registry, from the display order, and from the
# on-disk cache.
610 def removeFeed(self, key):
611 del self.listOfFeeds[key]
612 self.sortedKeys.remove(key)
# NOTE(review): original line 613 is missing from this excerpt.
614 if isdir(self.configdir+key+".d/"):
615 rmtree(self.configdir+key+".d/")
# Persist the feed registry (with the current display order embedded under
# the "feedingit-order" key) and the shared image handler.
618 def saveConfig(self):
619 self.listOfFeeds["feedingit-order"] = self.sortedKeys
620 file = open(self.configdir+"feeds.pickle", "w")
621 pickle.dump(self.listOfFeeds, file)
# NOTE(review): original lines 622 and 625 (likely file.close() calls) are
# missing from this excerpt.
623 file = open(self.configdir+"images.pickle", "w")
624 pickle.dump(self.imageHandler, file)
def moveUp(self, key):
    """Swap feed *key* with its predecessor in the display order; moving
    the first element swaps it with the last (negative-index wrap,
    mirroring moveDown's modulo wrap)."""
    index = self.sortedKeys.index(key)
    other = index - 1
    self.sortedKeys[index], self.sortedKeys[other] = self.sortedKeys[other], key
def moveDown(self, key):
    """Swap feed *key* with its successor in the display order, wrapping
    the last entry around to the front."""
    index = self.sortedKeys.index(key)
    other = (index + 1) % len(self.sortedKeys)
    self.sortedKeys[index], self.sortedKeys[other] = self.sortedKeys[other], key
# Ad-hoc smoke test: print update times for feeds whose key starts "d8".
638 if __name__ == "__main__":
639 listing = Listing('/home/user/.feedingit/')
# NOTE(review): `list` shadows the builtin here.
640 list = listing.getListOfFeeds()[:]
# NOTE(review): original lines 641-642 (the loop header binding `key`) are
# missing from this excerpt.
643 if key.startswith('d8'):
644 print listing.getFeedUpdateTime(key)