#!/usr/bin/env python2.5

# Copyright (c) 2007-2008 INdT.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# ============================================================================
# Author      : Yves Marcoz
# Description : Simple RSS Reader
# ============================================================================
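
# This module is the storage backend of the reader: Feed caches the entries
# of a single RSS/Atom feed on disk, ArchivedArticles extends it for locally
# saved pages, and Listing manages the collection of feeds.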

from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, stat, utime
import md5
import time
import urllib2
import logging
import pickle
import traceback
import feedparser
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin

logger = logging.getLogger(__name__)

#CONFIGDIR="/home/user/.feedingit/"

def getId(string):
    # Feed, entry and image identifiers are md5 hex digests of a string
    # (feed title, entry date+title, or image URL).
    return md5.new(string).hexdigest()

#def getProxy():
#    if gconf.client_get_default().get_bool('/system/http_proxy/use_http_proxy'):
#        port = gconf.client_get_default().get_int('/system/http_proxy/port')
#        http = gconf.client_get_default().get_string('/system/http_proxy/host')
#        proxy = urllib2.ProxyHandler( {"http":"http://%s:%s/" % (http, port)} )
#        return (True, proxy)
#    return (False, None)

# Enable proxy support for images and ArchivedArticles
#(proxy_support, proxy) = getProxy()
#if proxy_support:
#    opener = urllib2.build_opener(proxy)
#    urllib2.install_opener(opener)

# Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, "images":[] }
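
# On disk each feed lives under configdir+uniqueId+".d/": a "feed" pickle of
# the Feed object, an "unread" pickle of the read/unread map, a favicon.ico,
# and one <id>.html file (plus cached images) per article.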

class ImageHandler:
    # Legacy helper: older pickled feeds stored an imageHandler attribute, so
    # a class of this name is kept so those pickles can still be loaded (the
    # attribute is deleted in Listing.loadFeed).
    def __init__(self, configdir):
        self.configdir = configdir

class Feed:
    def __init__(self, uniqueId, name, url):
        self.entries = {}
        self.ids = []
        self.readItems = {}
        self.countUnread = 0
        self.name = name
        self.url = url
        self.updateTime = "Never"
        self.updateStamp = 0
        self.uniqueId = uniqueId
        self.etag = None
        self.modified = None

    def addImage(self, configdir, key, baseurl, url):
        # Download an image into the feed directory, named after the md5 of
        # its URL; if it is already cached, just "touch" it so the expiry
        # pass in updateFeed keeps it.
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                #if url.startswith("http"):
                #    f = urllib2.urlopen(url)
                f = urllib2.urlopen(urljoin(baseurl, url))
                outf = open(filename, "w")
                outf.write(f.read())
                f.close()
                outf.close()
            except:
                logger.error("Could not download " + url)
        else:
            #open(filename,"a").close() # "Touch" the file
            file = open(filename, "a")
            file.close()
            utime(filename, None)
        return filename

    def editFeed(self, url):
        self.url = url

    def saveFeed(self, configdir):
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        file = open(configdir+self.uniqueId+".d/feed", "w")
        pickle.dump(self, file)
        file.close()
        self.saveUnread(configdir)

    def saveUnread(self, configdir):
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        file = open(configdir+self.uniqueId+".d/unread", "w")
        pickle.dump(self.readItems, file)
        file.close()

    def reloadUnread(self, configdir):
        try:
            file = open(configdir+self.uniqueId+".d/unread", "r")
            self.readItems = pickle.load(file)
            file.close()
            self.countUnread = 0
            for id in self.getIds():
                if self.readItems[id] == False:
                    self.countUnread = self.countUnread + 1
        except:
            pass
        return self.countUnread
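
    # updateFeed re-fetches the feed (sending etag/modified so an unchanged
    # feed costs one cheap request), renders new articles and their images to
    # the on-disk cache, and expires old entries: read articles after one
    # expiry window, unread ones after two.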
    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        # Expiry time is in hours
        if proxy == None:
            tmp = feedparser.parse(self.url, etag=self.etag, modified=self.modified)
        else:
            tmp = feedparser.parse(self.url, etag=self.etag, modified=self.modified, handlers=[proxy])
        expiry = float(expiryTime) * 3600.

        # Check if the parse was successful (number of entries > 0, else do nothing)
        if len(tmp["entries"]) > 0:
            # The etag and modified values should only be updated if the content was not null
            try:
                self.etag = tmp["etag"]
            except KeyError:
                self.etag = None
            try:
                self.modified = tmp["modified"]
            except KeyError:
                self.modified = None
            #if len(tmp["entries"])>0:
            if not isdir(configdir+self.uniqueId+".d"):
                mkdir(configdir+self.uniqueId+".d")
            try:
                # Cache the site's favicon alongside the feed data.
                f = urllib2.urlopen(urljoin(tmp["feed"]["link"], "/favicon.ico"))
                data = f.read()
                f.close()
                outf = open(configdir+self.uniqueId+".d/favicon.ico", "w")
                outf.write(data)
                outf.close()
            except:
                #traceback.print_exc()
                pass

            #reversedEntries = self.getEntries()
            #reversedEntries.reverse()

            currentTime = time.time()
            tmpEntries = {}
            tmpIds = []
            for entry in tmp["entries"]:
                (dateTuple, date) = self.extractDate(entry)
                try:
                    entry["title"]
                except KeyError:
                    entry["title"] = "No Title"
                try:
                    entry["link"]
                except KeyError:
                    entry["link"] = ""
                tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
                id = self.generateUniqueId(tmpEntry)

                #articleTime = time.mktime(self.entries[id]["dateTuple"])
                if not id in self.ids:
                    # New article: render it to HTML, optionally caching its
                    # images and rewriting the <img> tags to the local copies.
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    if imageCache:
                        for img in images:
                            try:
                                filename = self.addImage(configdir, self.uniqueId, baseurl, img['src'])
                                img['src'] = filename
                                tmpEntry["images"].append(filename)
                            except:
                                logger.error("Error downloading image %s" % img)
                    tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    tmpEntries[id] = tmpEntry
                    tmpIds.append(id)
                    if id not in self.readItems:
                        self.readItems[id] = False
                else:
                    # Known article: "touch" its cached HTML and images so the
                    # file-expiry pass below does not delete them.
                    try:
                        filename = configdir+self.uniqueId+".d/"+id+".html"
                        file = open(filename, "a")
                        utime(filename, None)
                        file.close()
                        for image in self.entries[id]["images"]:
                            file = open(image, "a")
                            utime(image, None)
                            file.close()
                    except:
                        pass
                    tmpEntries[id] = self.entries[id]
                    tmpIds.append(id)

            # Decide which previously known entries to keep: unread articles
            # survive two expiry windows, read ones only one.
            oldIds = self.ids[:]
            for entryId in oldIds:
                if not entryId in tmpIds:
                    try:
                        articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                        if (currentTime - articleTime > 2*expiry):
                            self.removeEntry(entryId)
                            continue
                        if (currentTime - articleTime > expiry) and (self.isEntryRead(entryId)):
                            # Entry is older than the expiry time, and already read
                            self.removeEntry(entryId)
                            continue
                        tmpEntries[entryId] = self.entries[entryId]
                        tmpIds.append(entryId)
                    except:
                        logger.error("Error purging old articles %s" % entryId)
                        self.removeEntry(entryId)

            self.entries = tmpEntries
            self.ids = tmpIds

            # Recount unread articles and drop read-state entries that no
            # longer correspond to a known article.
            tmpUnread = 0
            for id in self.ids:
                if not self.readItems.has_key(id):
                    self.readItems[id] = False
                if self.readItems[id] == False:
                    tmpUnread = tmpUnread + 1
            keys = self.readItems.keys()
            for id in keys:
                if not id in self.ids:
                    del self.readItems[id]
            self.countUnread = tmpUnread
            self.updateTime = time.asctime()
            self.updateStamp = currentTime
            self.saveFeed(configdir)

            # Remove cached files whose mtime (stats[8]) is older than three
            # expiry windows; the "touch" calls above protect anything that is
            # still referenced by a live entry.
            from glob import glob
            for file in glob(configdir+self.uniqueId+".d/*"):
                stats = stat(file)
                # put the two dates into matching format
                lastmodDate = stats[8]
                expDate = time.time() - expiry*3
                # check if image-last-modified-date is outdated
                if expDate > lastmodDate:
                    try:
                        #print 'Removing', file
                        remove(file)
                    except OSError:
                        logger.error('Could not remove %s' % file)

    def extractContent(self, entry):
        # Prefer the longest available body: full "content" beats "summary",
        # with "description" as the fallback when neither is present.
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def extractDate(self, entry):
        # Returns (time-tuple, formatted string); both empty when the feed
        # provides no usable date.
        if entry.has_key("updated_parsed"):
            date1 = entry["updated_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            date1 = entry["published_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
        else:
            date1 = ""
            date = ""
        return (date1, date)

    def setEntryRead(self, id):
        if self.readItems[id] == False:
            self.countUnread = self.countUnread - 1
            self.readItems[id] = True

    def setEntryUnread(self, id):
        if self.readItems[id] == True:
            self.countUnread = self.countUnread + 1
            self.readItems[id] = False

    def isEntryRead(self, id):
        # Check if an entry is read; return False if the read
        # status of an entry is unknown (id not in readItems)
        return self.readItems.get(id, False)

    def getTitle(self, id):
        return self.entries[id]["title"]

    def getContentLink(self, id):
        if self.entries[id].has_key("contentLink"):
            return self.entries[id]["contentLink"]
        return self.entries[id]["link"]

    def getExternalLink(self, id):
        return self.entries[id]["link"]

    def getDate(self, id):
        return self.entries[id]["date"]

    def getDateTuple(self, id):
        return self.entries[id]["dateTuple"]

    def getUniqueId(self, index):
        return self.ids[index]

    def generateUniqueId(self, entry):
        return getId(entry["date"] + entry["title"])
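
    # Note: entry ids are content-derived (md5 of date+title), so an item
    # republished unchanged keeps its id, while an edited title or date
    # produces a brand-new entry.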

    def getUpdateTime(self):
        return self.updateTime

    def getUpdateStamp(self):
        try:
            return self.updateStamp
        except AttributeError:
            # Feeds pickled by older versions have no updateStamp.
            self.updateStamp = 0
            return self.updateStamp

    def getEntries(self):
        return self.entries

    def getIds(self):
        return self.ids

    def getNextId(self, id):
        # Both navigation helpers wrap around the entry list.
        return self.ids[(self.ids.index(id)+1) % self.getNumberOfEntries()]

    def getPreviousId(self, id):
        return self.ids[(self.ids.index(id)-1) % self.getNumberOfEntries()]

    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.ids)

    def getItem(self, id):
        try:
            return self.entries[id]
        except KeyError:
            return None

    def getImages(self, id):
        return self.entries[id]["images"]

    def getContent(self, id):
        # Prefer the cached HTML rendering when one exists.
        if self.entries[id].has_key("contentLink"):
            file = open(self.entries[id]["contentLink"])
            content = file.read()
            file.close()
            return content
        return self.entries[id]["content"]

    def removeEntry(self, id):
        # Delete the entry's cached HTML and every piece of bookkeeping for
        # it; the logger calls flag inconsistencies between the three stores.
        if self.entries.has_key(id):
            entry = self.entries[id]
            if entry.has_key("contentLink"):
                try:
                    remove(entry["contentLink"]) #os.remove
                except OSError:
                    logger.error("File not found for deletion: %s"
                                 % entry["contentLink"])
            del self.entries[id]
        else:
            logger.error("Entries has no %s key" % id)
        if id in self.ids:
            self.ids.remove(id)
        else:
            logger.error("Ids has no %s key" % id)
        if self.readItems.has_key(id):
            if self.readItems[id] == False:
                self.countUnread = self.countUnread - 1
            del self.readItems[id]
        else:
            logger.error("ReadItems has no %s key" % id)
        # print "Error removing entry %s" % id

    def getArticle(self, entry):
        # Wrap an entry's content in a minimal standalone XHTML page with a
        # linked title and the publication date.
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]
        link = entry['link']
        date = entry["date"]

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body><div><a href="' + link + '">' + title + "</a>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text
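
# ArchivedArticles is a pseudo-feed: instead of being fetched from a URL, its
# entries are added one at a time, and updateFeed downloads each saved page.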
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        entry = {}
        entry["title"] = title
        entry["link"] = link
        entry["summary"] = '<a href="' + link + '">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        (dateTuple, date) = self.extractDate(entry)
        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                    "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[],
                    "downloaded":False, "time":entry["time"] }
        id = self.generateUniqueId(tmpEntry)
        self.entries[id] = tmpEntry
        self.ids.append(id)
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        self.saveUnread(configdir)

    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        # Instead of fetching a feed, download each saved page that has not
        # been retrieved yet, caching its images as Feed does.
        for id in self.getIds():
            entry = self.entries[id]
            if not entry["downloaded"]:
                try:
                    f = urllib2.urlopen(entry["link"])
                    #entry["content"] = f.read()
                    html = f.read()
                    f.close()
                    soup = BeautifulSoup(html)
                    images = soup('img')
                    baseurl = entry["link"]
                    for img in images:
                        filename = self.addImage(configdir, self.uniqueId, baseurl, img['src'])
                        img['src'] = filename
                    entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                    file = open(entry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    if len(entry["content"]) > 0:
                        entry["downloaded"] = True
                        entry["time"] = time.time()
                        self.setEntryUnread(id)
                except:
                    pass
                #currentTime = time.time()
                #expiry = float(expiryTime) * 3600
                #if currentTime - entry["time"] > expiry:
                #    if self.isEntryRead(id):
                #        self.removeEntry(id)
                #    else:
                #        if currentTime - entry["time"] > 2*expiry:
                #            self.removeEntry(id)
        self.updateTime = time.asctime()
        self.updateStamp = time.time()
        self.saveFeed(configdir)

    def purgeReadArticles(self):
        # Iterate over a copy, since removeEntry mutates self.ids.
        ids = self.getIds()[:]
        for id in ids:
            entry = self.entries[id]
            if self.isEntryRead(id):
                self.removeEntry(id)

    def removeArticle(self, id):
        self.removeEntry(id)

    def getArticle(self, index):
        # Unlike Feed.getArticle, this takes an entry id; it marks the entry
        # read and returns the cached content.
        self.setEntryRead(index)
        content = self.getContent(index)
        return content
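
# Listing state is persisted in configdir+"feeds.pickle": a dict keyed by feed
# id with title/url/unread/updateTime (and, after an update, updateStamp)
# values, plus a special "feedingit-order" key holding the display order.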
class Listing:
    # Lists all the feeds in a dictionary, and exposes the data
    def __init__(self, configdir):
        self.configdir = configdir
        if isfile(self.configdir+"feeds.pickle"):
            file = open(self.configdir+"feeds.pickle")
            self.listOfFeeds = pickle.load(file)
            file.close()
        else:
            self.listOfFeeds = {getId("Maemo News"):{"title":"Maemo News", "url":"http://maemo.org/news/items.xml", "unread":0, "updateTime":"Never"}, }
        # "font" is a leftover key from older configurations; drop it.
        if self.listOfFeeds.has_key("font"):
            del self.listOfFeeds["font"]
        if self.listOfFeeds.has_key("feedingit-order"):
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        else:
            self.sortedKeys = self.listOfFeeds.keys()
            if "font" in self.sortedKeys:
                self.sortedKeys.remove("font")
            self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
        #self.closeCurrentlyDisplayedFeed()

    def addArchivedArticle(self, key, index):
        # Copy an article from the given feed into the ArchivedArticles
        # pseudo-feed, creating that feed on first use.
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDateTuple(index)
        if not self.listOfFeeds.has_key("ArchivedArticles"):
            self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
            self.sortedKeys.append("ArchivedArticles")
            #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
            self.saveConfig()
        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.listOfFeeds["ArchivedArticles"]["unread"] = archFeed.getNumberOfUnreadItems()

    def loadFeed(self, key):
        if isfile(self.configdir+key+".d/feed"):
            file = open(self.configdir+key+".d/feed")
            feed = pickle.load(file)
            file.close()
            # Patch up attributes that pickles from older versions may lack.
            try:
                feed.uniqueId
            except AttributeError:
                feed.uniqueId = getId(feed.name)
            try:
                del feed.imageHandler
            except AttributeError:
                pass
            try:
                feed.updateStamp
            except AttributeError:
                feed.updateStamp = 0
            #feed.reloadUnread(self.configdir)
            return feed
        else:
            title = self.listOfFeeds[key]["title"]
            url = self.listOfFeeds[key]["url"]
            if key == "ArchivedArticles":
                feed = ArchivedArticles("ArchivedArticles", title, url)
            else:
                feed = Feed(getId(title), title, url)
            return feed

    def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False):
        for key in self.getListOfFeeds():
            feed = self.loadFeed(key)
            feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
            self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
            self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
            self.listOfFeeds[key]["updateStamp"] = feed.getUpdateStamp()

    def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
        feed = self.getFeed(key)
        feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
        self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
        self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
        self.listOfFeeds[key]["updateStamp"] = feed.getUpdateStamp()

    def editFeed(self, key, title, url):
        self.listOfFeeds[key]["title"] = title
        self.listOfFeeds[key]["url"] = url
        feed = self.loadFeed(key)
        feed.editFeed(url)
        feed.saveFeed(self.configdir)

    def getFeed(self, key):
        try:
            feed = self.loadFeed(key)
            feed.reloadUnread(self.configdir)
            return feed
        except:
            # If the feed file gets corrupted, we need to reset the feed.
            logger.error("getFeed: %s" % traceback.format_exc())
            import dbus
            bus = dbus.SessionBus()
            remote_object = bus.get_object("org.freedesktop.Notifications", # Connection name
                                           "/org/freedesktop/Notifications" # Object's path
                                          )
            iface = dbus.Interface(remote_object, 'org.freedesktop.Notifications')
            iface.SystemNoteInfoprint("Error opening feed %s, it has been reset." % self.getFeedTitle(key))
            if isdir(self.configdir+key+".d/"):
                rmtree(self.configdir+key+".d/")
            feed = self.loadFeed(key)
            return feed

    def getFeedUpdateTime(self, key):
        #print self.listOfFeeds.has_key(key)
        if not self.listOfFeeds[key].has_key("updateTime"):
            self.listOfFeeds[key]["updateTime"] = "Never"
        return self.listOfFeeds[key]["updateTime"]

    def getFeedUpdateStamp(self, key):
        #print self.listOfFeeds.has_key(key)
        if not self.listOfFeeds[key].has_key("updateStamp"):
            self.listOfFeeds[key]["updateStamp"] = 0
        return self.listOfFeeds[key]["updateStamp"]

    def getFeedNumberOfUnreadItems(self, key):
        if not self.listOfFeeds[key].has_key("unread"):
            self.listOfFeeds[key]["unread"] = 0
        return self.listOfFeeds[key]["unread"]

    def updateUnread(self, key, unreadItems):
        self.listOfFeeds[key]["unread"] = unreadItems

    def getFeedTitle(self, key):
        return self.listOfFeeds[key]["title"]

    def getFeedUrl(self, key):
        return self.listOfFeeds[key]["url"]

    def getListOfFeeds(self):
        return self.sortedKeys

    def getFavicon(self, key):
        filename = self.configdir+key+".d/favicon.ico"
        if isfile(filename):
            return filename
        else:
            return None

    def addFeed(self, title, url):
        # Returns False if a feed with the same id already exists.
        if not self.listOfFeeds.has_key(getId(title)):
            self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
            self.sortedKeys.append(getId(title))
            self.saveConfig()
            #self.feeds[getId(title)] = Feed(title, url)
            return True
        else:
            return False

    def removeFeed(self, key):
        del self.listOfFeeds[key]
        self.sortedKeys.remove(key)
        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
        self.saveConfig()

    def saveConfig(self):
        # The display order is stored inside the same pickle, under the
        # reserved "feedingit-order" key.
        self.listOfFeeds["feedingit-order"] = self.sortedKeys
        file = open(self.configdir+"feeds.pickle", "w")
        pickle.dump(self.listOfFeeds, file)
        file.close()

    def moveUp(self, key):
        # Swap with the previous feed; index -1 wraps around to the end.
        index = self.sortedKeys.index(key)
        self.sortedKeys[index] = self.sortedKeys[index-1]
        self.sortedKeys[index-1] = key

    def moveDown(self, key):
        # Swap with the next feed, wrapping around to the start.
        index = self.sortedKeys.index(key)
        index2 = (index+1) % len(self.sortedKeys)
        self.sortedKeys[index] = self.sortedKeys[index2]
        self.sortedKeys[index2] = key
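
# A minimal usage sketch (assumes the config directory already exists; the
# feed URL is hypothetical):
#     listing = Listing('/home/user/.feedingit/')
#     listing.addFeed("Example", "http://example.com/rss.xml")
#     listing.updateFeeds(expiryTime=24, imageCache=True)
#     for key in listing.getListOfFeeds():
#         print listing.getFeedTitle(key), listing.getFeedNumberOfUnreadItems(key)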

if __name__ == "__main__":
    # Ad-hoc debug harness: print update times for feeds whose id starts
    # with 'd8'.
    listing = Listing('/home/user/.feedingit/')
    list = listing.getListOfFeeds()[:]
    for key in list:
        if key.startswith('d8'):
            print listing.getFeedUpdateTime(key)