#!/usr/bin/env python2.5

# Copyright (c) 2007-2008 INdT.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# ============================================================================
# Author      : Yves Marcoz
# Description : Simple RSS Reader
# ============================================================================
from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, utime, stat
import md5
import feedparser
import pickle
import time
import urllib2
import traceback
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
#CONFIGDIR="/home/user/.feedingit/"

def getId(string):
    return md5.new(string).hexdigest()
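# getId() turns arbitrary strings (feed titles, date+title pairs, image URLs)
# into stable 32-character hex digests, used as filesystem-safe cache keys.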
# Gets the Maemo gconf proxy settings (disabled; kept for reference)
#def getProxy():
#    import gconf
#    if gconf.client_get_default().get_bool('/system/http_proxy/use_http_proxy'):
#        port = gconf.client_get_default().get_int('/system/http_proxy/port')
#        http = gconf.client_get_default().get_string('/system/http_proxy/host')
#        proxy = urllib2.ProxyHandler( {"http":"http://%s:%s/"% (http,port)} )
#        return (True, proxy)
#    return (False, None)
# Enable proxy support for images and ArchivedArticles
#(proxy_support, proxy) = getProxy()
#if proxy_support:
#    opener = urllib2.build_opener(proxy)
#    urllib2.install_opener(opener)
# Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, "images":[] }
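# For illustration (hypothetical values), a populated entry looks like:
# { "title": "Some headline",
#   "content": "<p>Article body ...</p>",
#   "date": "Mon, 01 Dec 2008 10:30:00",
#   "dateTuple": <time.struct_time>,
#   "link": "http://example.com/article",
#   "images": ["/home/user/.feedingit/<feedId>.d/<imageId>"] }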
class ImageHandler:
    # Stub kept so that feeds pickled by older versions (which stored an
    # imageHandler attribute) can still be unpickled; see Listing.loadFeed()
    def __init__(self, configdir):
        pass
class Feed:
    def __init__(self, uniqueId, name, url):
        self.entries = {}
        self.ids = []
        self.readItems = {}
        self.name = name
        self.url = url
        self.countUnread = 0
        self.updateTime = "Never"
        self.uniqueId = uniqueId
    def addImage(self, configdir, key, baseurl, url):
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                #if url.startswith("http"):
                #    f = urllib2.urlopen(url)
                #else:
                f = urllib2.urlopen(urljoin(baseurl,url))
                outf = open(filename, "w")
                outf.write(f.read())
                f.close()
                outf.close()
            except:
                print "Could not download " + url
        else:
            #open(filename,"a").close() # "Touch" the file
            file = open(filename,"a")
            utime(filename, None)
            file.close()
        return filename
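    # Touching an existing image keeps its mtime fresh, so the cleanup sweep
    # at the end of updateFeed() (files older than expiry*3) spares images
    # that are still referenced by live articles.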
    def editFeed(self, url):
        self.url = url
    def saveFeed(self, configdir):
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        file = open(configdir+self.uniqueId+".d/feed", "w")
        pickle.dump(self, file)
        file.close()
        self.saveUnread(configdir)
    def saveUnread(self, configdir):
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        file = open(configdir+self.uniqueId+".d/unread", "w")
        pickle.dump(self.readItems, file)
        file.close()
    def reloadUnread(self, configdir):
        try:
            file = open(configdir+self.uniqueId+".d/unread", "r")
            self.readItems = pickle.load( file )
            file.close()
            self.countUnread = 0
            for id in self.getIds():
                if self.readItems[id]==False:
                    self.countUnread = self.countUnread + 1
        except:
            pass
        return self.countUnread
    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        # Expiry time is in hours
        if proxy == None:
            tmp=feedparser.parse(self.url)
        else:
            tmp=feedparser.parse(self.url, handlers = [proxy])
        expiry = float(expiryTime) * 3600.
        # Check if the parse was successful (number of entries > 0, else do nothing)
        if len(tmp["entries"])>0:
            if not isdir(configdir+self.uniqueId+".d"):
                mkdir(configdir+self.uniqueId+".d")
            try:
                f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
                data = f.read()
                f.close()
                outf = open(configdir+self.uniqueId+".d/favicon.ico", "w")
                outf.write(data)
                outf.close()
            except:
                traceback.print_exc()
            #reversedEntries = self.getEntries()
            #reversedEntries.reverse()
            currentTime = time.time()
            tmpEntries = {}
            tmpIds = []
            for entry in tmp["entries"]:
                (dateTuple, date) = self.extractDate(entry)
                try:
                    entry["title"]
                except:
                    entry["title"] = "No Title"
                try:
                    entry["link"]
                except:
                    entry["link"] = ""
                tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
                id = self.generateUniqueId(tmpEntry)
                #articleTime = time.mktime(self.entries[id]["dateTuple"])
                if not id in self.ids:
                    # New article: render it to an HTML file, caching images if asked
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    if imageCache:
                        for img in images:
                            try:
                                filename = self.addImage(configdir, self.uniqueId, baseurl, img['src'])
                                img['src'] = filename
                                tmpEntry["images"].append(filename)
                            except:
                                print "Error downloading image %s" % img
                    tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    tmpEntries[id] = tmpEntry
                    tmpIds.append(id)
                    if id not in self.readItems:
                        self.readItems[id] = False
                else:
                    # Known article: touch the cached files so the cleanup
                    # sweep below does not expire them
                    try:
                        filename = configdir+self.uniqueId+".d/"+id+".html"
                        file = open(filename,"a")
                        utime(filename, None)
                        file.close()
                        for image in self.entries[id]["images"]:
                            file = open(image,"a")
                            utime(image, None)
                            file.close()
                    except:
                        pass
                    tmpEntries[id] = self.entries[id]
                    tmpIds.append(id)
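            # Purge entries that disappeared from the feed: unread articles
            # survive for 2*expiry seconds, read articles only for expiry.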
            oldIds = self.ids[:]
            for entryId in oldIds:
                if not entryId in tmpIds:
                    try:
                        articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                        if (currentTime - articleTime > 2*expiry):
                            self.removeEntry(entryId)
                        else:
                            if (currentTime - articleTime > expiry) and (self.isEntryRead(entryId)):
                                # Entry is older than the expiry time and already read
                                self.removeEntry(entryId)
                            else:
                                tmpEntries[entryId] = self.entries[entryId]
                                tmpIds.append(entryId)
                    except:
                        print "Error purging old articles %s" % entryId
                        self.removeEntry(entryId)
            self.entries = tmpEntries
            self.ids = tmpIds
            # Recount unread items and drop read-state for removed entries
            tmpUnread = 0
            for id in self.ids:
                if not self.readItems.has_key(id):
                    self.readItems[id] = False
                if self.readItems[id]==False:
                    tmpUnread = tmpUnread + 1
            keys = self.readItems.keys()
            for id in keys:
                if not id in self.ids:
                    del self.readItems[id]
            self.countUnread = tmpUnread
            self.updateTime = time.asctime()
            self.saveFeed(configdir)
            # Clean up cached files that have not been touched for a long time
            from glob import glob
            for file in glob(configdir+self.uniqueId+".d/*"):
                # put the two dates into matching format
                stats = stat(file)
                lastmodDate = stats[8]
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                if expDate > lastmodDate:
                    try:
                        #print 'Removing', file
                        remove(file)
                    except OSError:
                        print 'Could not remove', file
    def extractContent(self, entry):
        # Prefer the longer of summary/content; fall back to description
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content
    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            date1 = entry["updated_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S",entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            date1 = entry["published_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
        else:
            date1 = ""
            date = ""
        return (date1, date)
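    # feedparser exposes dates as time.struct_time in the *_parsed fields;
    # the formatted string looks like "Mon, 01 Dec 2008 10:30:00".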
    def setEntryRead(self, id):
        if self.readItems[id]==False:
            self.countUnread = self.countUnread - 1
            self.readItems[id] = True

    def setEntryUnread(self, id):
        if self.readItems[id]==True:
            self.countUnread = self.countUnread + 1
            self.readItems[id] = False

    def isEntryRead(self, id):
        return self.readItems[id]
    def getTitle(self, id):
        return self.entries[id]["title"]

    def getContentLink(self, id):
        if self.entries[id].has_key("contentLink"):
            return self.entries[id]["contentLink"]
        return self.entries[id]["link"]

    def getExternalLink(self, id):
        return self.entries[id]["link"]

    def getDate(self, id):
        return self.entries[id]["date"]

    def getDateTuple(self, id):
        return self.entries[id]["dateTuple"]

    def getUniqueId(self, index):
        return self.ids[index]

    def generateUniqueId(self, entry):
        return getId(entry["date"] + entry["title"])
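    # An entry id is the md5 of its date string plus its title, so an article
    # whose date or title changes is treated as a brand-new entry.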
    def getUpdateTime(self):
        return self.updateTime

    def getEntries(self):
        return self.entries

    def getIds(self):
        return self.ids
    def getNextId(self, id):
        return self.ids[(self.ids.index(id)+1) % self.getNumberOfEntries()]

    def getPreviousId(self, id):
        return self.ids[(self.ids.index(id)-1) % self.getNumberOfEntries()]
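    # The modulo arithmetic wraps around, making article navigation circular.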
    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.ids)
    def getItem(self, id):
        try:
            return self.entries[id]
        except KeyError:
            return None
    def getContent(self, id):
        if self.entries[id].has_key("contentLink"):
            file = open(self.entries[id]["contentLink"])
            content = file.read()
            file.close()
            return content
        return self.entries[id]["content"]
    def removeEntry(self, id):
        #try:
        if self.entries.has_key(id):
            entry = self.entries[id]
            if entry.has_key("contentLink"):
                try:
                    remove(entry["contentLink"]) #os.remove
                except:
                    print "File not found for deletion: %s" % entry["contentLink"]
            del self.entries[id]
        else:
            print "Entries has no %s key" % id
        if id in self.ids:
            self.ids.remove(id)
        else:
            print "Ids has no %s key" % id
        if self.readItems.has_key(id):
            if self.readItems[id]==False:
                self.countUnread = self.countUnread - 1
            del self.readItems[id]
        else:
            print "ReadItems has no %s key" % id
        #except:
        #    print "Error removing entry %s" % id
    def getArticle(self, entry):
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]
        link = entry['link']
        date = entry["date"]

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<br /><small><i>Date: " + date + "</i></small></div>"
        text += "<br /><br />"
        text += content
        text += "</body></html>"
        return text
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        entry = {}
        entry["title"] = title
        entry["link"] = link
        entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        (dateTuple, date) = self.extractDate(entry)
        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                    "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
        id = self.generateUniqueId(tmpEntry)
        self.entries[id] = tmpEntry
        self.ids.append(id)
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        self.saveUnread(configdir)
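    # Archived articles start out as stubs (title plus link only); the full
    # page is fetched by updateFeed() below, which flips "downloaded" to True.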
    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        for id in self.getIds():
            entry = self.entries[id]
            if not entry["downloaded"]:
                try:
                    f = urllib2.urlopen(entry["link"])
                    #entry["content"] = f.read()
                    html = f.read()
                    f.close()
                    soup = BeautifulSoup(html)
                    images = soup('img')
                    baseurl = entry["link"]
                    for img in images:
                        filename = self.addImage(configdir, self.uniqueId, baseurl, img['src'])
                        img['src'] = filename
                    entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                    file = open(entry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    if len(entry["content"]) > 0:
                        entry["downloaded"] = True
                        entry["time"] = time.time()
                        self.setEntryUnread(id)
                except:
                    # Log and continue with the next article
                    traceback.print_exc()
        #currentTime = time.time()
        #expiry = float(expiryTime) * 3600
        #if currentTime - entry["time"] > expiry:
        #    if self.isEntryRead(id):
        #        self.removeEntry(id)
        #    else:
        #        if currentTime - entry["time"] > 2*expiry:
        #            self.removeEntry(id)
        self.updateTime = time.asctime()
        self.saveFeed(configdir)
    def purgeReadArticles(self):
        # Iterate over a copy, since removeEntry() mutates self.ids
        for id in self.getIds()[:]:
            entry = self.entries[id]
            if self.isEntryRead(id):
                self.removeEntry(id)

    def removeArticle(self, id):
        self.removeEntry(id)
    def getArticle(self, index):
        self.setEntryRead(index)
        content = self.getContent(index)
        return content
class Listing:
    # Lists all the feeds in a dictionary, and exposes the data
    def __init__(self, configdir):
        self.configdir = configdir
        if isfile(self.configdir+"feeds.pickle"):
            file = open(self.configdir+"feeds.pickle")
            self.listOfFeeds = pickle.load(file)
            file.close()
        else:
            self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
        if self.listOfFeeds.has_key("font"):
            del self.listOfFeeds["font"]
        if self.listOfFeeds.has_key("feedingit-order"):
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        else:
            self.sortedKeys = self.listOfFeeds.keys()
            if "font" in self.sortedKeys:
                self.sortedKeys.remove("font")
            self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
        #self.closeCurrentlyDisplayedFeed()
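    # The display order of feeds is persisted inside feeds.pickle under the
    # special "feedingit-order" key; without it, feeds are sorted by title.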
    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDateTuple(index)
        if not self.listOfFeeds.has_key("ArchivedArticles"):
            self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
            self.sortedKeys.append("ArchivedArticles")
            #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
            self.saveConfig()
        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.listOfFeeds["ArchivedArticles"]["unread"] = archFeed.getNumberOfUnreadItems()
    def loadFeed(self, key):
        if isfile(self.configdir+key+".d/feed"):
            file = open(self.configdir+key+".d/feed")
            feed = pickle.load(file)
            file.close()
            # Migrate feeds pickled by older versions: add the missing
            # uniqueId and drop the obsolete imageHandler attribute
            try:
                feed.uniqueId
            except AttributeError:
                feed.uniqueId = getId(feed.name)
            try:
                del feed.imageHandler
            except AttributeError:
                pass
            #feed.reloadUnread(self.configdir)
        else:
            title = self.listOfFeeds[key]["title"]
            url = self.listOfFeeds[key]["url"]
            if key == "ArchivedArticles":
                feed = ArchivedArticles("ArchivedArticles", title, url)
            else:
                feed = Feed(getId(title), title, url)
        return feed
    def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False):
        for key in self.getListOfFeeds():
            feed = self.loadFeed(key)
            feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
            self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
            self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()

    def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
        feed = self.getFeed(key)
        feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
        self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
        self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
    def editFeed(self, key, title, url):
        self.listOfFeeds[key]["title"] = title
        self.listOfFeeds[key]["url"] = url
        feed = self.loadFeed(key)
        feed.editFeed(url)
        feed.saveFeed(self.configdir)
    def getFeed(self, key):
        try:
            feed = self.loadFeed(key)
            feed.reloadUnread(self.configdir)
        except:
            # If the feed file gets corrupted, we need to reset the feed.
            traceback.print_exc()
            import dbus
            bus = dbus.SessionBus()
            remote_object = bus.get_object("org.freedesktop.Notifications", # Connection name
                               "/org/freedesktop/Notifications" # Object's path
                              )
            iface = dbus.Interface(remote_object, 'org.freedesktop.Notifications')
            iface.SystemNoteInfoprint("Error opening feed %s, it has been reset." % self.getFeedTitle(key))
            if isdir(self.configdir+key+".d/"):
                rmtree(self.configdir+key+".d/")
            feed = self.loadFeed(key)
        return feed
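    # SystemNoteInfoprint is a Maemo-specific extension of the
    # org.freedesktop.Notifications service; it pops up a system banner.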
    def getFeedUpdateTime(self, key):
        #print self.listOfFeeds.has_key(key)
        if not self.listOfFeeds[key].has_key("updateTime"):
            self.listOfFeeds[key]["updateTime"] = "Never"
        return self.listOfFeeds[key]["updateTime"]

    def getFeedNumberOfUnreadItems(self, key):
        if not self.listOfFeeds[key].has_key("unread"):
            self.listOfFeeds[key]["unread"] = 0
        return self.listOfFeeds[key]["unread"]
    def updateUnread(self, key, unreadItems):
        self.listOfFeeds[key]["unread"] = unreadItems

    def getFeedTitle(self, key):
        return self.listOfFeeds[key]["title"]

    def getFeedUrl(self, key):
        return self.listOfFeeds[key]["url"]

    def getListOfFeeds(self):
        return self.sortedKeys
    def getFavicon(self, key):
        filename = self.configdir+key+".d/favicon.ico"
        if isfile(filename):
            return filename
        # No favicon has been cached for this feed yet
        return None
    def addFeed(self, title, url):
        if not self.listOfFeeds.has_key(getId(title)):
            self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
            self.sortedKeys.append(getId(title))
            self.saveConfig()
            #self.feeds[getId(title)] = Feed(title, url)
            return True
        return False
    def removeFeed(self, key):
        del self.listOfFeeds[key]
        self.sortedKeys.remove(key)
        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
        self.saveConfig()
    def saveConfig(self):
        self.listOfFeeds["feedingit-order"] = self.sortedKeys
        file = open(self.configdir+"feeds.pickle", "w")
        pickle.dump(self.listOfFeeds, file)
        file.close()
    def moveUp(self, key):
        index = self.sortedKeys.index(key)
        self.sortedKeys[index] = self.sortedKeys[index-1]
        self.sortedKeys[index-1] = key

    def moveDown(self, key):
        index = self.sortedKeys.index(key)
        index2 = (index+1)%len(self.sortedKeys)
        self.sortedKeys[index] = self.sortedKeys[index2]
        self.sortedKeys[index2] = key
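    # Both moves wrap around: moveUp on the first feed swaps it with the last
    # (index -1), and moveDown on the last feed swaps it with the first.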
if __name__ == "__main__":
    listing = Listing('/home/user/.feedingit/')
    list = listing.getListOfFeeds()[:]
    for key in list:
        # Debugging filter: only show feeds whose id starts with 'd8'
        if key.startswith('d8'):
            print listing.getFeedUpdateTime(key)
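    # Minimal usage sketch (assumes the config directory exists and the feed
    # URL is reachable; "Example" and the URL below are placeholders):
    #   listing.addFeed("Example", "http://example.com/rss.xml")
    #   listing.updateFeeds()
    #   for key in listing.getListOfFeeds():
    #       print listing.getFeedTitle(key), listing.getFeedNumberOfUnreadItems(key)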