1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
26 from os.path import isfile, isdir
27 from shutil import rmtree
28 from os import mkdir, remove, utime
34 from BeautifulSoup import BeautifulSoup
35 from urlparse import urljoin
37 #CONFIGDIR="/home/user/.feedingit/"
# Body of getId(string): hash a string into a hex digest used as a
# filesystem-safe key.  NOTE(review): the enclosing `def getId(string):`
# line is missing from this chunk.
return md5.new(string).hexdigest()
44 # if gconf.client_get_default().get_bool('/system/http_proxy/use_http_proxy'):
45 # port = gconf.client_get_default().get_int('/system/http_proxy/port')
46 # http = gconf.client_get_default().get_string('/system/http_proxy/host')
47 # proxy = proxy = urllib2.ProxyHandler( {"http":"http://%s:%s/"% (http,port)} )
48 # return (True, proxy)
49 # return (False, None)
51 # Enable proxy support for images and ArchivedArticles
52 #(proxy_support, proxy) = getProxy()
54 # opener = urllib2.build_opener(proxy)
55 # urllib2.install_opener(opener)
57 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
def __init__(self, configdir):
    """Constructor taking the configuration directory.

    NOTE(review): the enclosing class header and this method's body are
    missing from this chunk -- do not assume behavior.
    """
def __init__(self, uniqueId, name, url):
    """Initialize a feed with its unique id, display name and URL.

    NOTE(review): most assignments (name, url, etag, modified, entries,
    ids, readItems, countUnread, ...) are missing from this chunk; only
    the two below survive.
    """
    self.updateTime = "Never"  # human-readable last-update label
    self.uniqueId = uniqueId   # key used for the on-disk <uniqueId>.d directory
def addImage(self, configdir, key, baseurl, url):
    # Download an article image into <configdir><key>.d/ keyed by the
    # URL's hash.  NOTE(review): callers use the returned filename, but
    # the return statement, the try/except around the download, and the
    # code copying f's bytes into outf are missing from this chunk;
    # indentation below is a best-effort reconstruction.
    filename = configdir+key+".d/"+getId(url)
    if not isfile(filename):
        # Only fetch when not already cached on disk.
        #if url.startswith("http"):
        #    f = urllib2.urlopen(url)
        f = urllib2.urlopen(urljoin(baseurl,url))
        outf = open(filename, "w")
        # NOTE(review): the two lines below were presumably in an except
        # branch ("touch" a placeholder on failed download).
        print "Could not download " + url
        #open(filename,"a").close() # "Touch" the file
        file = open(filename,"a")
def editFeed(self, url):
    """Change this feed's URL.  NOTE(review): body missing from this chunk."""
def saveFeed(self, configdir):
    """Pickle this feed object to <configdir><uniqueId>.d/feed and then
    persist the read/unread map alongside it via saveUnread().

    Fix: the pickle file handle is now closed explicitly (the visible
    original leaked it, risking truncated writes).
    """
    if not isdir(configdir+self.uniqueId+".d"):
        mkdir(configdir+self.uniqueId+".d")
    f = open(configdir+self.uniqueId+".d/feed", "w")
    try:
        pickle.dump(self, f)
    finally:
        f.close()
    self.saveUnread(configdir)
def saveUnread(self, configdir):
    """Pickle the read/unread map to <configdir><uniqueId>.d/unread.

    Fix: the file handle is now closed explicitly (the visible original
    leaked it).
    """
    if not isdir(configdir+self.uniqueId+".d"):
        mkdir(configdir+self.uniqueId+".d")
    f = open(configdir+self.uniqueId+".d/unread", "w")
    try:
        pickle.dump(self.readItems, f)
    finally:
        f.close()
def reloadUnread(self, configdir):
    # Reload the read/unread map from disk and recount unread entries,
    # returning the count.
    # NOTE(review): the surrounding try/except, file.close(), and the
    # `self.countUnread = 0` reset appear to be missing from this chunk;
    # as written the count would accumulate across calls.
    file = open(configdir+self.uniqueId+".d/unread", "r")
    self.readItems = pickle.load( file )
    for id in self.getIds():
        if self.readItems[id]==False:
            self.countUnread = self.countUnread + 1
    return self.countUnread
def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
    # Re-fetch the feed, merge new entries into the cache, expire old
    # read entries, and refresh on-disk artifacts (HTML pages, images,
    # favicon).
    #
    # NOTE(review): MANY lines are missing from this chunk -- the
    # proxy/no-proxy branch around the two parse calls, try/except
    # wrappers, the `for img in ...` image loop, and the initializations
    # of tmpEntries, tmpIds, oldIds, tmpUnread and stats.  Indentation
    # below is a best-effort reconstruction; treat it as a sketch.
    # Expiry time is in hours
    tmp=feedparser.parse(self.url, etag = self.etag, modified = self.modified)
    # NOTE(review): presumably an if/else on `proxy` chose between this
    # call and the one above.
    tmp=feedparser.parse(self.url, etag = self.etag, modified = self.modified, handlers = [proxy])
    expiry = float(expiryTime) * 3600.
    # Check if the parse was succesful (number of entries > 0, else do nothing)
    if len(tmp["entries"])>0:
        # The etag and modified value should only be updated if the content was not null
        self.etag = tmp["etag"]
        self.modified = tmp["modified"]
        #if len(tmp["entries"])>0:
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        # Best-effort favicon fetch; writing of f's bytes into outf is
        # missing from this chunk.
        f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
        outf = open(configdir+self.uniqueId+".d/favicon.ico", "w")
        #traceback.print_exc()
        #reversedEntries = self.getEntries()
        #reversedEntries.reverse()
        currentTime = time.time()
        for entry in tmp["entries"]:
            (dateTuple, date) = self.extractDate(entry)
            # NOTE(review): presumably guarded by a missing has_key check
            entry["title"] = "No Title"
            tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
            id = self.generateUniqueId(tmpEntry)
            #articleTime = time.mktime(self.entries[id]["dateTuple"])
            if not id in self.ids:
                # New entry: cache its rendered HTML and (optionally) images.
                soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                baseurl = tmpEntry["link"]
                # NOTE(review): `img` comes from a missing `for img in ...`
                # loop over soup's <img> tags; the print was its except branch.
                filename = self.addImage(configdir, self.uniqueId, baseurl, img['src'])
                tmpEntry["images"].append(filename)
                print "Error downloading image %s" % img
                tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                file = open(tmpEntry["contentLink"], "w")
                file.write(soup.prettify())
                tmpEntries[id] = tmpEntry
                if id not in self.readItems:
                    self.readItems[id] = False
            # Existing entry: "touch" its cached files so expiry spares them.
            filename = configdir+self.uniqueId+".d/"+id+".html"
            file = open(filename,"a")
            utime(filename, None)
            for image in self.entries[id]["images"]:
                file = open(image,"a")
            tmpEntries[id] = self.entries[id]
        # Carry over or expire entries no longer present in the fetch.
        for entryId in oldIds:
            if not entryId in tmpIds:
                articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                if (currentTime - articleTime > 2*expiry):
                    self.removeEntry(entryId)
                if (currentTime - articleTime > expiry) and (self.isEntryRead(entryId)):
                    # Entry is over 24 hours, and already read
                    self.removeEntry(entryId)
                tmpEntries[entryId] = self.entries[entryId]
                tmpIds.append(entryId)
                print "Error purging old articles %s" % entryId
                self.removeEntry(entryId)
        self.entries = tmpEntries
        # Recount unread items and drop read-state for vanished ids.
        # NOTE(review): the loop headers over ids/keys are missing here.
        if not self.readItems.has_key(id):
            self.readItems[id] = False
        if self.readItems[id]==False:
            tmpUnread = tmpUnread + 1
        keys = self.readItems.keys()
        if not id in self.ids:
            del self.readItems[id]
        self.countUnread = tmpUnread
        self.updateTime = time.asctime()
        self.updateStamp = currentTime
        self.saveFeed(configdir)
        # Garbage-collect stale cached files by mtime.
        from glob import glob
        for file in glob(configdir+self.uniqueId+".d/*"):
            # put the two dates into matching format
            # NOTE(review): `stats = stat(file)` is missing from this chunk.
            lastmodDate = stats[8]
            expDate = time.time()-expiry*3
            # check if image-last-modified-date is outdated
            if expDate > lastmodDate:
                #print 'Removing', file
                remove(file) # commented out for testing
                print 'Could not remove', file
def extractContent(self, entry):
    """Pick the best textual content from a feedparser entry.

    Prefers the longest of `summary` and `content[0].value`; falls back
    to `description`.  Returns "" when nothing is available.

    Fixes vs. the visible original: `content` is initialized, the
    description fallback only applies when nothing was found, the result
    is returned, and `in` replaces the Py3-removed dict.has_key().
    """
    content = ""
    if 'summary' in entry:
        content = entry.get('summary', '')
    if 'content' in entry:
        # feedparser exposes entry.content as a list of content objects.
        if len(entry.content[0].value) > len(content):
            content = entry.content[0].value
    if content == "":
        content = entry.get('description', '')
    return content
def extractDate(self, entry):
    """Return (dateTuple, dateString) for a feedparser entry.

    Prefers `updated_parsed` over `published_parsed` (both are
    time.struct_time values).  Fixes vs. the visible original: a
    fallback branch for entries with neither field, an explicit return
    (callers unpack `(dateTuple, date)`), and `in` instead of the
    Py3-removed dict.has_key().
    """
    if "updated_parsed" in entry:
        date1 = entry["updated_parsed"]
        date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"])
    elif "published_parsed" in entry:
        date1 = entry["published_parsed"]
        date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
    else:
        # No parsable date on the entry -- hedge with empty values so the
        # caller's unpack still works.
        date1 = ""
        date = ""
    return (date1, date)
def setEntryRead(self, id):
    """Flag entry *id* as read, keeping the unread counter in sync."""
    was_read = self.readItems[id]
    if was_read == False:
        self.countUnread -= 1
    self.readItems[id] = True
def setEntryUnread(self, id):
    """Flag entry *id* as unread, keeping the unread counter in sync."""
    was_read = self.readItems[id]
    if was_read == True:
        self.countUnread += 1
    self.readItems[id] = False
def isEntryRead(self, id):
    """Return the read flag for entry *id*; unknown ids count as unread."""
    try:
        return self.readItems[id]
    except KeyError:
        return False
def getTitle(self, id):
    """Return the title of entry *id*."""
    entry = self.entries[id]
    return entry["title"]
def getContentLink(self, id):
    """Return the local cached-HTML path for entry *id* when one exists,
    otherwise the entry's external link.

    Idiom fix: `in` instead of the Py3-removed dict.has_key(), and a
    single dict lookup instead of three.
    """
    entry = self.entries[id]
    if "contentLink" in entry:
        return entry["contentLink"]
    return entry["link"]
def getExternalLink(self, id):
    """Return the external (web) link of entry *id*."""
    entry = self.entries[id]
    return entry["link"]
def getDate(self, id):
    """Return the formatted publication date string of entry *id*."""
    entry = self.entries[id]
    return entry["date"]
def getDateTuple(self, id):
    """Return the struct_time-style date tuple of entry *id*."""
    entry = self.entries[id]
    return entry["dateTuple"]
def getUniqueId(self, index):
    """Return the entry id at position *index* in display order."""
    ordered = self.ids
    return ordered[index]
def generateUniqueId(self, entry):
    """Derive the stable id for an entry by hashing its date plus title."""
    key_material = entry["date"] + entry["title"]
    return getId(key_material)
def getUpdateTime(self):
    """Return the human-readable time of the last successful update
    ("Never" until the first one)."""
    return self.updateTime
def getUpdateStamp(self):
    """Return the POSIX timestamp of the last update, or 0 when the
    attribute has never been set (e.g. a feed unpickled from an older
    version).

    Fix: the visible original had a duplicated, unreachable `return`
    (the try/except that separated them is lost); getattr with a 0
    default matches Listing.getFeedUpdateStamp's 0 fallback.
    """
    return getattr(self, "updateStamp", 0)
def getEntries(self):
    """Return the feed's entries.  NOTE(review): body missing from this chunk."""
def getNextId(self, id):
    """Return the id following *id* in display order, wrapping at the end."""
    pos = self.ids.index(id)
    return self.ids[(pos + 1) % self.getNumberOfEntries()]
def getPreviousId(self, id):
    """Return the id preceding *id* in display order, wrapping at the start."""
    pos = self.ids.index(id)
    return self.ids[(pos - 1) % self.getNumberOfEntries()]
def getNumberOfUnreadItems(self):
    """Return the cached count of unread entries."""
    return self.countUnread
def getNumberOfEntries(self):
    """Return the number of entries.  NOTE(review): body missing from this chunk."""
def getItem(self, id):
    """Return the raw entry dict stored under *id*."""
    entry = self.entries[id]
    return entry
def getImages(self, id):
    """Return the list of locally cached image paths for entry *id*."""
    entry = self.entries[id]
    return entry["images"]
def getContent(self, id):
    """Return the entry's HTML content.

    Prefers the cached file referenced by "contentLink"; falls back to
    the inline "content" field.  Fixes vs. the visible original: the
    cached content is actually returned (not dropped), the file handle
    is closed, and `in` replaces the Py3-removed dict.has_key().
    """
    entry = self.entries[id]
    if "contentLink" in entry:
        f = open(entry["contentLink"])
        try:
            return f.read()
        finally:
            f.close()
    return entry["content"]
def removeEntry(self, id):
    # Delete an entry: its cached HTML file, its slot in self.entries,
    # and its read-state bookkeeping.
    # NOTE(review): the try/except wrappers, `del self.entries[id]` and
    # the removal from self.ids are missing from this chunk; the bare
    # prints below were diagnostics inside except branches.
    if self.entries.has_key(id):
        entry = self.entries[id]
        if entry.has_key("contentLink"):
            remove(entry["contentLink"]) #os.remove
            print "File not found for deletion: %s" % entry["contentLink"]
    print "Entries has no %s key" % id
    print "Ids has no %s key" % id
    if self.readItems.has_key(id):
        if self.readItems[id]==False:
            self.countUnread = self.countUnread - 1
        del self.readItems[id]
    print "ReadItems has no %s key" % id
    # print "Error removing entry %s" %id
def getArticle(self, entry):
    # Render an entry dict as a standalone XHTML page string.
    # NOTE(review): the lines defining `link` and `date` (presumably from
    # entry["link"] / entry["date"]), the append of `content` into the
    # body, and the final `return text` are missing from this chunk.
    #self.setEntryRead(id)
    #entry = self.entries[id]
    title = entry['title']
    #content = entry.get('content', entry.get('summary_detail', {}))
    content = entry["content"]
    #text = '''<div style="color: black; background-color: white;">'''
    text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
    text += "<html><head><title>" + title + "</title>"
    text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
    #text += '<style> body {-webkit-user-select: none;} </style>'
    text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
    text += "<BR /><small><i>Date: " + date + "</i></small></div>"
    text += "<BR /><BR />"
    text += "</body></html>"
class ArchivedArticles(Feed):
    """Feed subclass that stores locally archived copies of articles."""
    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        # Create a pseudo-entry for an archived article and persist it.
        # NOTE(review): `entry = {}` / feedparser-dict construction and
        # `entry["link"] = link`, plus the self.ids bookkeeping, are
        # missing from this chunk.
        entry["title"] = title
        entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        (dateTuple, date) = self.extractDate(entry)
        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
        id = self.generateUniqueId(tmpEntry)
        self.entries[id] = tmpEntry
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        self.saveUnread(configdir)
def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
    # Download the full HTML page of every not-yet-downloaded archived
    # article, caching its images locally; signature mirrors
    # Feed.updateFeed (expiry handling below is commented out).
    # NOTE(review): try/except wrappers, `html = f.read()`, and the
    # `for img in ...` loop header are missing from this chunk;
    # indentation is a best-effort reconstruction.
    for id in self.getIds():
        entry = self.entries[id]
        if not entry["downloaded"]:
            f = urllib2.urlopen(entry["link"])
            #entry["content"] = f.read()
            soup = BeautifulSoup(html)
            baseurl = entry["link"]
            filename = self.addImage(configdir, self.uniqueId, baseurl, img['src'])
            entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
            file = open(entry["contentLink"], "w")
            file.write(soup.prettify())
            if len(entry["content"]) > 0:
                entry["downloaded"] = True
                entry["time"] = time.time()
                self.setEntryUnread(id)
    #currentTime = time.time()
    #expiry = float(expiryTime) * 3600
    #if currentTime - entry["time"] > expiry:
    #    if self.isEntryRead(id):
    #        self.removeEntry(id)
    #    if currentTime - entry["time"] > 2*expiry:
    #        self.removeEntry(id)
    self.updateTime = time.asctime()
    self.updateStamp = time.time()
    self.saveFeed(configdir)
def purgeReadArticles(self):
    # Drop archived articles that have already been read.
    # NOTE(review): the `for id in self.getIds():` loop header and the
    # removal call inside the isEntryRead() branch are missing from
    # this chunk -- the fragment below is syntactically incomplete.
    entry = self.entries[id]
    if self.isEntryRead(id):
def removeArticle(self, id):
    """Remove archived article *id*.  NOTE(review): body missing from this chunk."""
def getArticle(self, index):
    # Mark the archived article as read and return its cached content.
    # NOTE(review): the final `return content` (and any wrapping markup)
    # is missing from this chunk.
    self.setEntryRead(index)
    content = self.getContent(index)
# Lists all the feeds in a dictionary, and expose the data
def __init__(self, configdir):
    # Load the feed registry from <configdir>feeds.pickle, falling back
    # to a default single-feed configuration, then derive the display
    # order (from a persisted "feedingit-order" entry when present).
    # NOTE(review): the `else:` branches (missing pickle / missing order
    # key), file.close(), and the enclosing `class Listing:` header are
    # missing from this chunk; indentation is reconstructed, so the
    # default-registry line below would clobber the loaded one as shown.
    self.configdir = configdir
    if isfile(self.configdir+"feeds.pickle"):
        file = open(self.configdir+"feeds.pickle")
        self.listOfFeeds = pickle.load(file)
    # presumably the default registry belonged in an else: branch
    self.listOfFeeds = {getId("Maemo News"):{"title":"Maemo News", "url":"http://maemo.org/news/items.xml", "unread":0, "updateTime":"Never"}, }
    if self.listOfFeeds.has_key("font"):
        del self.listOfFeeds["font"]
    if self.listOfFeeds.has_key("feedingit-order"):
        self.sortedKeys = self.listOfFeeds["feedingit-order"]
    # presumably an else: derived the order from the registry keys
    self.sortedKeys = self.listOfFeeds.keys()
    if "font" in self.sortedKeys:
        self.sortedKeys.remove("font")
    self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
    list = self.sortedKeys[:]
    #self.closeCurrentlyDisplayedFeed()
def addArchivedArticle(self, key, index):
    # Copy article `index` of feed `key` into the ArchivedArticles feed,
    # creating the archive feed on first use.
    feed = self.getFeed(key)
    title = feed.getTitle(index)
    link = feed.getExternalLink(index)
    date = feed.getDateTuple(index)
    if not self.listOfFeeds.has_key("ArchivedArticles"):
        self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
        self.sortedKeys.append("ArchivedArticles")
        #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
        # NOTE(review): a line (perhaps saveConfig()) is missing here.
    archFeed = self.getFeed("ArchivedArticles")
    archFeed.addArchivedArticle(title, link, date, self.configdir)
    # NOTE(review): writing the archive's unread count into the SOURCE
    # feed's registry entry looks wrong -- probably should target
    # listOfFeeds["ArchivedArticles"]; confirm before changing.
    self.listOfFeeds[key]["unread"] = archFeed.getNumberOfUnreadItems()
def loadFeed(self, key):
    # Unpickle a feed from <configdir><key>.d/feed, migrating attributes
    # absent from older pickles; otherwise build a fresh Feed (or
    # ArchivedArticles) from the registry entry.
    # NOTE(review): the `try:` lines paired with the bare excepts below,
    # the else: fallback branch, file.close(), and the `return feed`
    # statements are all missing from this chunk -- the fragment is
    # syntactically incomplete and its indentation is a guess.
    if isfile(self.configdir+key+".d/feed"):
        file = open(self.configdir+key+".d/feed")
        feed = pickle.load(file)
        except AttributeError:
            feed.uniqueId = getId(feed.name)
        del feed.imageHandler
        except AttributeError:
        except AttributeError:
        #feed.reloadUnread(self.configdir)
    title = self.listOfFeeds[key]["title"]
    url = self.listOfFeeds[key]["url"]
    if key == "ArchivedArticles":
        feed = ArchivedArticles("ArchivedArticles", title, url)
    feed = Feed(getId(title), title, url)
def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False):
    """Refresh every known feed and mirror its unread count and update
    times into the registry."""
    for key in self.getListOfFeeds():
        feed = self.loadFeed(key)
        feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
        info = self.listOfFeeds[key]
        info["unread"] = feed.getNumberOfUnreadItems()
        info["updateTime"] = feed.getUpdateTime()
        info["updateStamp"] = feed.getUpdateStamp()
def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
    """Refresh a single feed and mirror its unread count and update
    times into the registry."""
    feed = self.getFeed(key)
    feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
    info = self.listOfFeeds[key]
    info["unread"] = feed.getNumberOfUnreadItems()
    info["updateTime"] = feed.getUpdateTime()
    info["updateStamp"] = feed.getUpdateStamp()
def editFeed(self, key, title, url):
    # Update the registry entry for `key` with a new title/URL.
    # NOTE(review): the lines applying the new URL to the loaded feed
    # object (feed.editFeed(url) / save) are missing from this chunk.
    self.listOfFeeds[key]["title"] = title
    self.listOfFeeds[key]["url"] = url
    feed = self.loadFeed(key)
def getFeed(self, key):
    # Load a feed plus its unread map; if the pickle is corrupted,
    # notify the user via a D-Bus system note, wipe the feed directory
    # and load a fresh feed object.
    # NOTE(review): the try:/except wrapper, the `return feed`
    # statements, and the closing paren of the bus.get_object(...) call
    # are missing from this chunk -- the fragment is syntactically
    # incomplete as shown.
    feed = self.loadFeed(key)
    feed.reloadUnread(self.configdir)
    # If the feed file gets corrupted, we need to reset the feed.
    traceback.print_exc()
    bus = dbus.SessionBus()
    remote_object = bus.get_object("org.freedesktop.Notifications", # Connection name
                                   "/org/freedesktop/Notifications" # Object's path
    iface = dbus.Interface(remote_object, 'org.freedesktop.Notifications')
    iface.SystemNoteInfoprint("Error opening feed %s, it has been reset." % self.getFeedTitle(key))
    if isdir(self.configdir+key+".d/"):
        rmtree(self.configdir+key+".d/")
    feed = self.loadFeed(key)
def getFeedUpdateTime(self, key):
    """Return the human-readable last-update time recorded for feed
    *key*, installing the "Never" default for pre-existing registry
    entries that lack it.

    Idiom fix: dict.setdefault replaces the has_key check-then-set pair
    (same behavior -- the default is written back -- with one lookup and
    no Py3-removed has_key).
    """
    #print self.listOfFeeds.has_key(key)
    return self.listOfFeeds[key].setdefault("updateTime", "Never")
def getFeedUpdateStamp(self, key):
    """Return the POSIX timestamp of the last update recorded for feed
    *key*, installing the 0 default for registry entries that lack it.

    Idiom fix: dict.setdefault replaces the has_key check-then-set pair.
    """
    #print self.listOfFeeds.has_key(key)
    return self.listOfFeeds[key].setdefault("updateStamp", 0)
def getFeedNumberOfUnreadItems(self, key):
    """Return the unread count recorded for feed *key*, installing the
    0 default for registry entries that lack it.

    Idiom fix: dict.setdefault replaces the has_key check-then-set pair.
    """
    return self.listOfFeeds[key].setdefault("unread", 0)
def updateUnread(self, key, unreadItems):
    """Record the unread count for feed *key* in the registry."""
    feed_info = self.listOfFeeds[key]
    feed_info["unread"] = unreadItems
def getFeedTitle(self, key):
    """Return the display title of feed *key*."""
    feed_info = self.listOfFeeds[key]
    return feed_info["title"]
def getFeedUrl(self, key):
    """Return the source URL of feed *key*."""
    feed_info = self.listOfFeeds[key]
    return feed_info["url"]
def getListOfFeeds(self):
    """Return the feed keys in display order (the live list, not a copy --
    callers that mutate it must slice first)."""
    return self.sortedKeys
def getFavicon(self, key):
    # Return the cached favicon path for feed `key`.
    # NOTE(review): the isfile() existence check and the return
    # statements are missing from this chunk.
    filename = self.configdir+key+".d/favicon.ico"
def addFeed(self, title, url):
    # Register a new feed keyed by getId(title) unless already present.
    # NOTE(review): the else: branch and the boolean return statements
    # (True on add, presumably False on duplicate) are missing from
    # this chunk.
    if not self.listOfFeeds.has_key(getId(title)):
        self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
        self.sortedKeys.append(getId(title))
        #self.feeds[getId(title)] = Feed(title, url)
def removeFeed(self, key):
    """Forget feed *key*: drop it from the registry and the display
    order, and delete its on-disk cache directory when present."""
    del self.listOfFeeds[key]
    self.sortedKeys.remove(key)
    feedDir = self.configdir + key + ".d/"
    if isdir(feedDir):
        rmtree(feedDir)
def saveConfig(self):
    """Persist the feed registry (plus the display order under the
    reserved "feedingit-order" key) to <configdir>feeds.pickle.

    Fix: close the pickle file handle explicitly (the visible original
    leaked it, risking truncated writes).
    """
    self.listOfFeeds["feedingit-order"] = self.sortedKeys
    f = open(self.configdir+"feeds.pickle", "w")
    try:
        pickle.dump(self.listOfFeeds, f)
    finally:
        f.close()
def moveUp(self, key):
    """Swap *key* with its predecessor in the display order (wraps with
    the last element when *key* is first, as the original did)."""
    pos = self.sortedKeys.index(key)
    self.sortedKeys[pos - 1], self.sortedKeys[pos] = key, self.sortedKeys[pos - 1]
def moveDown(self, key):
    """Swap *key* with its successor in the display order (wraps to the
    first position when *key* is last, as the original did)."""
    pos = self.sortedKeys.index(key)
    nxt = (pos + 1) % len(self.sortedKeys)
    self.sortedKeys[pos], self.sortedKeys[nxt] = self.sortedKeys[nxt], key
if __name__ == "__main__":
    # Ad-hoc manual test: list feeds from the default config directory
    # and print update times for keys starting with 'd8'.
    # NOTE(review): the `for key in list:` loop header is missing from
    # this chunk; `key` is otherwise unbound below.
    listing = Listing('/home/user/.feedingit/')
    list = listing.getListOfFeeds()[:]
    if key.startswith('d8'):
        print listing.getFeedUpdateTime(key)