#!/usr/bin/env python2.5

# Copyright (c) 2007-2008 INdT.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# ============================================================================
# Author      : Yves Marcoz
# Description : Simple RSS Reader
# ============================================================================

from os.path import isfile
from os.path import isdir
from shutil import rmtree
from os import mkdir, remove
import urllib2
import md5
import feedparser
import pickle
import time
from BeautifulSoup import BeautifulSoup
from urlparse import urlparse

#CONFIGDIR="/home/user/.feedingit/"
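
# Every feed and cached file in this module is keyed by the md5 hex digest of
# a seed string: the feed title, an image URL, or an article's date + title.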
def getId(string):
    return md5.new(string).hexdigest()

# Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, "images":[] }
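
# ImageHandler reference-counts the images cached on disk, so a file shared by
# several entries is only deleted when the last entry referencing it goes.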
class ImageHandler:
    def __init__(self, configdir):
        self.configdir = configdir
        self.images = {}

    def addImage(self, key, baseurl, url):
        filename = self.configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                if url.startswith("http"):
                    f = urllib2.urlopen(url)
                else:
                    f = urllib2.urlopen(baseurl+"/"+url)
                outf = open(filename, "w")
                outf.write(f.read())
                f.close()
                outf.close()
            except:
                print "Could not download " + url
        if filename in self.images:
            self.images[filename] += 1
        else:
            self.images[filename] = 1
        return filename
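
    # Drop one reference to a cached image, deleting the file once no entry
    # points at it any more.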
    def removeImage(self, key, filename):
        #filename = self.configdir+key+".d/"+getId(url)
        try:
            self.images[filename] -= 1
        except:
            self.images[filename] = 0 #Delete image
        try:
            if self.images[filename] == 0:
                remove(filename) #os.remove
                del self.images[filename]
        except:
            print "Could not remove image %s" % filename
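
# Feed holds the entries of one subscription plus its read/unread state, and
# persists itself with pickle under <configdir>/<uniqueId>.d/.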
class Feed:
    def __init__(self, uniqueId, name, url, imageHandler):
        self.entries, self.ids, self.readItems = {}, [], {}
        self.name, self.url = name, url
        self.countUnread = 0
        self.updateTime = "Never"
        self.uniqueId = uniqueId
        self.imageHandler = imageHandler

    def editFeed(self, url):
        self.url = url

    def saveFeed(self, configdir):
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        file = open(configdir+self.uniqueId+".d/feed", "w")
        pickle.dump(self, file)
        file.close()
        self.saveUnread(configdir)

    def saveUnread(self, configdir):
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        file = open(configdir+self.uniqueId+".d/unread", "w")
        pickle.dump(self.readItems, file)
        file.close()

    def reloadUnread(self, configdir):
        try:
            file = open(configdir+self.uniqueId+".d/unread", "r")
            self.readItems = pickle.load(file)
            file.close()
            self.countUnread = 0
            for id in self.getIds():
                if self.readItems[id]==False:
                    self.countUnread = self.countUnread + 1
        except:
            self.countUnread = 0
        return self.countUnread
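
    # Fetch the feed with feedparser, add unseen entries (keyed by
    # generateUniqueId), optionally cache their images, then keep existing
    # entries only while younger than expiryTime hours (2x that if unread).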
    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        # Expiry time is in hours
        if proxy == None:
            tmp = feedparser.parse(self.url)
        else:
            tmp = feedparser.parse(self.url, handlers = [proxy])
        # Check if the parse was successful (entries > 0); otherwise do nothing
        if len(tmp["entries"])>0:
            if not isdir(configdir+self.uniqueId+".d"):
                mkdir(configdir+self.uniqueId+".d")
            tmpEntries = {}
            tmpIds = []
            for entry in tmp["entries"]:
                (dateTuple, date) = self.extractDate(entry)
                tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
                id = self.generateUniqueId(tmpEntry)
                if not id in self.ids:
                    soup = BeautifulSoup(tmpEntry["content"])
                    images = soup('img')
                    baseurl = ''.join(urlparse(tmpEntry["link"])[:-1])
                    if imageCache:
                        for img in images:
                            filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                            img['src'] = filename
                            tmpEntry["images"].append(filename)
                    tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    tmpEntries[id] = tmpEntry
                    tmpIds.append(id)
            for entryId in self.getIds()[:]:
                currentTime = time.time()
                expiry = float(expiryTime) * 3600.
                try:
                    articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                    if currentTime - articleTime < expiry:
                        tmpEntries[entryId] = self.entries[entryId]
                        tmpIds.append(entryId)
                    else:
                        if (not self.isEntryRead(entryId)) and (currentTime - articleTime < 2*expiry):
                            tmpEntries[entryId] = self.entries[entryId]
                            tmpIds.append(entryId)
                        else:
                            self.removeEntry(entryId)
                except:
                    self.removeEntry(entryId)
                    print "Error purging old articles %s" % entryId
            self.entries = tmpEntries
            self.ids = tmpIds
            # Initialize the new articles to unread
            tmpReadItems = self.readItems
            self.countUnread = 0
            for id in self.getIds():
                if not tmpReadItems.has_key(id):
                    self.readItems[id] = False
                else:
                    self.readItems[id] = tmpReadItems[id]
                if self.readItems[id]==False:
                    self.countUnread = self.countUnread + 1
        self.updateTime = time.asctime()
        self.saveFeed(configdir)
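
    # Prefer the longest available body: the summary, then content[0].value if
    # it is longer, falling back to description when both are missing.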
    def extractContent(self, entry):
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            date1 = entry["updated_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            date1 = entry["published_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
        else:
            date1 = ""
            date = ""
        return (date1, date)

    def setEntryRead(self, id):
        if self.readItems[id]==False:
            self.countUnread = self.countUnread - 1
            self.readItems[id] = True

    def setEntryUnread(self, id):
        if self.readItems[id]==True:
            self.countUnread = self.countUnread + 1
            self.readItems[id] = False

    def isEntryRead(self, id):
        return self.readItems[id]

    def getTitle(self, id):
        return self.entries[id]["title"]

    def getContentLink(self, id):
        if self.entries[id].has_key("contentLink"):
            return self.entries[id]["contentLink"]
        return self.entries[id]["link"]

    def getExternalLink(self, id):
        return self.entries[id]["link"]

    def getDate(self, id):
        return self.entries[id]["date"]

    def getDateTuple(self, id):
        return self.entries[id]["dateTuple"]

    def getUniqueId(self, index):
        return self.ids[index]
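
    # Entry ids are content-derived (md5 of date + title), so the same article
    # keeps the same id across feed refreshes.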
    def generateUniqueId(self, entry):
        return getId(entry["date"] + entry["title"])

    def getUpdateTime(self):
        return self.updateTime

    def getEntries(self):
        return self.entries

    def getIds(self):
        return self.ids

    def getNextId(self, id):
        return self.ids[(self.ids.index(id)+1) % self.getNumberOfEntries()]

    def getPreviousId(self, id):
        return self.ids[(self.ids.index(id)-1) % self.getNumberOfEntries()]

    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.ids)

    def getItem(self, id):
        try:
            return self.entries[id]
        except:
            return None

    def getContent(self, id):
        if self.entries[id].has_key("contentLink"):
            file = open(self.entries[id]["contentLink"])
            content = file.read()
            file.close()
            return content
        return self.entries[id]["content"]

    def removeEntry(self, id):
        if self.entries.has_key(id):
            entry = self.entries[id]
            if entry.has_key("images"):
                for img in entry["images"]:
                    self.imageHandler.removeImage(self.uniqueId, img)
            if entry.has_key("contentLink"):
                try:
                    remove(entry["contentLink"]) #os.remove
                except:
                    print "File not found for deletion: %s" % entry["contentLink"]
            del self.entries[id]
        else:
            print "Entries has no %s key" % id
        if id in self.ids:
            self.ids.remove(id)
        else:
            print "Ids has no %s key" % id
        if self.readItems.has_key(id):
            if self.readItems[id]==False:
                self.countUnread = self.countUnread - 1
            del self.readItems[id]
        else:
            print "ReadItems has no %s key" % id
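
    # Render one entry as a standalone XHTML page (title, link, date, body)
    # and mark it read as a side effect.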
    def getArticle(self, id):
        self.setEntryRead(id)
        entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]
        link = entry['link']
        date = entry["date"]

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text
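
# ArchivedArticles is a Feed whose entries are saved by the user rather than
# parsed from an RSS URL; its updateFeed downloads each article's full linked
# page instead of re-fetching a feed.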
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        entry = {}
        entry["title"] = title
        entry["link"] = link
        entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        (dateTuple, date) = self.extractDate(entry)
        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                    "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
        id = self.generateUniqueId(tmpEntry)
        self.entries[id] = tmpEntry
        self.ids.append(id)
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        self.saveUnread(configdir)
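
    # Download each not-yet-fetched article's full page, cache its images, and
    # expire entries on the same read/unread schedule as a normal feed.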
    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        for id in self.getIds()[:]:
            entry = self.entries[id]
            if not entry["downloaded"]:
                try:
                    f = urllib2.urlopen(entry["link"])
                    #entry["content"] = f.read()
                    html = f.read()
                    f.close()
                    soup = BeautifulSoup(html)
                    images = soup('img')
                    baseurl = ''.join(urlparse(entry["link"])[:-1])
                    for img in images:
                        filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                        img['src'] = filename
                        entry["images"].append(filename)
                    entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                    file = open(entry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    if len(entry["content"]) > 0:
                        entry["downloaded"] = True
                        entry["time"] = time.time()
                        self.setEntryUnread(id)
                except:
                    pass
            currentTime = time.time()
            expiry = float(expiryTime) * 3600
            if currentTime - entry["time"] > expiry:
                if self.isEntryRead(id):
                    self.removeEntry(id)
                else:
                    if currentTime - entry["time"] > 2*expiry:
                        self.removeEntry(id)
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def getArticle(self, index):
        self.setEntryRead(index)
        content = self.getContent(index)
        return content
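
# On-disk layout managed by Listing: <configdir>/feeds.pickle holds the feed
# metadata and ordering, <configdir>/images.pickle the image refcounts, and
# each feed caches its pickled state and HTML under <configdir>/<feedId>.d/.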
class Listing:
    # Lists all the feeds in a dictionary, and exposes the data
    def __init__(self, configdir):
        self.configdir = configdir
        if isfile(self.configdir+"feeds.pickle"):
            file = open(self.configdir+"feeds.pickle")
            self.listOfFeeds = pickle.load(file)
            file.close()
        else:
            self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
        if isfile(self.configdir+"images.pickle"):
            file = open(self.configdir+"images.pickle")
            self.imageHandler = pickle.load(file)
            file.close()
        else:
            self.imageHandler = ImageHandler(self.configdir)
        if self.listOfFeeds.has_key("font"):
            del self.listOfFeeds["font"]
        if self.listOfFeeds.has_key("feedingit-order"):
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        else:
            self.sortedKeys = self.listOfFeeds.keys()
            if "font" in self.sortedKeys:
                self.sortedKeys.remove("font")
            self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
        list = self.sortedKeys[:]
        self.closeCurrentlyDisplayedFeed()

    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDateTuple(index)
        if not self.listOfFeeds.has_key("ArchivedArticles"):
            self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
            self.sortedKeys.append("ArchivedArticles")
            #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.listOfFeeds["ArchivedArticles"]["unread"] = archFeed.getNumberOfUnreadItems()
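
    # Unpickle the feed if a cache exists, otherwise build a fresh Feed or
    # ArchivedArticles object; older pickles may predate the uniqueId and
    # imageHandler attributes, so those are backfilled on load.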
    def loadFeed(self, key):
        if isfile(self.configdir+key+".d/feed"):
            file = open(self.configdir+key+".d/feed")
            feed = pickle.load(file)
            file.close()
            try:
                feed.uniqueId
                feed.imageHandler
            except AttributeError:
                feed.uniqueId = getId(feed.name)
                feed.imageHandler = self.imageHandler
            #feed.reloadUnread(self.configdir)
        else:
            title = self.listOfFeeds[key]["title"]
            url = self.listOfFeeds[key]["url"]
            if key == "ArchivedArticles":
                feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
            else:
                feed = Feed(getId(title), title, url, self.imageHandler)
        return feed

    def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False):
        for key in self.getListOfFeeds():
            feed = self.loadFeed(key)
            feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
            self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
            self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()

    def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
        feed = self.getFeed(key)
        feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
        self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
        self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()

    def editFeed(self, key, title, url):
        self.listOfFeeds[key]["title"] = title
        self.listOfFeeds[key]["url"] = url
        feed = self.loadFeed(key)
        feed.editFeed(url)

    def getFeed(self, key):
        feed = self.loadFeed(key)
        feed.reloadUnread(self.configdir)
        return feed

    def getFeedUpdateTime(self, key):
        if not self.listOfFeeds[key].has_key("updateTime"):
            self.listOfFeeds[key]["updateTime"] = "Never"
        return self.listOfFeeds[key]["updateTime"]

    def getFeedNumberOfUnreadItems(self, key):
        if not self.listOfFeeds[key].has_key("unread"):
            self.listOfFeeds[key]["unread"] = 0
        return self.listOfFeeds[key]["unread"]

    def updateUnread(self, key, unreadItems):
        self.listOfFeeds[key]["unread"] = unreadItems

    def getFeedTitle(self, key):
        return self.listOfFeeds[key]["title"]

    def getFeedUrl(self, key):
        return self.listOfFeeds[key]["url"]

    def getListOfFeeds(self):
        return self.sortedKeys

    def addFeed(self, title, url):
        if not self.listOfFeeds.has_key(getId(title)):
            self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
            self.sortedKeys.append(getId(title))
            self.saveConfig()
            #self.feeds[getId(title)] = Feed(title, url)
            return True
        else:
            return False

    def removeFeed(self, key):
        del self.listOfFeeds[key]
        self.sortedKeys.remove(key)
        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")

    def saveConfig(self):
        self.listOfFeeds["feedingit-order"] = self.sortedKeys
        file = open(self.configdir+"feeds.pickle", "w")
        pickle.dump(self.listOfFeeds, file)
        file.close()
        file = open(self.configdir+"images.pickle", "w")
        pickle.dump(self.imageHandler, file)
        file.close()

    def moveUp(self, key):
        index = self.sortedKeys.index(key)
        self.sortedKeys[index] = self.sortedKeys[index-1]
        self.sortedKeys[index-1] = key

    def moveDown(self, key):
        index = self.sortedKeys.index(key)
        index2 = (index+1) % len(self.sortedKeys)
        self.sortedKeys[index] = self.sortedKeys[index2]
        self.sortedKeys[index2] = key

    def setCurrentlyDisplayedFeed(self, key):
        self.currentlyDisplayedFeed = key

    def closeCurrentlyDisplayedFeed(self):
        self.currentlyDisplayedFeed = False

    def getCurrentlyDisplayedFeed(self):
        return self.currentlyDisplayedFeed
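
# Minimal smoke test when the module is run directly: load the default
# on-device config directory and print update times for the debug-prefixed
# feed keys.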
if __name__ == "__main__":
    listing = Listing('/home/user/.feedingit/')
    list = listing.getListOfFeeds()[:]
    for key in list:
        if key.startswith('d8'):
            print listing.getFeedUpdateTime(key)