1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
26 from os.path import isfile
27 from os.path import isdir
28 from shutil import rmtree
35 from BeautifulSoup import BeautifulSoup
36 from urlparse import urlparse
38 #CONFIGDIR="/home/user/.feedingit/"
41 return md5.new(string).hexdigest()
43 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
# Constructor of the image-cache handler class (class header and the
# remaining initialisation lines — e.g. self.images — are not visible
# in this sampled view).
def __init__(self, configdir):
# Base directory under which per-feed "<key>.d/" cache folders live.
self.configdir = configdir
def addImage(self, key, baseurl, url):
# Download `url` (absolute, or relative to `baseurl`) into feed `key`'s
# cache directory and return a file:// URL to the local copy.
# NOTE(review): several lines (try/except, else branch, outf.write/close)
# are not visible in this sampled view — comments below are hedged.
filename = self.configdir+key+".d/"+getId(url)
if not isfile(filename):
if url.startswith("http"):
f = urllib2.urlopen(url)
# presumably the else-branch: resolve a relative image URL against baseurl
f = urllib2.urlopen(baseurl+"/"+url)
outf = open(filename, "w")
# failure path — likely inside an except block in the original
print "Could not download" + url
if url in self.images:
# self.images appears to reference-count cached images — TODO confirm
return "file://" + filename
def removeImage(self, key, url):
# Drop one reference to a cached image; the rest of the body
# (reference-count bookkeeping and file deletion) is not visible here.
filename = self.configdir+key+".d/"+getId(url)
if self.images[url] == 0:
def setEntryUnread(self, id):
# Mark entry `id` unread and keep countUnread in sync. An `else:` line
# between the two branches is not visible in this sampled view.
if self.readItems.has_key(id):
if self.readItems[id]==True:
self.countUnread = self.countUnread + 1
self.readItems[id] = False
# unknown id: record it as unread and count it
self.readItems[id] = False
self.countUnread = self.countUnread + 1
def setEntryRead(self, id):
    """Mark entry `id` as read, decrementing the unread counter on the
    first transition from unread to read."""
    if not self.readItems[id]:
        self.countUnread -= 1
        self.readItems[id] = True
97 return self.readItems[id]
def removeEntry(self, id):
    """Forget the read-state record for entry `id`; an unread entry also
    reduces the unread counter."""
    was_read = self.readItems.pop(id)
    if was_read == False:
        self.countUnread = self.countUnread - 1
def __init__(self, uniqueId, name, url, imageHandler):
# Feed constructor — several attribute initialisations (entries,
# readItems, countUnread, name, url, ...) are not visible in this view.
self.updateTime = "Never"
self.uniqueId = uniqueId
self.imageHandler = imageHandler
# Update this feed's URL; the body is not visible in this sampled view.
def editFeed(self, url):
def saveFeed(self, configdir):
# Pickle the whole Feed object to <configdir>/<uniqueId>.d/feed, then
# persist the read/unread map via saveUnread. (A file.close() line is
# not visible in this sampled view — presumably present originally.)
if not isdir(configdir+self.uniqueId+".d"):
mkdir(configdir+self.uniqueId+".d")
file = open(configdir+self.uniqueId+".d/feed", "w")
pickle.dump(self, file )
self.saveUnread(configdir)
def saveUnread(self, configdir):
# Pickle only the readItems map to <configdir>/<uniqueId>.d/unread.
# (A trailing file.close() line is not visible in this sampled view.)
if not isdir(configdir+self.uniqueId+".d"):
mkdir(configdir+self.uniqueId+".d")
file = open(configdir+self.uniqueId+".d/unread", "w")
pickle.dump(self.readItems, file )
def reloadUnread(self, configdir):
# Reload readItems from disk and recount the unread entries from
# scratch. (The try/except wrapper and the countUnread reset line are
# not visible in this sampled view.)
file = open(configdir+self.uniqueId+".d/unread", "r")
self.readItems = pickle.load( file )
for id in self.getIds():
if self.readItems[id]==False:
self.countUnread = self.countUnread + 1
146 return self.countUnread
def updateFeed(self, configdir, expiryTime=24):
# Re-fetch the feed with feedparser, merge new entries with not-yet-
# expired old ones, refresh read/unread bookkeeping, then persist.
# NOTE(review): tmpEntries/tmpIds initialisation lines are not visible
# in this sampled view.
# Expiry time is in hours
tmp=feedparser.parse(self.url)
# Check if the parse was successful (number of entries > 0, else do nothing)
if len(tmp["entries"])>0:
#reversedEntries = self.getEntries()
#reversedEntries.reverse()
# Build a fresh entry map from the freshly parsed feed.
for entry in tmp["entries"]:
(dateTuple, date) = self.extractDate(entry)
tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
"date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
id = self.generateUniqueId(tmpEntry)
tmpEntries[id] = tmpEntry
# Carry over old entries that have not expired; unread entries get a
# doubled grace period before being dropped.
for entryId in self.getIds():
currentTime = time.time()
expiry = float(expiryTime) * 3600.
articleTime = time.mktime(self.entries[entryId]["dateTuple"])
if currentTime - articleTime < expiry:
if not entryId in tmpIds:
tmpEntries[entryId] = self.entries[entryId]
tmpIds.append(entryId)
if (not self.isEntryRead(entryId)) and (currentTime - articleTime < 2*expiry):
tmpEntries[entryId] = self.entries[entryId]
tmpIds.append(entryId)
self.entries = tmpEntries
# Initialize the new articles to unread
tmpReadItems = self.readItems
for id in self.getIds():
if not tmpReadItems.has_key(id):
self.readItems[id] = False
self.readItems[id] = tmpReadItems[id]
if self.readItems[id]==False:
self.countUnread = self.countUnread + 1
self.updateTime = time.asctime()
self.saveFeed(configdir)
def extractContent(self, entry):
# Pick the richest text available from a feedparser entry: summary,
# then content[0].value when longer, falling back to description.
# (The final `return content` line is not visible in this view.)
if entry.has_key('summary'):
content = entry.get('summary', '')
if entry.has_key('content'):
if len(entry.content[0].value) > len(content):
content = entry.content[0].value
content = entry.get('description', '')
def extractDate(self, entry):
# Build (time-tuple, formatted string) from the entry's updated or
# published timestamp; the trailing else branch and return line are
# not visible in this sampled view.
if entry.has_key("updated_parsed"):
date1 = entry["updated_parsed"]
date = time.strftime("%a, %d %b %Y %H:%M:%S",entry["updated_parsed"])
elif entry.has_key("published_parsed"):
date1 = entry["published_parsed"]
date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
def setEntryRead(self, id):
    """Flag article `id` as read and keep countUnread consistent."""
    already_read = self.readItems[id]
    if not already_read:
        self.countUnread -= 1
        self.readItems[id] = True
def setEntryUnread(self, id):
    """Flag article `id` as unread and keep countUnread consistent."""
    if self.readItems[id]:
        self.countUnread += 1
        self.readItems[id] = False
def isEntryRead(self, id):
    """Return the stored read flag for article `id`."""
    flag = self.readItems[id]
    return flag
def getTitle(self, id):
    """Return the title string of the article identified by `id`."""
    entry = self.entries[id]
    return entry["title"]
def getLink(self, id):
    """Return the locally saved content link when present, otherwise the
    article's original feed link."""
    entry = self.entries[id]
    if "contentLink" in entry:
        return entry["contentLink"]
    return entry["link"]
def getDate(self, id):
    """Return the formatted date string stored for article `id`."""
    entry = self.entries[id]
    return entry["date"]
def getDateTuple(self, id):
    """Return the parsed time tuple stored for article `id`."""
    entry = self.entries[id]
    return entry["dateTuple"]
def getUniqueId(self, index):
    """Map a positional index to the corresponding article id."""
    return self.ids[index]
def generateUniqueId(self, entry):
    """Derive a stable article id by hashing date + title via getId."""
    key = entry["date"] + entry["title"]
    return getId(key)
def getUpdateTime(self):
    """Return the human-readable time of the last successful update
    ("Never" until the first one)."""
    return self.updateTime
# Return the article entries (body not visible in this sampled view).
def getEntries(self):
def getNextId(self, id):
    """Return the id that follows `id`, wrapping past the last article."""
    position = self.ids.index(id) + 1
    return self.ids[position % self.getNumberOfEntries()]
def getPreviousId(self, id):
    """Return the id that precedes `id`, wrapping before the first article."""
    position = self.ids.index(id) - 1
    return self.ids[position % self.getNumberOfEntries()]
def getNumberOfUnreadItems(self):
    """Return the cached count of unread articles."""
    count = self.countUnread
    return count
# Number of articles currently held (body not visible in this view).
def getNumberOfEntries(self):
def getItem(self, id):
# Fetch the raw entry dict for `id`; an intervening line (possibly a
# try:) is not visible in this sampled view.
return self.entries[id]
def getContent(self, id):
# Return article HTML, preferring a downloaded local copy referenced by
# "contentLink"; the close/return lines of that branch are not visible
# in this sampled view.
if self.entries[id].has_key("contentLink"):
file = open(self.entries[id]["contentLink"])
content = file.read()
return self.entries[id]["content"]
def removeEntry(self, id):
    """Delete article `id`: release its cached images, remove any saved
    content file, drop it from the entry map, and keep the unread
    counter in sync."""
    entry = self.entries[id]
    # Release every image this article pulled into the shared cache.
    for img in entry["images"]:
        self.imageHandler.removeImage(self.uniqueId, img)
    # BUG FIX: the original wrote entry.has_key["contentLink"] (square
    # brackets instead of a call), which raised TypeError whenever this
    # line was reached; use a membership test instead.
    if "contentLink" in entry:
        os.remove(entry["contentLink"])
    # BUG FIX: self.entries and self.readItems are dicts, which have no
    # .remove() method — the original raised AttributeError here.
    # Delete the keys instead, preserving the intended bookkeeping order.
    del self.entries[id]
    if self.readItems[id] == False:
        self.countUnread = self.countUnread - 1
    del self.readItems[id]
def getArticle(self, id):
# Render the article as a standalone XHTML page and mark it read.
# NOTE(review): the lines defining `link` and `date` and the final
# `return text` / content-append lines are not visible in this view.
self.setEntryRead(id)
entry = self.entries[id]
title = entry['title']
#content = entry.get('content', entry.get('summary_detail', {}))
content = entry["content"]
#text = '''<div style="color: black; background-color: white;">'''
text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
text += "<html><head><title>" + title + "</title>"
text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
#text += '<style> body {-webkit-user-select: none;} </style>'
text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
text += "<BR /><small><i>Date: " + date + "</i></small></div>"
text += "<BR /><BR />"
text += "</body></html>"
# Feed subclass holding manually archived articles, which are fully
# downloaded (with images) for offline reading.
class ArchivedArticles(Feed):
def addArchivedArticle(self, title, link, updated_parsed, configdir):
# Create a synthetic, not-yet-downloaded entry for `link` and persist
# it. (The entry dict creation and its "link" assignment lines are not
# visible in this sampled view.)
entry["title"] = title
entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
entry["updated_parsed"] = updated_parsed
entry["time"] = time.time()
(dateTuple, date) = self.extractDate(entry)
tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
"date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
id = self.generateUniqueId(tmpEntry)
self.entries[id] = tmpEntry
# New archived articles start out unread.
self.readItems[id] = False
self.countUnread = self.countUnread + 1
self.saveFeed(configdir)
self.saveUnread(configdir)
def updateFeed(self, configdir, expiryTime=24):
# Download the full page (and its images) for each not-yet-downloaded
# archived article, then expire old articles. Several lines (try/except,
# the html read, the image loop header, expiry deletions) are not
# visible in this sampled view.
for id in self.getIds():
entry = self.entries[id]
if not entry["downloaded"]:
f = urllib2.urlopen(entry["link"])
#entry["content"] = f.read()
soup = BeautifulSoup(html)
images = soup.body('img')
# Base URL used to resolve relative image sources.
baseurl = ''.join(urlparse(entry["link"])[:-1])
filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
#filename = configdir+self.uniqueId+".d/"+getId(img['src'])
#if not isfile(filename):
# if img['src'].startswith("http"):
# f = urllib2.urlopen(img['src'])
# f = urllib2.urlopen(baseurl+"/"+img['src'])
# #print baseurl+"/"+img['src']
# outf = open(filename, "w")
# outf.write(f.read())
# print "Could not download" + img['src']
entry["images"].append(filename)
# Save the prettified page locally and remember where it lives.
entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
file = open(entry["contentLink"], "w")
file.write(soup.prettify())
if len(entry["content"]) > 0:
entry["downloaded"] = True
entry["time"] = time.time()
self.setEntryUnread(id)
# Expiry: read articles after expiryTime hours, any article after 2x.
currentTime = time.time()
expiry = float(expiryTime) * 3600
if currentTime - entry["time"] > expiry:
if self.isEntryRead(id):
if currentTime - entry["time"] > 2*expiry:
self.updateTime = time.asctime()
self.saveFeed(configdir)
def getArticle(self, index):
# Mark the archived article read and fetch its stored content; the
# trailing return line is not visible in this sampled view.
self.setEntryRead(index)
content = self.getContent(index)
# Lists all the feeds in a dictionary, and expose the data
def __init__(self, configdir):
# Load feeds.pickle and images.pickle from `configdir`, falling back
# to a default Slashdot feed / a fresh ImageHandler. Several lines
# (else: branches, loop headers, legacy-key cleanup) are not visible
# in this sampled view — comments below are hedged accordingly.
self.configdir = configdir
if isfile(self.configdir+"feeds.pickle"):
file = open(self.configdir+"feeds.pickle")
self.listOfFeeds = pickle.load(file)
# default configuration for a first run (presumably the else branch)
self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
if isfile(self.configdir+"images.pickle"):
file = open(self.configdir+"images.pickle")
self.imageHandler = pickle.load(file)
self.imageHandler = ImageHandler(self.configdir)
# Strip legacy bookkeeping keys that once lived in the feeds dict.
if self.listOfFeeds.has_key("font"):
del self.listOfFeeds["font"]
if self.listOfFeeds.has_key("feedingit-order"):
self.sortedKeys = self.listOfFeeds["feedingit-order"]
self.sortedKeys = self.listOfFeeds.keys()
if "font" in self.sortedKeys:
self.sortedKeys.remove("font")
self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
list = self.sortedKeys[:]
#if key.startswith('d8'):
#traceback.print_exc()
# self.sortedKeys.remove(key)
#print key in self.sortedKeys
#print "d8eb3f07572892a7b5ed9c81c5bb21a2" in self.sortedKeys
#print self.listOfFeeds["d8eb3f07572892a7b5ed9c81c5bb21a2"]
self.closeCurrentlyDisplayedFeed()
def addArchivedArticle(self, key, index):
# Copy article `index` of feed `key` into the special
# "ArchivedArticles" feed, creating that feed on first use. (One line
# between the two halves is not visible in this sampled view.)
feed = self.getFeed(key)
title = feed.getTitle(index)
link = feed.getLink(index)
date = feed.getDateTuple(index)
if not self.listOfFeeds.has_key("ArchivedArticles"):
self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
self.sortedKeys.append("ArchivedArticles")
#self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
archFeed = self.getFeed("ArchivedArticles")
archFeed.addArchivedArticle(title, link, date, self.configdir)
# NOTE(review): this stores the *archive* feed's unread count under the
# source feed `key` — looks suspicious; verify intended behavior.
self.listOfFeeds[key]["unread"] = archFeed.getNumberOfUnreadItems()
def loadFeed(self, key):
# Unpickle the Feed object for `key`, or construct a fresh
# Feed/ArchivedArticles if none is cached. Several lines (try wrapper,
# else branches, the final return) are not visible in this view.
if isfile(self.configdir+key+".d/feed"):
file = open(self.configdir+key+".d/feed")
feed = pickle.load(file)
except AttributeError:
# Older pickles predate uniqueId/imageHandler; backfill them here.
feed.uniqueId = getId(feed.name)
feed.imageHandler = self.imageHandler
#feed.reloadUnread(self.configdir)
title = self.listOfFeeds[key]["title"]
url = self.listOfFeeds[key]["url"]
if key == "ArchivedArticles":
feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
feed = Feed(getId(title), title, url, self.imageHandler)
def updateFeeds(self, expiryTime=24):
    """Refresh every known feed and record its unread count and
    last-update time in the feed list (expiryTime is in hours)."""
    for key in self.getListOfFeeds():
        current = self.loadFeed(key)
        current.updateFeed(self.configdir, expiryTime)
        info = self.listOfFeeds[key]
        info["unread"] = current.getNumberOfUnreadItems()
        info["updateTime"] = current.getUpdateTime()
def updateFeed(self, key, expiryTime=24):
    """Refresh a single feed and record its unread count and
    last-update time (expiryTime is in hours)."""
    feed = self.getFeed(key)
    feed.updateFeed(self.configdir, expiryTime)
    status = self.listOfFeeds[key]
    status["unread"] = feed.getNumberOfUnreadItems()
    status["updateTime"] = feed.getUpdateTime()
def editFeed(self, key, title, url):
# Update stored metadata for feed `key`; the remaining line(s) that
# propagate the new URL to the Feed object are not visible here.
self.listOfFeeds[key]["title"] = title
self.listOfFeeds[key]["url"] = url
feed = self.loadFeed(key)
def getFeed(self, key):
# Load the feed and refresh its unread state from disk; the trailing
# return line is not visible in this sampled view.
feed = self.loadFeed(key)
feed.reloadUnread(self.configdir)
def getFeedUpdateTime(self, key):
    """Return the feed's last-update string, inserting and returning
    "Never" when no update has been recorded yet."""
    #print self.listOfFeeds.has_key(key)
    return self.listOfFeeds[key].setdefault("updateTime", "Never")
def getFeedNumberOfUnreadItems(self, key):
    """Return the feed's cached unread count, inserting and returning 0
    when none has been recorded yet."""
    return self.listOfFeeds[key].setdefault("unread", 0)
def updateUnread(self, key, unreadItems):
    """Store an externally computed unread count for feed `key`."""
    record = self.listOfFeeds[key]
    record["unread"] = unreadItems
def getFeedTitle(self, key):
    """Return the display title configured for feed `key`."""
    info = self.listOfFeeds[key]
    return info["title"]
def getFeedUrl(self, key):
    """Return the RSS URL configured for feed `key`."""
    info = self.listOfFeeds[key]
    return info["url"]
def getListOfFeeds(self):
    """Return the feed keys in their current display order."""
    return self.sortedKeys
523 #def getNumberOfUnreadItems(self, key):
524 # if self.listOfFeeds.has_key("unread"):
525 # return self.listOfFeeds[key]["unread"]
def addFeed(self, title, url):
# Register a new feed keyed by a hash of its title; a feed whose title
# hash already exists is not added. (The return statements / else
# branch are not visible in this sampled view.)
if not self.listOfFeeds.has_key(getId(title)):
self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
self.sortedKeys.append(getId(title))
#self.feeds[getId(title)] = Feed(title, url)
def removeFeed(self, key):
# Forget feed `key` and delete its on-disk cache directory. (One line
# between the two halves is not visible in this sampled view.)
del self.listOfFeeds[key]
self.sortedKeys.remove(key)
if isdir(self.configdir+key+".d/"):
rmtree(self.configdir+key+".d/")
def saveConfig(self):
# Persist the feed list (including the display ordering, stored under
# the "feedingit-order" key) and the shared image handler.
# (file.close() lines are not visible in this sampled view.)
self.listOfFeeds["feedingit-order"] = self.sortedKeys
file = open(self.configdir+"feeds.pickle", "w")
pickle.dump(self.listOfFeeds, file)
file = open(self.configdir+"images.pickle", "w")
pickle.dump(self.imageHandler, file)
def moveUp(self, key):
    """Swap feed `key` with its predecessor in the display order
    (when already first, it swaps with the last entry)."""
    keys = self.sortedKeys
    index = keys.index(key)
    keys[index], keys[index - 1] = keys[index - 1], keys[index]
def moveDown(self, key):
    """Swap feed `key` with its successor in the display order
    (when already last, it wraps around to the front)."""
    keys = self.sortedKeys
    here = keys.index(key)
    there = (here + 1) % len(keys)
    keys[here], keys[there] = keys[there], keys[here]
def setCurrentlyDisplayedFeed(self, key):
    """Remember which feed the UI is currently showing."""
    self.currentlyDisplayedFeed = key
def closeCurrentlyDisplayedFeed(self):
    """Record that no feed is being displayed (sentinel value False)."""
    self.currentlyDisplayedFeed = False
def getCurrentlyDisplayedFeed(self):
    """Return the key of the feed being shown, or False when none is."""
    return self.currentlyDisplayedFeed
# Ad-hoc smoke test: print the update times of feeds whose key starts
# with "d8". (The for-loop header lines are not visible in this view.)
if __name__ == "__main__":
listing = Listing('/home/user/.feedingit/')
list = listing.getListOfFeeds()[:]
if key.startswith('d8'):
print listing.getFeedUpdateTime(key)