1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
26 from os.path import isfile
27 from os.path import isdir
28 from shutil import rmtree
35 from BeautifulSoup import BeautifulSoup
36 from urlparse import urlparse
38 #CONFIGDIR="/home/user/.feedingit/"
def getId(string):
    # md5 hex digest of *string*: used as a stable, filesystem-safe key for
    # feeds, entries and cached images throughout this module.
    # NOTE(review): the def line was lost in extraction; restored from the
    # many getId(...) call sites below.
    return md5.new(string).hexdigest()
43 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
    def __init__(self, configdir):
        # Root directory under which per-feed image caches ("<key>.d/") live.
        self.configdir = configdir
        # NOTE(review): initialisation of the per-url refcount dict
        # (self.images, read below) appears lost in extraction.
    def addImage(self, key, baseurl, url):
        # Fetch *url* (absolute, or relative to *baseurl*) into feed *key*'s
        # image cache, named by md5(url); returns a file:// URI for display.
        filename = self.configdir+key+".d/"+getId(url)
        if not isfile(filename):
            # NOTE(review): the try:/else:/write/close lines of this download
            # sequence appear lost in extraction.
            if url.startswith("http"):
                f = urllib2.urlopen(url)
            f = urllib2.urlopen(baseurl+"/"+url)   # (was the relative-URL else-branch)
            outf = open(filename, "w")
            print "Could not download" + url   # (was inside a lost except-branch)
        if url in self.images:
            # (refcount bookkeeping lines appear lost here)
        return "file://" + filename
    def removeImage(self, key, url):
        # Drop one reference to a cached image; the file is removed once the
        # count reaches zero.
        filename = self.configdir+key+".d/"+getId(url)
        # NOTE(review): the refcount decrement appears lost in extraction.
        if self.images[url] == 0:
            # (file deletion / dict cleanup lines appear lost here)
    def setEntryUnread(self, id):
        # Flag entry *id* unread and bump the unread counter.
        if self.readItems.has_key(id):
            if self.readItems[id]==True:
                self.countUnread = self.countUnread + 1
                self.readItems[id] = False
        # NOTE(review): an `else:` line (unknown-id branch) appears lost in
        # extraction above the following two lines.
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
91 def setEntryRead(self, id):
92 if self.readItems[id]==False:
93 self.countUnread = self.countUnread - 1
94 self.readItems[id] = True
97 return self.readItems[id]
99 def removeEntry(self, id):
100 if self.readItems[id]==False:
101 self.countUnread = self.countUnread - 1
102 del self.readItems[id]
    def __init__(self, uniqueId, name, url, imageHandler):
        # NOTE(review): several initialiser lines (name/url/entries/readItems/
        # countUnread, per their use elsewhere in the class) appear lost in
        # extraction.
        self.updateTime = "Never"        # human-readable time of last refresh
        self.uniqueId = uniqueId         # md5-style key; also names the "<id>.d" cache dir
        self.imageHandler = imageHandler # shared downloader/refcounter for entry images
117 def editFeed(self, url):
120 def saveFeed(self, configdir):
121 if not isdir(configdir+self.uniqueId+".d"):
122 mkdir(configdir+self.uniqueId+".d")
123 file = open(configdir+self.uniqueId+".d/feed", "w")
124 pickle.dump(self, file )
126 self.saveUnread(configdir)
128 def saveUnread(self, configdir):
129 if not isdir(configdir+self.uniqueId+".d"):
130 mkdir(configdir+self.uniqueId+".d")
131 file = open(configdir+self.uniqueId+".d/unread", "w")
132 pickle.dump(self.readItems, file )
    def reloadUnread(self, configdir):
        # Re-read the per-entry read/unread flags from disk and recount unread.
        # NOTE(review): file.close() and a countUnread reset line appear lost
        # in extraction.
        file = open(configdir+self.uniqueId+".d/unread", "r")
        self.readItems = pickle.load( file )
        for id in self.getIds():
            if self.readItems[id]==False:
                self.countUnread = self.countUnread + 1
146 return self.countUnread
    def updateFeed(self, configdir, expiryTime=24, proxy=None):
        # Fetch the feed, merge new entries with not-yet-expired old ones,
        # recount unread items, and persist everything.
        # Expiry time is in hours
        # NOTE(review): the proxy if/else lines appear lost in extraction.
        tmp=feedparser.parse(self.url)
        tmp=feedparser.parse(self.url, handlers = [proxy])
        # Check if the parse was succesful (number of entries > 0, else do nothing)
        if len(tmp["entries"])>0:
            #reversedEntries = self.getEntries()
            #reversedEntries.reverse()
            # (initialisation of tmpEntries/tmpIds appears lost here)
            for entry in tmp["entries"]:
                (dateTuple, date) = self.extractDate(entry)
                tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
                id = self.generateUniqueId(tmpEntry)
                tmpEntries[id] = tmpEntry
            # Carry over old entries that have not yet expired.
            for entryId in self.getIds():
                currentTime = time.time()
                expiry = float(expiryTime) * 3600.
                articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                if currentTime - articleTime < expiry:
                    if not entryId in tmpIds:
                        tmpEntries[entryId] = self.entries[entryId]
                        tmpIds.append(entryId)
                # Unread articles get a doubled grace period before expiry.
                # NOTE(review): an else/elif line appears lost above this test.
                if (not self.isEntryRead(entryId)) and (currentTime - articleTime < 2*expiry):
                    tmpEntries[entryId] = self.entries[entryId]
                    tmpIds.append(entryId)
            self.entries = tmpEntries
            # Initialize the new articles to unread
            tmpReadItems = self.readItems
            for id in self.getIds():
                if not tmpReadItems.has_key(id):
                    self.readItems[id] = False
                # (an else: line appears lost above the next line)
                self.readItems[id] = tmpReadItems[id]
                if self.readItems[id]==False:
                    self.countUnread = self.countUnread + 1
        self.updateTime = time.asctime()
        self.saveFeed(configdir)
    def extractContent(self, entry):
        # Pick the richest body text: full 'content' beats 'summary', with
        # 'description' as fallback.
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        # NOTE(review): this was the else-branch in the original; its else:
        # line and the trailing return appear lost in extraction.
        content = entry.get('description', '')
    def extractDate(self, entry):
        # Derive (time-tuple, formatted string) from the entry's updated or
        # published date. NOTE(review): the fallback branch and the return
        # statement appear lost in extraction.
        if entry.has_key("updated_parsed"):
            date1 = entry["updated_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S",entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            date1 = entry["published_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
221 def setEntryRead(self, id):
222 if self.readItems[id]==False:
223 self.countUnread = self.countUnread - 1
224 self.readItems[id] = True
226 def setEntryUnread(self, id):
227 if self.readItems[id]==True:
228 self.countUnread = self.countUnread + 1
229 self.readItems[id] = False
231 def isEntryRead(self, id):
232 return self.readItems[id]
234 def getTitle(self, id):
235 return self.entries[id]["title"]
237 def getLink(self, id):
238 if self.entries[id].has_key("contentLink"):
239 return self.entries[id]["contentLink"]
240 return self.entries[id]["link"]
242 def getDate(self, id):
243 return self.entries[id]["date"]
245 def getDateTuple(self, id):
246 return self.entries[id]["dateTuple"]
248 def getUniqueId(self, index):
249 return self.ids[index]
251 def generateUniqueId(self, entry):
252 return getId(entry["date"] + entry["title"])
254 def getUpdateTime(self):
255 return self.updateTime
    # Accessor for the feed's entries (body not visible in this chunk).
    def getEntries(self):
263 def getNextId(self, id):
264 return self.ids[(self.ids.index(id)+1) % self.getNumberOfEntries()]
266 def getPreviousId(self, id):
267 return self.ids[(self.ids.index(id)-1) % self.getNumberOfEntries()]
269 def getNumberOfUnreadItems(self):
270 return self.countUnread
    # Total number of entries (body not visible in this chunk).
    def getNumberOfEntries(self):
    def getItem(self, id):
        # Return the raw entry dict for *id*.
        # NOTE(review): a line between the def and this return appears lost in
        # extraction (possibly a try:).
        return self.entries[id]
    def getContent(self, id):
        # Body HTML of entry *id*, read from the archived file when present.
        if self.entries[id].has_key("contentLink"):
            file = open(self.entries[id]["contentLink"])
            content = file.read()
            # NOTE(review): file.close() and `return content` appear lost in
            # extraction here.
        return self.entries[id]["content"]
289 def removeEntry(self, id):
290 entry = self.entries[id]
291 for img in entry["images"]:
292 self.imageHandler.removeImage(self.uniqueId, img)
293 if entry.has_key["contentLink"]:
294 os.remove(entry["contentLink"])
295 self.entries.remove(id)
297 if self.readItems[id]==False:
298 self.countUnread = self.countUnread - 1
299 self.readItems.remove(id)
    def getArticle(self, id):
        # Render entry *id* as a standalone XHTML page and mark it read.
        self.setEntryRead(id)
        entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]
        # NOTE(review): the lines extracting `link` and `date` from the entry
        # appear lost in extraction.
        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        # (the line appending `content` appears lost here)
        text += "</body></html>"
        # (the `return text` line appears lost here)
class ArchivedArticles(Feed):
    # Pseudo-feed of articles the user explicitly saved; pages and images are
    # downloaded into the local cache.
    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        # Build a synthetic entry for the saved article and persist the feed.
        # NOTE(review): the line initialising `entry` (and its "link" key,
        # read below) appears lost in extraction.
        entry["title"] = title
        entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        (dateTuple, date) = self.extractDate(entry)
        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                    "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
        id = self.generateUniqueId(tmpEntry)
        self.entries[id] = tmpEntry
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        self.saveUnread(configdir)

    def updateFeed(self, configdir, expiryTime=24, proxy=None):
        # Download each not-yet-downloaded article page, rewriting its images
        # into the local cache, then expire old articles.
        for id in self.getIds():
            entry = self.entries[id]
            if not entry["downloaded"]:
                # NOTE(review): a try: line and the line reading the page into
                # `html` appear lost in extraction.
                f = urllib2.urlopen(entry["link"])
                #entry["content"] = f.read()
                soup = BeautifulSoup(html)
                images = soup.body('img')
                baseurl = ''.join(urlparse(entry["link"])[:-1])
                # (the `for img in images:` loop header appears lost here)
                filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                #filename = configdir+self.uniqueId+".d/"+getId(img['src'])
                #if not isfile(filename):
                # if img['src'].startswith("http"):
                # f = urllib2.urlopen(img['src'])
                # f = urllib2.urlopen(baseurl+"/"+img['src'])
                # #print baseurl+"/"+img['src']
                # outf = open(filename, "w")
                # outf.write(f.read())
                # print "Could not download" + img['src']
                entry["images"].append(filename)
                entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                file = open(entry["contentLink"], "w")
                file.write(soup.prettify())
                if len(entry["content"]) > 0:
                    entry["downloaded"] = True
                    entry["time"] = time.time()
                    self.setEntryUnread(id)
            # Expiry: read articles after expiryTime hours, all after twice it.
            currentTime = time.time()
            expiry = float(expiryTime) * 3600
            if currentTime - entry["time"] > expiry:
                if self.isEntryRead(id):
                    # (the removal call appears lost here)
                    if currentTime - entry["time"] > 2*expiry:
                        # (the removal call appears lost here)
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def getArticle(self, index):
        # Mark the article read and return its locally stored page content.
        self.setEntryRead(index)
        content = self.getContent(index)
        # (the `return content` line appears lost in extraction)
402 # Lists all the feeds in a dictionary, and expose the data
    def __init__(self, configdir):
        # Load the feed catalogue (feeds.pickle) and shared image cache
        # (images.pickle) from *configdir*, falling back to defaults.
        # NOTE(review): several lines (file.close(), else: branch headers)
        # appear lost in extraction throughout this method.
        self.configdir = configdir
        if isfile(self.configdir+"feeds.pickle"):
            file = open(self.configdir+"feeds.pickle")
            self.listOfFeeds = pickle.load(file)
        # Default catalogue when no pickle exists (was the else-branch):
        self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
        if isfile(self.configdir+"images.pickle"):
            file = open(self.configdir+"images.pickle")
            self.imageHandler = pickle.load(file)
        # Fresh handler when no pickle exists (was the else-branch):
        self.imageHandler = ImageHandler(self.configdir)
        # "font" is a legacy config key stored alongside real feeds; drop it.
        if self.listOfFeeds.has_key("font"):
            del self.listOfFeeds["font"]
        # Display order persists under the "feedingit-order" pseudo-key.
        if self.listOfFeeds.has_key("feedingit-order"):
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        # Fallback ordering, alphabetical by title (was the else-branch):
        self.sortedKeys = self.listOfFeeds.keys()
        if "font" in self.sortedKeys:
            self.sortedKeys.remove("font")
        self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
        list = self.sortedKeys[:]
        #if key.startswith('d8'):
        #traceback.print_exc()
        # self.sortedKeys.remove(key)
        #print key in self.sortedKeys
        #print "d8eb3f07572892a7b5ed9c81c5bb21a2" in self.sortedKeys
        #print self.listOfFeeds["d8eb3f07572892a7b5ed9c81c5bb21a2"]
        self.closeCurrentlyDisplayedFeed()
    def addArchivedArticle(self, key, index):
        # Copy article *index* of feed *key* into the "ArchivedArticles"
        # pseudo-feed, creating that feed on first use.
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getLink(index)
        date = feed.getDateTuple(index)
        if not self.listOfFeeds.has_key("ArchivedArticles"):
            self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
            self.sortedKeys.append("ArchivedArticles")
            #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        # NOTE(review): this writes the archive feed's unread count into the
        # *source* feed's record — looks like it should target
        # "ArchivedArticles"; verify.
        self.listOfFeeds[key]["unread"] = archFeed.getNumberOfUnreadItems()
    def loadFeed(self, key):
        # Unpickle feed *key* from its cache dir, or build a fresh Feed object
        # from the stored metadata when no cache exists.
        if isfile(self.configdir+key+".d/feed"):
            file = open(self.configdir+key+".d/feed")
            feed = pickle.load(file)
            # NOTE(review): file.close() and the try: guarding legacy pickles
            # appear lost in extraction before this except-clause.
            except AttributeError:
                # Older pickles predate uniqueId/imageHandler; backfill them.
                feed.uniqueId = getId(feed.name)
                feed.imageHandler = self.imageHandler
            #feed.reloadUnread(self.configdir)
        # (the following was likely an else: branch — header lost)
        title = self.listOfFeeds[key]["title"]
        url = self.listOfFeeds[key]["url"]
        if key == "ArchivedArticles":
            feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
        # (else-branch header lost before the next line)
        feed = Feed(getId(title), title, url, self.imageHandler)
        # (the `return feed` line appears lost in extraction)
479 def updateFeeds(self, expiryTime=24, proxy=None):
480 for key in self.getListOfFeeds():
481 feed = self.loadFeed(key)
482 feed.updateFeed(self.configdir, expiryTime, proxy)
483 self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
484 self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
486 def updateFeed(self, key, expiryTime=24, proxy=None):
487 feed = self.getFeed(key)
488 feed.updateFeed(self.configdir, expiryTime, proxy)
489 self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
490 self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
    def editFeed(self, key, title, url):
        # Update the stored title/url for feed *key*.
        self.listOfFeeds[key]["title"] = title
        self.listOfFeeds[key]["url"] = url
        feed = self.loadFeed(key)
        # NOTE(review): trailing lines (presumably feed.editFeed(url) and a
        # save) appear lost in extraction.
498 def getFeed(self, key):
499 feed = self.loadFeed(key)
500 feed.reloadUnread(self.configdir)
503 def getFeedUpdateTime(self, key):
504 #print self.listOfFeeds.has_key(key)
505 if not self.listOfFeeds[key].has_key("updateTime"):
506 self.listOfFeeds[key]["updateTime"] = "Never"
507 return self.listOfFeeds[key]["updateTime"]
509 def getFeedNumberOfUnreadItems(self, key):
510 if not self.listOfFeeds[key].has_key("unread"):
511 self.listOfFeeds[key]["unread"] = 0
512 return self.listOfFeeds[key]["unread"]
514 def updateUnread(self, key, unreadItems):
515 self.listOfFeeds[key]["unread"] = unreadItems
517 def getFeedTitle(self, key):
518 return self.listOfFeeds[key]["title"]
520 def getFeedUrl(self, key):
521 return self.listOfFeeds[key]["url"]
523 def getListOfFeeds(self):
524 return self.sortedKeys
526 #def getNumberOfUnreadItems(self, key):
527 # if self.listOfFeeds.has_key("unread"):
528 # return self.listOfFeeds[key]["unread"]
    def addFeed(self, title, url):
        # Register a new feed keyed by md5(title); duplicates are ignored.
        if not self.listOfFeeds.has_key(getId(title)):
            self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
            self.sortedKeys.append(getId(title))
            # NOTE(review): the success/duplicate return lines appear lost in
            # extraction.
            #self.feeds[getId(title)] = Feed(title, url)
542 def removeFeed(self, key):
543 del self.listOfFeeds[key]
544 self.sortedKeys.remove(key)
546 if isdir(self.configdir+key+".d/"):
547 rmtree(self.configdir+key+".d/")
550 def saveConfig(self):
551 self.listOfFeeds["feedingit-order"] = self.sortedKeys
552 file = open(self.configdir+"feeds.pickle", "w")
553 pickle.dump(self.listOfFeeds, file)
555 file = open(self.configdir+"images.pickle", "w")
556 pickle.dump(self.imageHandler, file)
559 def moveUp(self, key):
560 index = self.sortedKeys.index(key)
561 self.sortedKeys[index] = self.sortedKeys[index-1]
562 self.sortedKeys[index-1] = key
564 def moveDown(self, key):
565 index = self.sortedKeys.index(key)
566 index2 = (index+1)%len(self.sortedKeys)
567 self.sortedKeys[index] = self.sortedKeys[index2]
568 self.sortedKeys[index2] = key
570 def setCurrentlyDisplayedFeed(self, key):
571 self.currentlyDisplayedFeed = key
572 def closeCurrentlyDisplayedFeed(self):
573 self.currentlyDisplayedFeed = False
574 def getCurrentlyDisplayedFeed(self):
575 return self.currentlyDisplayedFeed
if __name__ == "__main__":
    # Ad-hoc smoke test: print update times for feeds whose key starts 'd8'.
    listing = Listing('/home/user/.feedingit/')
    list = listing.getListOfFeeds()[:]
    # NOTE(review): the loop header binding `key` over the list appears lost
    # in extraction.
    if key.startswith('d8'):
        print listing.getFeedUpdateTime(key)