1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
from os import mkdir, remove
from os.path import isfile, isdir
from shutil import rmtree
import hashlib

from BeautifulSoup import BeautifulSoup
from urlparse import urlparse
37 #CONFIGDIR="/home/user/.feedingit/"
def getId(string):
    """Return a stable identifier for *string*: its hex MD5 digest."""
    # hashlib supersedes the md5 module (deprecated since Python 2.5)
    return hashlib.md5(string).hexdigest()
44 # if gconf.client_get_default().get_bool('/system/http_proxy/use_http_proxy'):
45 # port = gconf.client_get_default().get_int('/system/http_proxy/port')
46 # http = gconf.client_get_default().get_string('/system/http_proxy/host')
47 # proxy = proxy = urllib2.ProxyHandler( {"http":"http://%s:%s/"% (http,port)} )
48 # return (True, proxy)
49 # return (False, None)
51 # Enable proxy support for images and ArchivedArticles
52 #(proxy_support, proxy) = getProxy()
54 # opener = urllib2.build_opener(proxy)
55 # urllib2.install_opener(opener)
57 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
60 def __init__(self, configdir):
61 self.configdir = configdir
64 def addImage(self, key, baseurl, url):
65 filename = self.configdir+key+".d/"+getId(url)
66 if not isfile(filename):
68 if url.startswith("http"):
69 f = urllib2.urlopen(url)
71 f = urllib2.urlopen(baseurl+"/"+url)
72 outf = open(filename, "w")
77 print "Could not download" + url
78 if filename in self.images:
79 self.images[filename] += 1
81 self.images[filename] = 1
84 def removeImage(self, key, filename):
85 #filename = self.configdir+key+".d/"+getId(url)
87 self.images[filename] -= 1
89 self.images[filename] = 0 #Delete image
91 if self.images[filename] == 0:
92 remove(filename) #os.remove
93 del self.images[filename]
95 print "Could not remove image %s" % filename
98 def __init__(self, uniqueId, name, url, imageHandler):
106 self.updateTime = "Never"
107 self.uniqueId = uniqueId
108 self.imageHandler = imageHandler
110 def editFeed(self, url):
113 def saveFeed(self, configdir):
114 if not isdir(configdir+self.uniqueId+".d"):
115 mkdir(configdir+self.uniqueId+".d")
116 file = open(configdir+self.uniqueId+".d/feed", "w")
117 pickle.dump(self, file )
119 self.saveUnread(configdir)
121 def saveUnread(self, configdir):
122 if not isdir(configdir+self.uniqueId+".d"):
123 mkdir(configdir+self.uniqueId+".d")
124 file = open(configdir+self.uniqueId+".d/unread", "w")
125 pickle.dump(self.readItems, file )
128 def reloadUnread(self, configdir):
130 file = open(configdir+self.uniqueId+".d/unread", "r")
131 self.readItems = pickle.load( file )
134 for id in self.getIds():
135 if self.readItems[id]==False:
136 self.countUnread = self.countUnread + 1
139 return self.countUnread
    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        """Fetch self.url, merge new entries into the cache, purge stale ones.

        NOTE(review): this copy of the file has lost several lines
        (try/except arms, else branches, the tmpEntries/tmpIds/oldIds
        set-up). Gaps are flagged inline; restore from upstream before
        changing the logic.
        """
        # Expiry time is in hours
        tmp=feedparser.parse(self.url)
        # NOTE(review): the two parse calls were presumably an if/else on proxy
        tmp=feedparser.parse(self.url, handlers = [proxy])
        expiry = float(expiryTime) * 3600.
        # Check if the parse was succesful (number of entries > 0, else do nothing)
        if len(tmp["entries"])>0:
            #reversedEntries = self.getEntries()
            #reversedEntries.reverse()
            if not isdir(configdir+self.uniqueId+".d"):
                mkdir(configdir+self.uniqueId+".d")
            currentTime = time.time()
            # NOTE(review): tmpEntries/tmpIds initialisation appears elided here
            for entry in tmp["entries"]:
                (dateTuple, date) = self.extractDate(entry)
                # NOTE(review): original likely guarded missing titles (try/except)
                entry["title"] = "No Title"
                tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
                id = self.generateUniqueId(tmpEntry)
                #articleTime = time.mktime(self.entries[id]["dateTuple"])
                if not id in self.ids:
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    baseurl = ''.join(urlparse(tmpEntry["link"])[:-1])
                    # NOTE(review): `for img in soup('img'):` and its try: elided
                    filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                    tmpEntry["images"].append(filename)
                    # NOTE(review): except-branch of the image download:
                    print "Error downloading image %s" %img
                    tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    tmpEntries[id] = tmpEntry
                    if id not in self.readItems:
                        self.readItems[id] = False
                    # NOTE(review): else-branch (entry already cached):
                    tmpEntries[id] = self.entries[id]
            # NOTE(review): oldIds/tmpIds bookkeeping elided before this loop
            for entryId in oldIds:
                if not entryId in tmpIds:
                    # NOTE(review): try: header elided
                    articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                    if (currentTime - articleTime > 2*expiry):
                        self.removeEntry(entryId)
                    # NOTE(review): originally an if/elif/else chain; lines lost
                    if (currentTime - articleTime > expiry) and (self.isEntryRead(entryId)):
                        # Entry is over 24 hours, and already read
                        self.removeEntry(entryId)
                        # NOTE(review): else-branch kept the entry:
                        tmpEntries[entryId] = self.entries[entryId]
                        tmpIds.append(entryId)
                    # NOTE(review): except-branch of the purge try:
                    print "Error purging old articles %s" % entryId
                    self.removeEntry(entryId)
            self.entries = tmpEntries
            # NOTE(review): self.ids/tmpUnread re-initialisation and the
            # recount loop header are elided here
            if not self.readItems.has_key(id):
                self.readItems[id] = False
            if self.readItems[id]==False:
                tmpUnread = tmpUnread + 1
            self.countUnread = tmpUnread
            self.updateTime = time.asctime()
            self.saveFeed(configdir)
230 def extractContent(self, entry):
232 if entry.has_key('summary'):
233 content = entry.get('summary', '')
234 if entry.has_key('content'):
235 if len(entry.content[0].value) > len(content):
236 content = entry.content[0].value
238 content = entry.get('description', '')
241 def extractDate(self, entry):
242 if entry.has_key("updated_parsed"):
243 date1 = entry["updated_parsed"]
244 date = time.strftime("%a, %d %b %Y %H:%M:%S",entry["updated_parsed"])
245 elif entry.has_key("published_parsed"):
246 date1 = entry["published_parsed"]
247 date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
254 def setEntryRead(self, id):
255 if self.readItems[id]==False:
256 self.countUnread = self.countUnread - 1
257 self.readItems[id] = True
259 def setEntryUnread(self, id):
260 if self.readItems[id]==True:
261 self.countUnread = self.countUnread + 1
262 self.readItems[id] = False
264 def isEntryRead(self, id):
265 return self.readItems[id]
267 def getTitle(self, id):
268 return self.entries[id]["title"]
270 def getContentLink(self, id):
271 if self.entries[id].has_key("contentLink"):
272 return self.entries[id]["contentLink"]
273 return self.entries[id]["link"]
275 def getExternalLink(self, id):
276 return self.entries[id]["link"]
278 def getDate(self, id):
279 return self.entries[id]["date"]
281 def getDateTuple(self, id):
282 return self.entries[id]["dateTuple"]
284 def getUniqueId(self, index):
285 return self.ids[index]
287 def generateUniqueId(self, entry):
288 return getId(entry["date"] + entry["title"])
290 def getUpdateTime(self):
291 return self.updateTime
293 def getEntries(self):
299 def getNextId(self, id):
300 return self.ids[(self.ids.index(id)+1) % self.getNumberOfEntries()]
302 def getPreviousId(self, id):
303 return self.ids[(self.ids.index(id)-1) % self.getNumberOfEntries()]
305 def getNumberOfUnreadItems(self):
306 return self.countUnread
308 def getNumberOfEntries(self):
311 def getItem(self, id):
313 return self.entries[id]
317 def getContent(self, id):
318 if self.entries[id].has_key("contentLink"):
319 file = open(self.entries[id]["contentLink"])
320 content = file.read()
323 return self.entries[id]["content"]
325 def removeEntry(self, id):
327 if self.entries.has_key(id):
328 entry = self.entries[id]
329 if entry.has_key("images"):
330 for img in entry["images"]:
331 self.imageHandler.removeImage(self.uniqueId, img)
333 if entry.has_key("contentLink"):
335 remove(entry["contentLink"]) #os.remove
337 print "File not found for deletion: %s" % entry["contentLink"]
340 print "Entries has no %s key" % id
344 print "Ids has no %s key" % id
345 if self.readItems.has_key(id):
346 if self.readItems[id]==False:
347 self.countUnread = self.countUnread - 1
348 del self.readItems[id]
350 print "ReadItems has no %s key" % id
352 # print "Error removing entry %s" %id
354 def getArticle(self, entry):
355 #self.setEntryRead(id)
356 #entry = self.entries[id]
357 title = entry['title']
358 #content = entry.get('content', entry.get('summary_detail', {}))
359 content = entry["content"]
364 #text = '''<div style="color: black; background-color: white;">'''
365 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
366 text += "<html><head><title>" + title + "</title>"
367 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
368 #text += '<style> body {-webkit-user-select: none;} </style>'
369 text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
370 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
371 text += "<BR /><BR />"
373 text += "</body></html>"
class ArchivedArticles(Feed):
    """Pseudo-feed holding pages the user explicitly archived.

    NOTE(review): this copy of the file is missing lines in every method
    below (dict initialisation, loop headers, try/except arms, returns);
    gaps are flagged inline — restore from upstream before editing.
    """
    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        # NOTE(review): `entry` is presumably initialised (likely with
        # {"link": link}) on a line missing from this view.
        entry["title"] = title
        entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        (dateTuple, date) = self.extractDate(entry)
        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                    "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
        id = self.generateUniqueId(tmpEntry)
        self.entries[id] = tmpEntry
        # NOTE(review): a line (likely self.ids.append(id)) is elided here
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        self.saveUnread(configdir)

    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        # Downloads the actual page for each not-yet-fetched archived article,
        # then expires read/old entries. NOTE(review): heavily elided below.
        for id in self.getIds():
            entry = self.entries[id]
            if not entry["downloaded"]:
                # NOTE(review): surrounding try/except lines are missing
                f = urllib2.urlopen(entry["link"])
                #entry["content"] = f.read()
                # NOTE(review): `html = f.read()` appears to be elided
                soup = BeautifulSoup(html)
                baseurl = ''.join(urlparse(entry["link"])[:-1])
                # NOTE(review): image loop header (for img in soup('img'):) elided
                filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                #filename = configdir+self.uniqueId+".d/"+getId(img['src'])
                #if not isfile(filename):
                #    if img['src'].startswith("http"):
                #        f = urllib2.urlopen(img['src'])
                #        f = urllib2.urlopen(baseurl+"/"+img['src'])
                #        #print baseurl+"/"+img['src']
                #    outf = open(filename, "w")
                #    outf.write(f.read())
                #    print "Could not download" + img['src']
                entry["images"].append(filename)
                entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                file = open(entry["contentLink"], "w")
                file.write(soup.prettify())
                if len(entry["content"]) > 0:
                    entry["downloaded"] = True
                    entry["time"] = time.time()
                    self.setEntryUnread(id)
            currentTime = time.time()
            expiry = float(expiryTime) * 3600
            if currentTime - entry["time"] > expiry:
                if self.isEntryRead(id):
                    # NOTE(review): removal call elided here
                    if currentTime - entry["time"] > 2*expiry:
        # NOTE(review): the body of the if above (removal of very old
        # entries) is missing from this copy.
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def getArticle(self, index):
        # Marks the archived entry read and returns its cached content.
        self.setEntryRead(index)
        content = self.getContent(index)
        # NOTE(review): the trailing `return content` appears to be elided.
    # Lists all the feeds in a dictionary, and expose the data
    def __init__(self, configdir):
        """Load feeds.pickle / images.pickle from *configdir*, seeding defaults.

        NOTE(review): this copy is missing the close()/else/guard lines
        around both pickle loads; gaps are flagged inline.
        """
        self.configdir = configdir
        if isfile(self.configdir+"feeds.pickle"):
            file = open(self.configdir+"feeds.pickle")
            self.listOfFeeds = pickle.load(file)
        # NOTE(review): file.close() and the else-branch header are elided;
        # the default below originally ran only when no pickle existed.
        self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
        # NOTE(review): presumably guarded by `if isfile(...images.pickle):`
        file = open(self.configdir+"images.pickle")
        self.imageHandler = pickle.load(file)
        # NOTE(review): close() and the else-branch header are elided; the
        # fresh ImageHandler below was originally the fallback only.
        self.imageHandler = ImageHandler(self.configdir)
        if self.listOfFeeds.has_key("font"):
            del self.listOfFeeds["font"]
        if self.listOfFeeds.has_key("feedingit-order"):
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        # NOTE(review): else: elided — the rebuild below was the fallback
        self.sortedKeys = self.listOfFeeds.keys()
        if "font" in self.sortedKeys:
            self.sortedKeys.remove("font")
        self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
        # NOTE(review): shadows the builtin `list` and looks unused here
        list = self.sortedKeys[:]
        self.closeCurrentlyDisplayedFeed()
    def addArchivedArticle(self, key, index):
        """Copy entry *index* of feed *key* into the ArchivedArticles feed."""
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDateTuple(index)
        if not self.listOfFeeds.has_key("ArchivedArticles"):
            self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
            self.sortedKeys.append("ArchivedArticles")
            #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
            # NOTE(review): one line is elided here (likely self.saveConfig())
        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        # NOTE(review): stores the ARCHIVE's unread count under the source
        # feed's key — looks like a bug; expected "ArchivedArticles". Confirm.
        self.listOfFeeds[key]["unread"] = archFeed.getNumberOfUnreadItems()
    def loadFeed(self, key):
        """Unpickle feed *key* from disk, or build a fresh Feed object.

        NOTE(review): file.close(), the try: header, the else: branch and
        the final `return feed` are all elided in this copy of the file.
        """
        if isfile(self.configdir+key+".d/feed"):
            file = open(self.configdir+key+".d/feed")
            feed = pickle.load(file)
            # NOTE(review): a try: probing feed.uniqueId/imageHandler is elided
        except AttributeError:
            # Older pickles predate uniqueId/imageHandler: backfill them.
            feed.uniqueId = getId(feed.name)
            feed.imageHandler = self.imageHandler
            #feed.reloadUnread(self.configdir)
        # NOTE(review): else-branch (no pickle on disk) starts here:
        title = self.listOfFeeds[key]["title"]
        url = self.listOfFeeds[key]["url"]
        if key == "ArchivedArticles":
            feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
        # NOTE(review): else: elided before the plain-Feed fallback
        feed = Feed(getId(title), title, url, self.imageHandler)
        # NOTE(review): `return feed` elided
519 def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False):
520 for key in self.getListOfFeeds():
521 feed = self.loadFeed(key)
522 feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
523 self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
524 self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
526 def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
527 feed = self.getFeed(key)
528 feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
529 self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
530 self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
532 def editFeed(self, key, title, url):
533 self.listOfFeeds[key]["title"] = title
534 self.listOfFeeds[key]["url"] = url
535 feed = self.loadFeed(key)
538 def getFeed(self, key):
539 feed = self.loadFeed(key)
540 feed.reloadUnread(self.configdir)
543 def getFeedUpdateTime(self, key):
544 #print self.listOfFeeds.has_key(key)
545 if not self.listOfFeeds[key].has_key("updateTime"):
546 self.listOfFeeds[key]["updateTime"] = "Never"
547 return self.listOfFeeds[key]["updateTime"]
549 def getFeedNumberOfUnreadItems(self, key):
550 if not self.listOfFeeds[key].has_key("unread"):
551 self.listOfFeeds[key]["unread"] = 0
552 return self.listOfFeeds[key]["unread"]
554 def updateUnread(self, key, unreadItems):
555 self.listOfFeeds[key]["unread"] = unreadItems
557 def getFeedTitle(self, key):
558 return self.listOfFeeds[key]["title"]
560 def getFeedUrl(self, key):
561 return self.listOfFeeds[key]["url"]
563 def getListOfFeeds(self):
564 return self.sortedKeys
566 def addFeed(self, title, url):
567 if not self.listOfFeeds.has_key(getId(title)):
568 self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
569 self.sortedKeys.append(getId(title))
571 #self.feeds[getId(title)] = Feed(title, url)
576 def removeFeed(self, key):
577 del self.listOfFeeds[key]
578 self.sortedKeys.remove(key)
580 if isdir(self.configdir+key+".d/"):
581 rmtree(self.configdir+key+".d/")
584 def saveConfig(self):
585 self.listOfFeeds["feedingit-order"] = self.sortedKeys
586 file = open(self.configdir+"feeds.pickle", "w")
587 pickle.dump(self.listOfFeeds, file)
589 file = open(self.configdir+"images.pickle", "w")
590 pickle.dump(self.imageHandler, file)
593 def moveUp(self, key):
594 index = self.sortedKeys.index(key)
595 self.sortedKeys[index] = self.sortedKeys[index-1]
596 self.sortedKeys[index-1] = key
598 def moveDown(self, key):
599 index = self.sortedKeys.index(key)
600 index2 = (index+1)%len(self.sortedKeys)
601 self.sortedKeys[index] = self.sortedKeys[index2]
602 self.sortedKeys[index2] = key
604 def setCurrentlyDisplayedFeed(self, key):
605 self.currentlyDisplayedFeed = key
606 def closeCurrentlyDisplayedFeed(self):
607 self.currentlyDisplayedFeed = False
608 def getCurrentlyDisplayedFeed(self):
609 return self.currentlyDisplayedFeed
if __name__ == "__main__":
    # Smoke test: print update times of a hand-picked feed id prefix.
    listing = Listing('/home/user/.feedingit/')
    # copy the list; renamed from `list`, which shadowed the builtin
    keys = listing.getListOfFeeds()[:]
    # NOTE(review): the loop header was missing in this copy; restored.
    for key in keys:
        if key.startswith('d8'):
            print(listing.getFeedUpdateTime(key))