1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
26 from os.path import isfile
27 from os.path import isdir
28 from shutil import rmtree
29 from os import mkdir, remove
35 from BeautifulSoup import BeautifulSoup
36 from urlparse import urlparse
#CONFIGDIR="/home/user/.feedingit/"

# NOTE(review): this listing is partial — the `def` lines for getId() and
# getProxy() are elided; only their bodies are visible below.

# Body of getId(string): md5-hex digest used as a stable filesystem id.
    return md5.new(string).hexdigest()

# Body of getProxy(): read the system HTTP proxy from gconf and build a
# urllib2 ProxyHandler when one is configured.
    if gconf.client_get_default().get_bool('/system/http_proxy/use_http_proxy'):
        port = gconf.client_get_default().get_int('/system/http_proxy/port')
        http = gconf.client_get_default().get_string('/system/http_proxy/host')
        # NOTE(review): duplicated "proxy = proxy =" looks accidental but is harmless.
        proxy = proxy = urllib2.ProxyHandler( {"http":"http://%s:%s/"% (http,port)} )

# Enable proxy support for images and ArchivedArticles
(proxy_support, proxy) = getProxy()
# NOTE(review): a no-proxy guard between these lines appears elided in this listing.
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)
58 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
def __init__(self, configdir):
    # Base directory under which per-feed "<key>.d/" image caches live.
    self.configdir = configdir
    # NOTE(review): initialisation of self.images (the refcount dict used by
    # addImage/removeImage) is elided in this listing — confirm it is set here.
def addImage(self, key, baseurl, url):
    """Download *url* into feed *key*'s cache dir and bump its refcount.

    The cache filename is the md5 id of the url; relative urls are
    resolved against *baseurl*.
    """
    filename = self.configdir+key+".d/"+getId(url)
    if not isfile(filename):
        # Only fetch when not already cached.
        # [try: elided in listing]
        if url.startswith("http"):
            f = urllib2.urlopen(url)
            # [else: elided — relative url resolved against baseurl]
            f = urllib2.urlopen(baseurl+"/"+url)
        outf = open(filename, "w")
        # [write/close and except handler elided in listing]
        print "Could not download" + url
    # Refcount so shared images survive until the last article drops them.
    if filename in self.images:
        self.images[filename] += 1
        # [else: elided in listing]
        self.images[filename] = 1
def removeImage(self, key, filename):
    """Drop one reference to a cached image; delete the file at refcount 0."""
    #filename = self.configdir+key+".d/"+getId(url)
    # [try: elided in listing]
    self.images[filename] -= 1
    # [except branch elided — unknown filenames are treated as refcount 0]
    self.images[filename] = 0 #Delete image
    # [try: elided in listing]
    if self.images[filename] == 0:
        remove(filename) #os.remove
        del self.images[filename]
    # [except: elided]
    print "Could not remove image %s" % filename
def __init__(self, uniqueId, name, url, imageHandler):
    # [elided in listing: several attribute initialisations — presumably
    #  name/url/entries/ids/readItems/countUnread — confirm against full file]
    self.updateTime = "Never"
    self.uniqueId = uniqueId
    self.imageHandler = imageHandler
111 def editFeed(self, url):
def saveFeed(self, configdir):
    """Pickle this Feed object to <configdir>/<uniqueId>.d/feed.

    Also persists the read/unread state alongside it via saveUnread().
    """
    feed_dir = configdir + self.uniqueId + ".d"
    if not isdir(feed_dir):
        mkdir(feed_dir)
    # "wb" because pickle streams are byte-oriented, and try/finally so the
    # handle is closed even if dump() raises (the original leaked it).
    feed_file = open(feed_dir + "/feed", "wb")
    try:
        pickle.dump(self, feed_file)
    finally:
        feed_file.close()
    self.saveUnread(configdir)
def saveUnread(self, configdir):
    """Pickle the readItems dict to <configdir>/<uniqueId>.d/unread."""
    feed_dir = configdir + self.uniqueId + ".d"
    if not isdir(feed_dir):
        mkdir(feed_dir)
    # "wb" because pickle streams are byte-oriented, and try/finally so the
    # handle is closed even if dump() raises (the original leaked it).
    unread_file = open(feed_dir + "/unread", "wb")
    try:
        pickle.dump(self.readItems, unread_file)
    finally:
        unread_file.close()
def reloadUnread(self, configdir):
    """Re-read the unread-state pickle and recompute countUnread."""
    # [try: elided in listing]
    file = open(configdir+self.uniqueId+".d/unread", "r")
    self.readItems = pickle.load( file )
    # [close/except lines elided; likely also resets countUnread to 0 — confirm]
    for id in self.getIds():
        if self.readItems[id]==False:
            self.countUnread = self.countUnread + 1
    # [elided lines]
    return self.countUnread
def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
    """Fetch the feed with feedparser, merge new entries into the cache,
    expire old read entries, then persist everything.

    expiryTime is in hours; entries older than 2*expiry are dropped
    unconditionally, older than expiry only when already read.
    """
    # Expiry time is in hours
    # [elided: branch choosing the plain vs proxied parse call]
    tmp=feedparser.parse(self.url)
    # [alternative branch — used when a proxy handler is supplied]
    tmp=feedparser.parse(self.url, handlers = [proxy])
    expiry = float(expiryTime) * 3600.
    # Check if the parse was succesful (number of entries > 0, else do nothing)
    if len(tmp["entries"])>0:
        #reversedEntries = self.getEntries()
        #reversedEntries.reverse()
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        currentTime = time.time()
        # [elided: initialisation of the tmpEntries / tmpIds accumulators]
        for entry in tmp["entries"]:
            (dateTuple, date) = self.extractDate(entry)
            tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
            id = self.generateUniqueId(tmpEntry)
            #articleTime = time.mktime(self.entries[id]["dateTuple"])
            if not id in self.ids:
                # New entry: pre-render its HTML and cache referenced images.
                soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                # [elided: image-cache guard and loop header over soup's <img> tags]
                baseurl = ''.join(urlparse(tmpEntry["link"])[:-1])
                filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                # [elided line]
                tmpEntry["images"].append(filename)
                # [except: elided]
                print "Error downloading image %s" %img
                tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                file = open(tmpEntry["contentLink"], "w")
                file.write(soup.prettify())
                # [elided: file.close() and id bookkeeping]
                tmpEntries[id] = tmpEntry
                # [elided line]
                if id not in self.readItems:
                    self.readItems[id] = False
                # [else: elided — entry already known, keep the stored version]
                tmpEntries[id] = self.entries[id]
        # [elided: snapshot of previously-known ids into oldIds]
        for entryId in oldIds:
            if not entryId in tmpIds:
                # [try: elided]
                articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                if (currentTime - articleTime > 2*expiry):
                    self.removeEntry(entryId)
                # [elif: elided]
                if (currentTime - articleTime > expiry) and (self.isEntryRead(entryId)):
                    # Entry is over 24 hours, and already read
                    self.removeEntry(entryId)
                # [else: keep the entry — elided]
                tmpEntries[entryId] = self.entries[entryId]
                tmpIds.append(entryId)
                # [except: elided]
                print "Error purging old articles %s" % entryId
                self.removeEntry(entryId)
        self.entries = tmpEntries
        # [elided: self.ids = tmpIds and the unread-recount loop header]
        if not self.readItems.has_key(id):
            self.readItems[id] = False
        if self.readItems[id]==False:
            tmpUnread = tmpUnread + 1
        self.countUnread = tmpUnread
        self.updateTime = time.asctime()
        self.saveFeed(configdir)
def extractContent(self, entry):
    """Pick the richest body text a feedparser entry offers:
    content[0] if longer than summary, else summary, else description."""
    # [elided: likely initialises content to '' — confirm]
    if entry.has_key('summary'):
        content = entry.get('summary', '')
    if entry.has_key('content'):
        if len(entry.content[0].value) > len(content):
            content = entry.content[0].value
    # [else: elided — fallback when neither summary nor content present]
    content = entry.get('description', '')
    # [return content elided in listing]
def extractDate(self, entry):
    """Return the entry timestamp as (time-tuple, formatted string),
    preferring updated_parsed over published_parsed."""
    if entry.has_key("updated_parsed"):
        date1 = entry["updated_parsed"]
        date = time.strftime("%a, %d %b %Y %H:%M:%S",entry["updated_parsed"])
    elif entry.has_key("published_parsed"):
        date1 = entry["published_parsed"]
        date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
    # [elided: else-fallback and the return of (date1, date) — confirm]
def setEntryRead(self, id):
    """Flag entry *id* as read, decrementing the unread counter at most once."""
    if not self.readItems[id]:
        self.countUnread -= 1
        self.readItems[id] = True
def setEntryUnread(self, id):
    """Flag entry *id* as unread, incrementing the unread counter at most once."""
    if self.readItems[id]:
        self.countUnread += 1
        self.readItems[id] = False
def isEntryRead(self, id):
    """True when entry *id* has already been read."""
    read_state = self.readItems[id]
    return read_state
def getTitle(self, id):
    """Title string of entry *id*."""
    entry = self.entries[id]
    return entry["title"]
def getContentLink(self, id):
    """Path to the locally cached HTML for entry *id*, falling back to the
    external link when no cached copy exists."""
    entry = self.entries[id]
    # dict.has_key() is deprecated; "in" is equivalent and works everywhere.
    if "contentLink" in entry:
        return entry["contentLink"]
    return entry["link"]
def getExternalLink(self, id):
    """Original web link of entry *id* (never the local cached copy)."""
    entry = self.entries[id]
    return entry["link"]
def getDate(self, id):
    """Human-readable date string of entry *id*."""
    entry = self.entries[id]
    return entry["date"]
def getDateTuple(self, id):
    """Parsed time-tuple of entry *id* (as stored by extractDate)."""
    entry = self.entries[id]
    return entry["dateTuple"]
def getUniqueId(self, index):
    """Entry id at position *index* in display order."""
    return self.ids[index]
def generateUniqueId(self, entry):
    """Stable entry id: md5 hex digest of the date string plus the title."""
    hash_source = entry["date"] + entry["title"]
    return getId(hash_source)
def getUpdateTime(self):
    """Timestamp string of the last successful update ("Never" before any)."""
    return self.updateTime
286 def getEntries(self):
def getNextId(self, id):
    """Id of the entry after *id*, wrapping around to the first entry."""
    position = self.ids.index(id)
    count = self.getNumberOfEntries()
    return self.ids[(position + 1) % count]
def getPreviousId(self, id):
    """Id of the entry before *id*, wrapping around to the last entry."""
    position = self.ids.index(id)
    count = self.getNumberOfEntries()
    return self.ids[(position - 1) % count]
def getNumberOfUnreadItems(self):
    """Cached count of unread entries in this feed."""
    return self.countUnread
301 def getNumberOfEntries(self):
def getItem(self, id):
    """Return the raw entry dict for *id*."""
    # [try: elided in listing]
    return self.entries[id]
    # [except handler elided]
def getContent(self, id):
    """Article body for *id*, preferring the cached HTML file on disk."""
    if self.entries[id].has_key("contentLink"):
        file = open(self.entries[id]["contentLink"])
        content = file.read()
        # [elided: file.close() and return of the cached content]
    # Fallback: the in-memory content captured at parse time.
    return self.entries[id]["content"]
def removeEntry(self, id):
    """Delete an entry: its cached images, its HTML file, and all state."""
    # [try: elided in listing]
    if self.entries.has_key(id):
        entry = self.entries[id]
        if entry.has_key("images"):
            for img in entry["images"]:
                self.imageHandler.removeImage(self.uniqueId, img)
        # [elided line]
        if entry.has_key("contentLink"):
            # [try: elided]
            remove(entry["contentLink"]) #os.remove
            # [except: elided]
            print "File not found for deletion: %s" % entry["contentLink"]
        # [elided: del self.entries[id] and the else: for the missing-key case]
        print "Entries has no %s key" % id
    # [elided: removal of id from self.ids and its guard]
    print "Ids has no %s key" % id
    if self.readItems.has_key(id):
        if self.readItems[id]==False:
            self.countUnread = self.countUnread - 1
        del self.readItems[id]
    # [else: elided]
    print "ReadItems has no %s key" % id
    # print "Error removing entry %s" %id
def getArticle(self, entry):
    """Render an entry dict as a standalone XHTML page string."""
    #self.setEntryRead(id)
    #entry = self.entries[id]
    title = entry['title']
    #content = entry.get('content', entry.get('summary_detail', {}))
    content = entry["content"]
    # [elided: link/date locals referenced below — confirm they come from entry]
    #text = '''<div style="color: black; background-color: white;">'''
    text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
    text += "<html><head><title>" + title + "</title>"
    text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
    #text += '<style> body {-webkit-user-select: none;} </style>'
    text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
    text += "<BR /><small><i>Date: " + date + "</i></small></div>"
    text += "<BR /><BR />"
    # [elided: appending the article content]
    text += "</body></html>"
    # [return text elided in listing]
class ArchivedArticles(Feed):
    """Feed subclass for manually archived articles: "updating" means
    downloading each saved article's page rather than fetching an RSS url."""

    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        """Create a not-yet-downloaded entry from a title/link pair."""
        # [elided: entry = {} and entry["link"] = link — confirm]
        entry["title"] = title
        entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        # [elided line]
        (dateTuple, date) = self.extractDate(entry)
        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
        id = self.generateUniqueId(tmpEntry)
        self.entries[id] = tmpEntry
        # [elided: likely self.ids.append(id) — confirm]
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        self.saveUnread(configdir)

    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        """Download each pending article's page, cache images and HTML,
        then expire old (read) articles. expiryTime is in hours."""
        for id in self.getIds():
            entry = self.entries[id]
            if not entry["downloaded"]:
                # [try: elided in listing]
                f = urllib2.urlopen(entry["link"])
                #entry["content"] = f.read()
                # [elided: html = f.read() and storing it]
                soup = BeautifulSoup(html)
                # [elided: loop header over the page's <img> tags]
                baseurl = ''.join(urlparse(entry["link"])[:-1])
                filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                #filename = configdir+self.uniqueId+".d/"+getId(img['src'])
                #if not isfile(filename):
                # if img['src'].startswith("http"):
                # f = urllib2.urlopen(img['src'])
                # f = urllib2.urlopen(baseurl+"/"+img['src'])
                # #print baseurl+"/"+img['src']
                # outf = open(filename, "w")
                # outf.write(f.read())
                # print "Could not download" + img['src']
                entry["images"].append(filename)
                entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                file = open(entry["contentLink"], "w")
                file.write(soup.prettify())
                # [elided: file.close()]
                if len(entry["content"]) > 0:
                    entry["downloaded"] = True
                    entry["time"] = time.time()
                    self.setEntryUnread(id)
                # [except: elided]
            currentTime = time.time()
            expiry = float(expiryTime) * 3600
            if currentTime - entry["time"] > expiry:
                if self.isEntryRead(id):
                    # [elided: removeEntry(id)]
                    if currentTime - entry["time"] > 2*expiry:
                        # [elided: removeEntry(id)]
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def getArticle(self, index):
        """Mark entry *index* read and return its (cached) content."""
        self.setEntryRead(index)
        content = self.getContent(index)
        # [return content elided in listing]
# Lists all the feeds in a dictionary, and expose the data
def __init__(self, configdir):
    """Load feeds.pickle / images.pickle from *configdir* (falling back to
    defaults when absent) and build the sorted list of feed keys."""
    self.configdir = configdir
    # [elided line]
    if isfile(self.configdir+"feeds.pickle"):
        file = open(self.configdir+"feeds.pickle")
        self.listOfFeeds = pickle.load(file)
        # [elided: file.close() and the else: branch — default config below]
        self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
    if isfile(self.configdir+"images.pickle"):
        file = open(self.configdir+"images.pickle")
        self.imageHandler = pickle.load(file)
        # [elided: file.close() and the else: branch — fresh handler below]
        self.imageHandler = ImageHandler(self.configdir)
    # "font" is a stale config key from older versions, not a feed.
    if self.listOfFeeds.has_key("font"):
        del self.listOfFeeds["font"]
    if self.listOfFeeds.has_key("feedingit-order"):
        self.sortedKeys = self.listOfFeeds["feedingit-order"]
    # [else: elided — fall back to alphabetical-by-title order]
    self.sortedKeys = self.listOfFeeds.keys()
    if "font" in self.sortedKeys:
        self.sortedKeys.remove("font")
    self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
    # NOTE(review): this local copy appears unused here ("list" also shadows
    # the builtin) — confirm against the full file.
    list = self.sortedKeys[:]
    self.closeCurrentlyDisplayedFeed()
def addArchivedArticle(self, key, index):
    """Copy entry *index* of feed *key* into the ArchivedArticles feed,
    creating that feed on first use."""
    feed = self.getFeed(key)
    title = feed.getTitle(index)
    link = feed.getExternalLink(index)
    date = feed.getDateTuple(index)
    if not self.listOfFeeds.has_key("ArchivedArticles"):
        self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
        self.sortedKeys.append("ArchivedArticles")
        #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
        # [elided line]
    archFeed = self.getFeed("ArchivedArticles")
    archFeed.addArchivedArticle(title, link, date, self.configdir)
    # NOTE(review): the unread count stored under *key* comes from the
    # archive feed — this looks like it should update "ArchivedArticles"
    # instead of key; confirm before changing.
    self.listOfFeeds[key]["unread"] = archFeed.getNumberOfUnreadItems()
def loadFeed(self, key):
    """Unpickle the Feed object for *key*, or construct a fresh one from
    the stored title/url when no pickle exists."""
    if isfile(self.configdir+key+".d/feed"):
        file = open(self.configdir+key+".d/feed")
        feed = pickle.load(file)
        # [elided: file.close() and a try: around the migration check below]
    # Migration for pickles written before uniqueId/imageHandler existed.
    except AttributeError:
        feed.uniqueId = getId(feed.name)
        feed.imageHandler = self.imageHandler
        #feed.reloadUnread(self.configdir)
        # [elided: return / else: branch for the no-pickle case]
        title = self.listOfFeeds[key]["title"]
        url = self.listOfFeeds[key]["url"]
        if key == "ArchivedArticles":
            feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
        # [else: elided]
        feed = Feed(getId(title), title, url, self.imageHandler)
        # [return feed elided in listing]
def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False):
    """Refresh every feed and record its unread count and update time."""
    for key in self.getListOfFeeds():
        feed = self.loadFeed(key)
        feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
        feed_info = self.listOfFeeds[key]
        feed_info["unread"] = feed.getNumberOfUnreadItems()
        feed_info["updateTime"] = feed.getUpdateTime()
def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
    """Refresh a single feed and record its unread count and update time."""
    feed = self.getFeed(key)
    feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
    feed_info = self.listOfFeeds[key]
    feed_info["unread"] = feed.getNumberOfUnreadItems()
    feed_info["updateTime"] = feed.getUpdateTime()
def editFeed(self, key, title, url):
    """Update the stored title/url for feed *key*."""
    self.listOfFeeds[key]["title"] = title
    self.listOfFeeds[key]["url"] = url
    feed = self.loadFeed(key)
    # [elided: presumably feed.editFeed(url) and saving — confirm]
def getFeed(self, key):
    """Load the Feed for *key* with its unread state refreshed from disk."""
    feed = self.loadFeed(key)
    feed.reloadUnread(self.configdir)
    # [return feed elided in listing]
def getFeedUpdateTime(self, key):
    """Return the last-update timestamp string for feed *key*.

    Config pickles written by older versions may lack the "updateTime"
    field, so it is backfilled with "Never" on first access. setdefault
    replaces the deprecated has_key() check plus three separate lookups.
    """
    #print self.listOfFeeds.has_key(key)
    return self.listOfFeeds[key].setdefault("updateTime", "Never")
def getFeedNumberOfUnreadItems(self, key):
    """Return the cached unread count for feed *key*, backfilling 0 for
    config entries written before the "unread" field existed.

    setdefault replaces the deprecated has_key() check and avoids the
    repeated dict lookups of the original.
    """
    return self.listOfFeeds[key].setdefault("unread", 0)
def updateUnread(self, key, unreadItems):
    """Overwrite the cached unread count for feed *key*."""
    feed_info = self.listOfFeeds[key]
    feed_info["unread"] = unreadItems
def getFeedTitle(self, key):
    """Display title of feed *key*."""
    feed_info = self.listOfFeeds[key]
    return feed_info["title"]
def getFeedUrl(self, key):
    """RSS url of feed *key*."""
    feed_info = self.listOfFeeds[key]
    return feed_info["url"]
def getListOfFeeds(self):
    """Feed keys in the user's chosen display order."""
    return self.sortedKeys
def addFeed(self, title, url):
    """Register a new feed keyed by the md5 id of its title; does nothing
    when a feed with the same title already exists."""
    if not self.listOfFeeds.has_key(getId(title)):
        self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
        self.sortedKeys.append(getId(title))
        # [elided line]
    #self.feeds[getId(title)] = Feed(title, url)
    # [elided: trailing lines — possibly a success/failure return; confirm]
def removeFeed(self, key):
    """Forget feed *key* and delete its on-disk cache directory."""
    del self.listOfFeeds[key]
    self.sortedKeys.remove(key)
    # [possible elided line here in this listing]
    if isdir(self.configdir+key+".d/"):
        rmtree(self.configdir+key+".d/")
def saveConfig(self):
    """Persist the feed list (including display order) and the image
    handler to feeds.pickle / images.pickle in the config dir.

    Opens the pickles in binary mode and closes both handles even when
    dump() raises (the original opened in text mode and leaked them).
    """
    # Display order is smuggled into the feeds dict under a magic key.
    self.listOfFeeds["feedingit-order"] = self.sortedKeys
    feeds_file = open(self.configdir+"feeds.pickle", "wb")
    try:
        pickle.dump(self.listOfFeeds, feeds_file)
    finally:
        feeds_file.close()
    images_file = open(self.configdir+"images.pickle", "wb")
    try:
        pickle.dump(self.imageHandler, images_file)
    finally:
        images_file.close()
def moveUp(self, key):
    """Swap feed *key* with its predecessor in the display order.

    At index 0 this wraps: the first key trades places with the last
    (Python's -1 indexing), matching the original behaviour.
    """
    pos = self.sortedKeys.index(key)
    displaced = self.sortedKeys[pos - 1]
    self.sortedKeys[pos] = displaced
    self.sortedKeys[pos - 1] = key
def moveDown(self, key):
    """Swap feed *key* with its successor in the display order, wrapping
    from the last position back to the first."""
    pos = self.sortedKeys.index(key)
    nxt = (pos + 1) % len(self.sortedKeys)
    displaced = self.sortedKeys[nxt]
    self.sortedKeys[pos] = displaced
    self.sortedKeys[nxt] = key
def setCurrentlyDisplayedFeed(self, key):
    """Record *key* as the feed currently shown in the UI."""
    self.currentlyDisplayedFeed = key
def closeCurrentlyDisplayedFeed(self):
    """Mark that no feed is currently shown (sentinel value False)."""
    self.currentlyDisplayedFeed = False
def getCurrentlyDisplayedFeed(self):
    """Key of the feed currently shown, or False when none is open."""
    return self.currentlyDisplayedFeed
# Ad-hoc smoke test: print update times for feeds whose id starts with 'd8'.
if __name__ == "__main__":
    listing = Listing('/home/user/.feedingit/')
    list = listing.getListOfFeeds()[:]
    # [elided: loop header iterating the keys into `key`]
    if key.startswith('d8'):
        print listing.getFeedUpdateTime(key)