Added destroy for widget when removed from homescreen
[feedingit] / src / rss.py
#!/usr/bin/env python2.5


# Copyright (c) 2007-2008 INdT.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# ============================================================================
# Name        : FeedingIt.py
# Author      : Yves Marcoz
# Version     : 0.5.4
# Description : Simple RSS Reader
# ============================================================================

from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove
import pickle
import md5
import feedparser
import time
import urllib2
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin

#CONFIGDIR="/home/user/.feedingit/"

def getId(string):
    return md5.new(string).hexdigest()

#def getProxy():
#    import gconf
#    if gconf.client_get_default().get_bool('/system/http_proxy/use_http_proxy'):
#        port = gconf.client_get_default().get_int('/system/http_proxy/port')
#        http = gconf.client_get_default().get_string('/system/http_proxy/host')
#        proxy = urllib2.ProxyHandler( {"http":"http://%s:%s/"% (http,port)} )
#        return (True, proxy)
#    return (False, None)

# Enable proxy support for images and ArchivedArticles
#(proxy_support, proxy) = getProxy()
#if proxy_support:
#    opener = urllib2.build_opener(proxy)
#    urllib2.install_opener(opener)

# Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, "images":[] }

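# ImageHandler reference-counts cached image files, so an image shared by
# several articles is only removed from disk once no article uses it.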
class ImageHandler:
    def __init__(self, configdir):
        self.configdir = configdir
        self.images = {}

    def addImage(self, key, baseurl, url):
        filename = self.configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                #if url.startswith("http"):
                #    f = urllib2.urlopen(url)
                #else:
                f = urllib2.urlopen(urljoin(baseurl,url))
                outf = open(filename, "w")
                outf.write(f.read())
                f.close()
                outf.close()
            except:
                print "Could not download " + url
        else:
            open(filename,"a").close()  # "Touch" the file
        if filename in self.images:
            self.images[filename] += 1
        else:
            self.images[filename] = 1
        return filename

    def removeImage(self, key, filename):
        #filename = self.configdir+key+".d/"+getId(url)
        try:
            self.images[filename] -= 1
        except:
            self.images[filename] = 0  # Unknown image; force deletion below
        try:
            if self.images[filename] == 0:
                remove(filename)  # os.remove
                del self.images[filename]
        except:
            print "Could not remove image %s" % filename

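# Feed holds one RSS feed: the object itself is pickled to
# configdir/<uniqueId>.d/feed, read/unread state to .../unread, and each
# article's rendered page to .../<entryId>.html.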
class Feed:
    def __init__(self, uniqueId, name, url, imageHandler):
        self.titles = []
        self.entries = {}
        self.ids = []
        self.readItems = {}
        self.name = name
        self.url = url
        self.countUnread = 0
        self.updateTime = "Never"
        self.uniqueId = uniqueId
        self.imageHandler = imageHandler

    def editFeed(self, url):
        self.url = url

    def saveFeed(self, configdir):
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        file = open(configdir+self.uniqueId+".d/feed", "w")
        pickle.dump(self, file)
        file.close()
        self.saveUnread(configdir)

    def saveUnread(self, configdir):
        if not isdir(configdir+self.uniqueId+".d"):
            mkdir(configdir+self.uniqueId+".d")
        file = open(configdir+self.uniqueId+".d/unread", "w")
        pickle.dump(self.readItems, file)
        file.close()

    def reloadUnread(self, configdir):
        try:
            file = open(configdir+self.uniqueId+".d/unread", "r")
            self.readItems = pickle.load(file)
            file.close()
            self.countUnread = 0
            for id in self.getIds():
                if self.readItems[id] == False:
                    self.countUnread = self.countUnread + 1
        except:
            pass
        return self.countUnread

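    # updateFeed re-parses the feed, writes any new entries to HTML files
    # (caching images when imageCache is True), and purges expired entries:
    # anything older than twice the expiry time is removed unconditionally,
    # and anything older than the expiry time is removed once read.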
    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        # Expiry time is in hours
        if proxy is None:
            tmp = feedparser.parse(self.url)
        else:
            tmp = feedparser.parse(self.url, handlers=[proxy])
        expiry = float(expiryTime) * 3600.
        # Check if the parse was successful (number of entries > 0); else do nothing
        if len(tmp["entries"]) > 0:
            try:
                f = urllib2.urlopen(urljoin(tmp["feed"]["link"], "/favicon.ico"))
                data = f.read()
                f.close()
                outf = open(configdir+self.uniqueId+".d/favicon.ico", "w")
                outf.write(data)
                outf.close()
                del data
            except:
                import traceback
                traceback.print_exc()

            #reversedEntries = self.getEntries()
            #reversedEntries.reverse()
            if not isdir(configdir+self.uniqueId+".d"):
                mkdir(configdir+self.uniqueId+".d")
            currentTime = time.time()
            tmpEntries = {}
            tmpIds = []
            for entry in tmp["entries"]:
                (dateTuple, date) = self.extractDate(entry)
                try:
                    entry["title"]
                except:
                    entry["title"] = "No Title"
                try:
                    entry["link"]
                except:
                    entry["link"] = ""
                tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
                id = self.generateUniqueId(tmpEntry)

                #articleTime = time.mktime(self.entries[id]["dateTuple"])
                if id not in self.ids:
                    soup = BeautifulSoup(self.getArticle(tmpEntry))  # was tmpEntry["content"]
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    if imageCache:
                        for img in images:
                            try:
                                filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                                img['src'] = filename
                                tmpEntry["images"].append(filename)
                            except:
                                print "Error downloading image %s" % img
                    tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    tmpEntries[id] = tmpEntry
                    tmpIds.append(id)
                    if id not in self.readItems:
                        self.readItems[id] = False
                else:
                    tmpEntries[id] = self.entries[id]
                    tmpIds.append(id)

            oldIds = self.ids[:]
            for entryId in oldIds:
                if entryId not in tmpIds:
                    try:
                        articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                        if (currentTime - articleTime > 2*expiry):
                            self.removeEntry(entryId)
                            continue
                        if (currentTime - articleTime > expiry) and (self.isEntryRead(entryId)):
                            # Entry is older than the expiry time and already read
                            self.removeEntry(entryId)
                            continue
                        tmpEntries[entryId] = self.entries[entryId]
                        tmpIds.append(entryId)
                    except:
                        print "Error purging old articles %s" % entryId
                        self.removeEntry(entryId)

            self.entries = tmpEntries
            self.ids = tmpIds
            tmpUnread = 0

            ids = self.ids[:]
            for id in ids:
                if id not in self.readItems:
                    self.readItems[id] = False
                if self.readItems[id] == False:
                    tmpUnread = tmpUnread + 1
            keys = self.readItems.keys()
            for id in keys:
                if id not in self.ids:
                    del self.readItems[id]
            del tmp
            self.countUnread = tmpUnread
            self.updateTime = time.asctime()
            self.saveFeed(configdir)

    def extractContent(self, entry):
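        # Prefer the longest text available: start with 'summary', use
        # 'content' if it is longer, and fall back to 'description'.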
        content = ""
        if 'summary' in entry:
            content = entry.get('summary', '')
        if 'content' in entry:
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def extractDate(self, entry):
        if "updated_parsed" in entry:
            date1 = entry["updated_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"])
        elif "published_parsed" in entry:
            date1 = entry["published_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
        else:
            date1 = ""
            date = ""
        #print date1, date
        return (date1, date)

    def setEntryRead(self, id):
        if self.readItems[id] == False:
            self.countUnread = self.countUnread - 1
            self.readItems[id] = True

    def setEntryUnread(self, id):
        if self.readItems[id] == True:
            self.countUnread = self.countUnread + 1
            self.readItems[id] = False

    def isEntryRead(self, id):
        return self.readItems[id]

    def getTitle(self, id):
        return self.entries[id]["title"]

    def getContentLink(self, id):
        if "contentLink" in self.entries[id]:
            return self.entries[id]["contentLink"]
        return self.entries[id]["link"]

    def getExternalLink(self, id):
        return self.entries[id]["link"]

    def getDate(self, id):
        return self.entries[id]["date"]

    def getDateTuple(self, id):
        return self.entries[id]["dateTuple"]

    def getUniqueId(self, index):
        return self.ids[index]

    def generateUniqueId(self, entry):
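        # The ID is the MD5 hash of date+title, so an entry whose title or
        # date changes upstream is treated as a brand-new article.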
        return getId(entry["date"] + entry["title"])

    def getUpdateTime(self):
        return self.updateTime

    def getEntries(self):
        return self.entries

    def getIds(self):
        return self.ids

    def getNextId(self, id):
        return self.ids[(self.ids.index(id)+1) % self.getNumberOfEntries()]

    def getPreviousId(self, id):
        return self.ids[(self.ids.index(id)-1) % self.getNumberOfEntries()]

    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.ids)

    def getItem(self, id):
        try:
            return self.entries[id]
        except:
            return []

    def getContent(self, id):
        if "contentLink" in self.entries[id]:
            file = open(self.entries[id]["contentLink"])
            content = file.read()
            file.close()
            return content
        return self.entries[id]["content"]

    def removeEntry(self, id):
        #try:
        if id in self.entries:
            entry = self.entries[id]
            if "images" in entry:
                for img in entry["images"]:
                    self.imageHandler.removeImage(self.uniqueId, img)

            if "contentLink" in entry:
                try:
                    remove(entry["contentLink"])  # os.remove
                except:
                    print "File not found for deletion: %s" % entry["contentLink"]
            del self.entries[id]
        else:
            print "Entries has no %s key" % id
        if id in self.ids:
            self.ids.remove(id)
        else:
            print "Ids has no %s key" % id
        if id in self.readItems:
            if self.readItems[id] == False:
                self.countUnread = self.countUnread - 1
            del self.readItems[id]
        else:
            print "ReadItems has no %s key" % id
        #except:
        #    print "Error removing entry %s" % id

    def getArticle(self, entry):
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        link = entry['link']
        date = entry["date"]

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text

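# ArchivedArticles is a Feed whose entries are added manually rather than
# parsed from RSS; its updateFeed downloads each saved page (and its images)
# instead of re-fetching a feed.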
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        entry = {}
        entry["title"] = title
        entry["link"] = link
        entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        #print entry
        (dateTuple, date) = self.extractDate(entry)
        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                    "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
        id = self.generateUniqueId(tmpEntry)
        self.entries[id] = tmpEntry
        self.ids.append(id)
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        self.saveUnread(configdir)

    def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
        for id in self.getIds():
            entry = self.entries[id]
            if not entry["downloaded"]:
                #try:
                f = urllib2.urlopen(entry["link"])
                #entry["content"] = f.read()
                html = f.read()
                f.close()
                soup = BeautifulSoup(html)
                images = soup('img')
                baseurl = entry["link"]
                for img in images:
                    filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                    img['src'] = filename
                    entry["images"].append(filename)
                entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                file = open(entry["contentLink"], "w")
                file.write(soup.prettify())
                file.close()
                if len(entry["content"]) > 0:
                    entry["downloaded"] = True
                    entry["time"] = time.time()
                    self.setEntryUnread(id)
                #except:
                #    pass
            #currentTime = time.time()
            #expiry = float(expiryTime) * 3600
            #if currentTime - entry["time"] > expiry:
            #    if self.isEntryRead(id):
            #        self.removeEntry(id)
            #    else:
            #        if currentTime - entry["time"] > 2*expiry:
            #            self.removeEntry(id)
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def purgeReadArticles(self):
        ids = self.getIds()[:]  # copy, since removeEntry mutates self.ids
        for id in ids:
            if self.isEntryRead(id):
                self.removeEntry(id)

    def removeArticle(self, id):
        self.removeEntry(id)

    def getArticle(self, id):
        self.setEntryRead(id)
        content = self.getContent(id)
        return content

class Listing:
    # Lists all the feeds in a dictionary, and exposes the data
    def __init__(self, configdir):
        self.configdir = configdir
        #self.feeds = {}
        if isfile(self.configdir+"feeds.pickle"):
            file = open(self.configdir+"feeds.pickle")
            self.listOfFeeds = pickle.load(file)
            file.close()
        else:
            self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
        try:
            file = open(self.configdir+"images.pickle")
            self.imageHandler = pickle.load(file)
            file.close()
        except:
            self.imageHandler = ImageHandler(self.configdir)
        if "font" in self.listOfFeeds:
            del self.listOfFeeds["font"]
        if "feedingit-order" in self.listOfFeeds:
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        else:
            self.sortedKeys = self.listOfFeeds.keys()
            if "font" in self.sortedKeys:
                self.sortedKeys.remove("font")
            self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
        #self.closeCurrentlyDisplayedFeed()

    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDateTuple(index)
        if "ArchivedArticles" not in self.listOfFeeds:
            self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
            self.sortedKeys.append("ArchivedArticles")
            #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
            self.saveConfig()
        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.listOfFeeds["ArchivedArticles"]["unread"] = archFeed.getNumberOfUnreadItems()

    def loadFeed(self, key):
        if isfile(self.configdir+key+".d/feed"):
            file = open(self.configdir+key+".d/feed")
            feed = pickle.load(file)
            file.close()
            try:
                feed.uniqueId
                feed.imageHandler
            except AttributeError:
                feed.uniqueId = getId(feed.name)
                feed.imageHandler = self.imageHandler
            #feed.reloadUnread(self.configdir)
        else:
            #print key
            title = self.listOfFeeds[key]["title"]
            url = self.listOfFeeds[key]["url"]
            if key == "ArchivedArticles":
                feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
            else:
                feed = Feed(getId(title), title, url, self.imageHandler)
        return feed

    def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False):
        for key in self.getListOfFeeds():
            feed = self.loadFeed(key)
            feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
            self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
            self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()

    def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
        feed = self.getFeed(key)
        feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
        self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
        self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()

    def editFeed(self, key, title, url):
        self.listOfFeeds[key]["title"] = title
        self.listOfFeeds[key]["url"] = url
        feed = self.loadFeed(key)
        feed.editFeed(url)

    def getFeed(self, key):
        try:
            feed = self.loadFeed(key)
            feed.reloadUnread(self.configdir)
        except:
            # If the feed file gets corrupted, we need to reset the feed.
            import dbus
            bus = dbus.SessionBus()
            remote_object = bus.get_object("org.freedesktop.Notifications", # Connection name
                                           "/org/freedesktop/Notifications" # Object's path
                                          )
            iface = dbus.Interface(remote_object, 'org.freedesktop.Notifications')
            iface.SystemNoteInfoprint("Error opening feed %s, it has been reset." % self.getFeedTitle(key))
            if isdir(self.configdir+key+".d/"):
                rmtree(self.configdir+key+".d/")
            feed = self.loadFeed(key)
        return feed

    def getFeedUpdateTime(self, key):
        if "updateTime" not in self.listOfFeeds[key]:
            self.listOfFeeds[key]["updateTime"] = "Never"
        return self.listOfFeeds[key]["updateTime"]

    def getFeedNumberOfUnreadItems(self, key):
        if "unread" not in self.listOfFeeds[key]:
            self.listOfFeeds[key]["unread"] = 0
        return self.listOfFeeds[key]["unread"]

    def updateUnread(self, key, unreadItems):
        self.listOfFeeds[key]["unread"] = unreadItems

    def getFeedTitle(self, key):
        return self.listOfFeeds[key]["title"]

    def getFeedUrl(self, key):
        return self.listOfFeeds[key]["url"]

    def getListOfFeeds(self):
        return self.sortedKeys

    def getFavicon(self, key):
        filename = self.configdir+key+".d/favicon.ico"
        if isfile(filename):
            return filename
        else:
            return False

    def addFeed(self, title, url):
        if getId(title) not in self.listOfFeeds:
            self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
            self.sortedKeys.append(getId(title))
            self.saveConfig()
            #self.feeds[getId(title)] = Feed(title, url)
            return True
        else:
            return False

    def removeFeed(self, key):
        del self.listOfFeeds[key]
        self.sortedKeys.remove(key)
        #del self.feeds[key]
        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
        self.saveConfig()

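    # saveConfig persists the feed metadata (including the display order,
    # stored under the "feedingit-order" key) and the image reference counts
    # as two pickle files in the config directory.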
    def saveConfig(self):
        self.listOfFeeds["feedingit-order"] = self.sortedKeys
        file = open(self.configdir+"feeds.pickle", "w")
        pickle.dump(self.listOfFeeds, file)
        file.close()
        file = open(self.configdir+"images.pickle", "w")
        pickle.dump(self.imageHandler, file)
        file.close()

    def moveUp(self, key):
        index = self.sortedKeys.index(key)
        self.sortedKeys[index] = self.sortedKeys[index-1]
        self.sortedKeys[index-1] = key

    def moveDown(self, key):
        index = self.sortedKeys.index(key)
        index2 = (index+1) % len(self.sortedKeys)
        self.sortedKeys[index] = self.sortedKeys[index2]
        self.sortedKeys[index2] = key

if __name__ == "__main__":
    listing = Listing('/home/user/.feedingit/')
    keys = listing.getListOfFeeds()[:]
    #keys.reverse()
    for key in keys:
        if key.startswith('d8'):
            print listing.getFeedUpdateTime(key)
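
# Usage sketch (illustrative only; "Example Feed" and its URL are
# placeholders, and the config directory is assumed to exist):
#     listing = Listing('/home/user/.feedingit/')
#     listing.addFeed("Example Feed", "http://example.com/rss")
#     listing.updateFeeds(expiryTime=24)
#     for key in listing.getListOfFeeds():
#         print listing.getFeedTitle(key), listing.getFeedNumberOfUnreadItems(key)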