Add unread cyan colour; fix unread tracking
[feedingit] / src / rss.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 #  This program is distributed in the hope that it will be useful,
11 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 #  GNU Lesser General Public License for more details.
14 #
15 #  You should have received a copy of the GNU Lesser General Public License
16 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 #
18
19 # ============================================================================
20 # Name        : FeedingIt.py
21 # Author      : Yves Marcoz
22 # Version     : 0.5.0
23 # Description : Simple RSS Reader
24 # ============================================================================
25
import hashlib
import md5
import os
import pickle
import time
import urllib2
from os import mkdir
from os.path import isdir
from os.path import isfile
from shutil import rmtree
from urlparse import urlparse

import feedparser
from BeautifulSoup import BeautifulSoup
37
38 #CONFIGDIR="/home/user/.feedingit/"
39
def getId(string):
    """Return the hex MD5 digest of *string*, used as a filesystem-safe key.

    hashlib.md5 produces the same digest as the deprecated md5.new.
    """
    return hashlib.md5(string).hexdigest()
42
43 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
44
class ImageHandler:
    """Downloads and reference-counts images shared by feed entries.

    Images are cached under ``<configdir><key>.d/<md5-of-url>``; a
    per-URL reference count ensures a cached file is only deleted once
    no entry uses it any more.
    """
    def __init__(self, configdir):
        self.configdir = configdir
        # url -> number of entries currently referencing the cached image
        self.images = {}

    def addImage(self, key, baseurl, url):
        """Download *url* (relative URLs resolved against *baseurl*) into
        the feed's cache directory, bump its reference count, and return
        a file:// URI pointing at the local copy."""
        filename = self.configdir + key + ".d/" + getId(url)
        if not isfile(filename):
            try:
                if url.startswith("http"):
                    f = urllib2.urlopen(url)
                else:
                    f = urllib2.urlopen(baseurl + "/" + url)
                # Images are binary data: write in binary mode ("wb"),
                # not text mode, so the bytes are not mangled.
                outf = open(filename, "wb")
                outf.write(f.read())
                f.close()
                outf.close()
            except Exception:
                # Best effort: a missing image must not abort the update.
                print("Could not download " + url)
        if url in self.images:
            self.images[url] += 1
        else:
            self.images[url] = 1
        return "file://" + filename

    def removeImage(self, key, url):
        """Drop one reference to *url*; delete the cached file when the
        last reference goes away."""
        filename = self.configdir + key + ".d/" + getId(url)
        self.images[url] -= 1
        if self.images[url] == 0:
            # Requires the module-level ``import os`` (was missing before).
            os.remove(filename)
            del self.images[url]
76
class UnreadTracker:
    """Tracks the read/unread state of entry ids and keeps a running
    count of unread entries.

    ``readItems`` maps entry id -> True once read, False while unread.
    """
    def __init__(self):
        self.readItems = {}
        # Bug fix: the original read ``self.countUnread`` without
        # assigning it, which raised AttributeError on construction.
        self.countUnread = 0

    def setEntryUnread(self, id):
        """Mark *id* unread, registering it first if it is unknown."""
        if id in self.readItems:
            if self.readItems[id] == True:
                self.countUnread = self.countUnread + 1
                self.readItems[id] = False
        else:
            self.readItems[id] = False
            self.countUnread = self.countUnread + 1

    def setEntryRead(self, id):
        """Mark a known *id* as read (idempotent)."""
        if self.readItems[id] == False:
            self.countUnread = self.countUnread - 1
            self.readItems[id] = True

    def isRead(self, id):
        """Return True when *id* has been read."""
        return self.readItems[id]

    def removeEntry(self, id):
        """Forget *id* entirely, adjusting the unread count first."""
        if self.readItems[id] == False:
            self.countUnread = self.countUnread - 1
        del self.readItems[id]
103
class Feed:
    """One RSS/Atom feed: its parsed entries, per-entry read state, and
    persistence under ``<configdir><uniqueId>.d/``.

    Entry dicts carry: title, content, date (formatted string),
    dateTuple (time tuple from feedparser), link, images; archived
    entries may additionally carry contentLink/downloaded/time.
    """
    def __init__(self, uniqueId, name, url, imageHandler):
        self.titles = []
        self.entries = {}      # entry id -> entry dict
        self.ids = []          # entry ids in feed order
        self.readItems = {}    # entry id -> True once read
        self.name = name
        self.url = url
        self.countUnread = 0
        self.updateTime = "Never"
        self.uniqueId = uniqueId
        self.imageHandler = imageHandler

    def editFeed(self, url):
        """Point this feed at a new source URL."""
        self.url = url

    def saveFeed(self, configdir):
        """Pickle the whole feed object (and its unread map) to disk."""
        if not isdir(configdir + self.uniqueId + ".d"):
            mkdir(configdir + self.uniqueId + ".d")
        file = open(configdir + self.uniqueId + ".d/feed", "w")
        pickle.dump(self, file)
        file.close()
        self.saveUnread(configdir)

    def saveUnread(self, configdir):
        """Pickle just the read/unread map to disk."""
        if not isdir(configdir + self.uniqueId + ".d"):
            mkdir(configdir + self.uniqueId + ".d")
        file = open(configdir + self.uniqueId + ".d/unread", "r" "w"[1])
        pickle.dump(self.readItems, file)
        file.close()

    def reloadUnread(self, configdir):
        """Reload the read/unread map from disk and recount unread items.

        Returns the (possibly unchanged) unread count. A missing or
        unreadable file deliberately keeps the in-memory state.
        """
        try:
            file = open(configdir + self.uniqueId + ".d/unread", "r")
            self.readItems = pickle.load(file)
            file.close()
            self.countUnread = 0
            for id in self.getIds():
                if self.readItems[id] == False:
                    self.countUnread = self.countUnread + 1
        except Exception:
            # Best effort: keep current state if the file is absent/corrupt.
            pass
        return self.countUnread

    def updateFeed(self, configdir, expiryTime=24):
        """Re-download the feed and merge it with existing entries.

        *expiryTime* is in hours: read articles older than that are
        dropped; unread ones get a grace period of twice that.
        """
        tmp = feedparser.parse(self.url)
        # Only proceed when the parse actually produced entries.
        if len(tmp["entries"]) > 0:
            tmpEntries = {}
            tmpIds = []
            for entry in tmp["entries"]:
                (dateTuple, date) = self.extractDate(entry)
                tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                             "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
                id = self.generateUniqueId(tmpEntry)
                tmpEntries[id] = tmpEntry
                tmpIds.append(id)
            # Carry over old entries that have not expired yet.
            currentTime = time.time()            # loop-invariant, hoisted
            expiry = float(expiryTime) * 3600.
            for entryId in self.getIds():
                articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                if currentTime - articleTime < expiry:
                    if entryId not in tmpIds:
                        tmpEntries[entryId] = self.entries[entryId]
                        tmpIds.append(entryId)
                else:
                    # Unread articles survive up to one extra expiry period.
                    if (not self.isEntryRead(entryId)) and (currentTime - articleTime < 2*expiry):
                        tmpEntries[entryId] = self.entries[entryId]
                        tmpIds.append(entryId)
            self.entries = tmpEntries
            self.ids = tmpIds
            self.countUnread = 0
            # New articles start unread; known ones keep their state.
            tmpReadItems = self.readItems
            self.readItems = {}
            for id in self.getIds():
                if id not in tmpReadItems:
                    self.readItems[id] = False
                else:
                    self.readItems[id] = tmpReadItems[id]
                if self.readItems[id] == False:
                    self.countUnread = self.countUnread + 1
            del tmp
            self.updateTime = time.asctime()
            self.saveFeed(configdir)

    def extractContent(self, entry):
        """Return the richest content available on a feedparser entry,
        preferring full content over summary over description."""
        content = ""
        if 'summary' in entry:
            content = entry.get('summary', '')
        if 'content' in entry:
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def extractDate(self, entry):
        """Return (time-tuple, formatted string) for the entry's date,
        or ("", "") when the entry carries no parsed date."""
        if "updated_parsed" in entry:
            date1 = entry["updated_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"])
        elif "published_parsed" in entry:
            date1 = entry["published_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
        else:
            date1 = ""
            date = ""
        return (date1, date)

    def setEntryRead(self, id):
        """Mark *id* read and decrement the unread count (idempotent)."""
        if self.readItems[id] == False:
            self.countUnread = self.countUnread - 1
            self.readItems[id] = True

    def setEntryUnread(self, id):
        """Mark *id* unread and increment the unread count (idempotent)."""
        if self.readItems[id] == True:
            self.countUnread = self.countUnread + 1
            self.readItems[id] = False

    def isEntryRead(self, id):
        return self.readItems[id]

    def getTitle(self, id):
        return self.entries[id]["title"]

    def getLink(self, id):
        # Prefer the locally cached article page when one was downloaded.
        if "contentLink" in self.entries[id]:
            return self.entries[id]["contentLink"]
        return self.entries[id]["link"]

    def getDate(self, id):
        return self.entries[id]["date"]

    def getDateTuple(self, id):
        return self.entries[id]["dateTuple"]

    def getUniqueId(self, index):
        return self.ids[index]

    def generateUniqueId(self, entry):
        """Hash date+title so the id is stable across re-downloads."""
        return getId(entry["date"] + entry["title"])

    def getUpdateTime(self):
        return self.updateTime

    def getEntries(self):
        return self.entries

    def getIds(self):
        return self.ids

    def getNextId(self, id):
        """Id following *id*, wrapping around at the end."""
        return self.ids[(self.ids.index(id) + 1) % self.getNumberOfEntries()]

    def getPreviousId(self, id):
        """Id preceding *id*, wrapping around at the start."""
        return self.ids[(self.ids.index(id) - 1) % self.getNumberOfEntries()]

    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.ids)

    def getItem(self, id):
        """Entry dict for *id*; historical interface returns [] when unknown."""
        try:
            return self.entries[id]
        except KeyError:
            return []

    def getContent(self, id):
        """Entry content, read from the cached file when one exists."""
        if "contentLink" in self.entries[id]:
            file = open(self.entries[id]["contentLink"])
            content = file.read()
            file.close()
            return content
        return self.entries[id]["content"]

    def removeEntry(self, id):
        """Delete an entry, its cached images/content, and its read state."""
        entry = self.entries[id]
        for img in entry["images"]:
            self.imageHandler.removeImage(self.uniqueId, img)
        # Bug fix: was ``entry.has_key["contentLink"]`` — a TypeError
        # (subscripting a method) that made this branch unreachable.
        if "contentLink" in entry:
            os.remove(entry["contentLink"])
        # Bug fix: dicts have no remove(); delete the keys instead.
        del self.entries[id]
        self.ids.remove(id)
        if self.readItems[id] == False:
            self.countUnread = self.countUnread - 1
        del self.readItems[id]

    def getArticle(self, id):
        """Mark the entry read and return it rendered as an XHTML page."""
        self.setEntryRead(id)
        entry = self.entries[id]
        title = entry['title']
        content = entry["content"]

        link = entry['link']
        date = entry["date"]

        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text
319         
class ArchivedArticles(Feed):
    """A pseudo-feed holding articles the user explicitly archived;
    article pages (and their images) are downloaded for offline reading."""

    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        """Register a new archived article (not yet downloaded) as an
        unread entry and persist the feed."""
        entry = {}
        entry["title"] = title
        entry["link"] = link
        entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        (dateTuple, date) = self.extractDate(entry)
        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
        id = self.generateUniqueId(tmpEntry)
        self.entries[id] = tmpEntry
        self.ids.append(id)
        self.readItems[id] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        self.saveUnread(configdir)

    def updateFeed(self, configdir, expiryTime=24):
        """Download any pending articles, localize their images, and
        expire old ones (read: after *expiryTime* hours; unread: after
        twice that)."""
        currentTime = time.time()
        expiry = float(expiryTime) * 3600
        # Bug fix: iterate over a COPY of the id list — removeEntry()
        # mutates self.ids, which previously skipped entries.
        for id in self.getIds()[:]:
            entry = self.entries[id]
            if not entry["downloaded"]:
                f = urllib2.urlopen(entry["link"])
                html = f.read()
                f.close()
                soup = BeautifulSoup(html)
                images = soup.body('img')
                baseurl = ''.join(urlparse(entry["link"])[:-1])
                for img in images:
                    # Cache each image locally and rewrite the tag to use it.
                    filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
                    img['src'] = filename
                    entry["images"].append(filename)
                entry["contentLink"] = configdir + self.uniqueId + ".d/" + id + ".html"
                file = open(entry["contentLink"], "w")
                file.write(soup.prettify())
                file.close()
                if len(entry["content"]) > 0:
                    entry["downloaded"] = True
                    entry["time"] = time.time()
                    self.setEntryUnread(id)
            if currentTime - entry["time"] > expiry:
                if self.isEntryRead(id):
                    self.removeEntry(id)
                else:
                    # Unread articles get one extra expiry period.
                    if currentTime - entry["time"] > 2*expiry:
                        self.removeEntry(id)
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def getArticle(self, index):
        """Mark the archived article read and return its cached content."""
        self.setEntryRead(index)
        content = self.getContent(index)
        return content
396
397
class Listing:
    """Registry of all feeds: maps feed ids to metadata dicts
    (title/url/unread/updateTime), keeps the display order, and loads
    and saves Feed objects on demand."""

    def __init__(self, configdir):
        self.configdir = configdir
        if isfile(self.configdir + "feeds.pickle"):
            file = open(self.configdir + "feeds.pickle")
            self.listOfFeeds = pickle.load(file)
            file.close()
        else:
            # First run: seed with a default feed.
            self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
        if isfile(self.configdir + "images.pickle"):
            file = open(self.configdir + "images.pickle")
            self.imageHandler = pickle.load(file)
            file.close()
        else:
            self.imageHandler = ImageHandler(self.configdir)
        # "font" is a legacy config key that may linger in old pickles.
        if "font" in self.listOfFeeds:
            del self.listOfFeeds["font"]
        if "feedingit-order" in self.listOfFeeds:
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        else:
            self.sortedKeys = self.listOfFeeds.keys()
            if "font" in self.sortedKeys:
                self.sortedKeys.remove("font")
            self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
        self.closeCurrentlyDisplayedFeed()

    def addArchivedArticle(self, key, index):
        """Copy entry *index* of feed *key* into the ArchivedArticles feed."""
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getLink(index)
        date = feed.getDateTuple(index)
        if "ArchivedArticles" not in self.listOfFeeds:
            self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
            self.sortedKeys.append("ArchivedArticles")
            self.saveConfig()
        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        # Bug fix: the archive feed's unread count was written into the
        # SOURCE feed's record, corrupting its unread tracking.
        self.listOfFeeds["ArchivedArticles"]["unread"] = archFeed.getNumberOfUnreadItems()

    def loadFeed(self, key):
        """Unpickle the Feed object for *key*, or build a fresh one from
        the configured title/url when no pickle exists yet."""
        if isfile(self.configdir + key + ".d/feed"):
            file = open(self.configdir + key + ".d/feed")
            feed = pickle.load(file)
            file.close()
            try:
                feed.uniqueId
                feed.imageHandler
            except AttributeError:
                # Migrate pickles from versions predating these attributes.
                feed.uniqueId = getId(feed.name)
                feed.imageHandler = self.imageHandler
        else:
            title = self.listOfFeeds[key]["title"]
            url = self.listOfFeeds[key]["url"]
            if key == "ArchivedArticles":
                feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
            else:
                feed = Feed(getId(title), title, url, self.imageHandler)
        return feed

    def updateFeeds(self, expiryTime=24):
        """Refresh every feed, recording its unread count and update time."""
        for key in self.getListOfFeeds():
            feed = self.loadFeed(key)
            feed.updateFeed(self.configdir, expiryTime)
            self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
            self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()

    def updateFeed(self, key, expiryTime=24):
        """Refresh a single feed and record its unread count and update time."""
        feed = self.getFeed(key)
        feed.updateFeed(self.configdir, expiryTime)
        self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
        self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()

    def editFeed(self, key, title, url):
        """Rename/repoint a feed and persist the change."""
        self.listOfFeeds[key]["title"] = title
        self.listOfFeeds[key]["url"] = url
        feed = self.loadFeed(key)
        feed.editFeed(url)
        # Bug fix: the edited Feed object was discarded without saving,
        # so the new url was lost for feeds with an existing pickle.
        feed.saveFeed(self.configdir)

    def getFeed(self, key):
        """Load a feed and refresh its unread state from disk."""
        feed = self.loadFeed(key)
        feed.reloadUnread(self.configdir)
        return feed

    def getFeedUpdateTime(self, key):
        """Feed's last-update string; defaults missing entries to "Never"."""
        if "updateTime" not in self.listOfFeeds[key]:
            self.listOfFeeds[key]["updateTime"] = "Never"
        return self.listOfFeeds[key]["updateTime"]

    def getFeedNumberOfUnreadItems(self, key):
        """Feed's stored unread count; defaults missing entries to 0."""
        if "unread" not in self.listOfFeeds[key]:
            self.listOfFeeds[key]["unread"] = 0
        return self.listOfFeeds[key]["unread"]

    def updateUnread(self, key, unreadItems):
        self.listOfFeeds[key]["unread"] = unreadItems

    def getFeedTitle(self, key):
        return self.listOfFeeds[key]["title"]

    def getFeedUrl(self, key):
        return self.listOfFeeds[key]["url"]

    def getListOfFeeds(self):
        """Feed keys in display order."""
        return self.sortedKeys

    def addFeed(self, title, url):
        """Add a feed; returns False when one with that title exists."""
        if getId(title) not in self.listOfFeeds:
            self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
            self.sortedKeys.append(getId(title))
            self.saveConfig()
            return True
        else:
            return False

    def removeFeed(self, key):
        """Delete a feed, its on-disk cache directory, and persist."""
        del self.listOfFeeds[key]
        self.sortedKeys.remove(key)
        if isdir(self.configdir + key + ".d/"):
            rmtree(self.configdir + key + ".d/")
        self.saveConfig()

    def saveConfig(self):
        """Persist the feed list (with ordering) and the image handler."""
        self.listOfFeeds["feedingit-order"] = self.sortedKeys
        file = open(self.configdir + "feeds.pickle", "w")
        pickle.dump(self.listOfFeeds, file)
        file.close()
        file = open(self.configdir + "images.pickle", "w")
        pickle.dump(self.imageHandler, file)
        file.close()

    def moveUp(self, key):
        """Swap *key* with its predecessor (wraps to the end at index 0)."""
        index = self.sortedKeys.index(key)
        self.sortedKeys[index] = self.sortedKeys[index - 1]
        self.sortedKeys[index - 1] = key

    def moveDown(self, key):
        """Swap *key* with its successor (wraps to the front at the end)."""
        index = self.sortedKeys.index(key)
        index2 = (index + 1) % len(self.sortedKeys)
        self.sortedKeys[index] = self.sortedKeys[index2]
        self.sortedKeys[index2] = key

    def setCurrentlyDisplayedFeed(self, key):
        self.currentlyDisplayedFeed = key

    def closeCurrentlyDisplayedFeed(self):
        # False means no feed is currently open.
        self.currentlyDisplayedFeed = False

    def getCurrentlyDisplayedFeed(self):
        return self.currentlyDisplayedFeed
573     
if __name__ == "__main__":
    # Ad-hoc debug driver: print the update time of every feed whose
    # key starts with 'd8'.
    listing = Listing('/home/user/.feedingit/')
    # Renamed from ``list`` to avoid shadowing the builtin; keep a copy
    # so iterating is safe even if the listing is mutated.
    feedKeys = listing.getListOfFeeds()[:]
    for key in feedKeys:
        if key.startswith('d8'):
            print(listing.getFeedUpdateTime(key))