Feed/Listing refactored
[feedingit] / src / rss.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 #  This program is distributed in the hope that it will be useful,
11 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 #  GNU Lesser General Public License for more details.
14 #
15 #  You should have received a copy of the GNU Lesser General Public License
16 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 #
18
19 # ============================================================================
20 # Name        : FeedingIt.py
21 # Author      : Yves Marcoz
22 # Version     : 0.5.0
23 # Description : Simple RSS Reader
24 # ============================================================================
25
26 from os.path import isfile
27 from os.path import isdir
28 from shutil import rmtree
29 from os import mkdir
30 import pickle
31 import md5
32 import feedparser
33 import time
34 import urllib2
35
36 #CONFIGDIR="/home/user/.feedingit/"
37
def getId(string):
    """Return the 32-character hexadecimal MD5 digest of `string`.

    The digest is used throughout this module as a stable key / directory
    name for a feed or an entry; it is not used for security purposes.

    Text (unicode) input is encoded as UTF-8 before hashing, so titles
    containing non-ASCII characters no longer raise UnicodeEncodeError.
    For byte strings and pure-ASCII text the digest is identical to the
    one produced by the previous md5.new(string) implementation.
    """
    # Function-scope import: hashlib supersedes the deprecated md5 module.
    import hashlib
    # type(u"") is `unicode` on Python 2 and `str` on Python 3.
    if isinstance(string, type(u"")):
        string = string.encode("utf-8")
    return hashlib.md5(string).hexdigest()
40
class Entry:
    """Simple container holding the title, content, date and link of an entry."""

    def __init__(self, title, content, date, link):
        # All four values are stored exactly as given; nothing is validated
        # or converted here.
        self.title, self.content = title, content
        self.date, self.link = date, link
47         
48 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
49
class Feed:
    """One subscribed feed: its cached entries and their read/unread state.

    Attributes:
        entries     dict mapping entry id -> entry dict with the keys
                    "title", "content", "date", "dateTuple", "link", "images".
        ids         list of entry ids, in display order.
        readItems   dict mapping entry id -> bool (True once read).
        countUnread number of ids whose readItems value is False.
        updateTime  time.asctime() of the last successful update, or "Never".

    The object pickles itself to <configdir><getId(name)>.d/feed and its
    readItems dict to <configdir><getId(name)>.d/unread.
    """

    def __init__(self, name, url):
        self.titles = []
        self.entries = {}
        self.ids = []
        self.readItems = {}
        self.name = name
        self.url = url
        self.countUnread = 0
        self.updateTime = "Never"

    def editFeed(self, url):
        """Change the URL this feed is fetched from (in memory only)."""
        self.url = url

    def _ensureFeedDir(self, configdir):
        """Create this feed's storage directory if needed and return its path."""
        feedDir = configdir + getId(self.name) + ".d"
        if not isdir(feedDir):
            mkdir(feedDir)
        return feedDir

    def saveFeed(self, configdir):
        """Pickle the whole feed to disk, then write the unread file too."""
        feedDir = self._ensureFeedDir(configdir)
        # Binary mode yields the same bytes as text mode on POSIX and is what
        # the pickle module expects elsewhere.
        out = open(feedDir + "/feed", "wb")
        try:
            pickle.dump(self, out)
        finally:
            out.close()
        self.saveUnread(configdir)

    def saveUnread(self, configdir):
        """Pickle only the read/unread flags (self.readItems) to disk."""
        feedDir = self._ensureFeedDir(configdir)
        out = open(feedDir + "/unread", "wb")
        try:
            pickle.dump(self.readItems, out)
        finally:
            out.close()

    def reloadUnread(self, configdir):
        """Reload the read/unread flags from disk and recount unread items.

        Best effort: if the unread file is missing or cannot be unpickled the
        in-memory state is left untouched.  Returns the unread count.
        """
        try:
            src = open(configdir + getId(self.name) + ".d/unread", "rb")
            try:
                # NOTE: pickle must only be used on files this program wrote.
                loaded = pickle.load(src)
            finally:
                src.close()
        except (IOError, EOFError, ValueError, KeyError, IndexError,
                pickle.PickleError):
            return self.countUnread
        self.readItems = loaded
        self.countUnread = 0
        for entryId in self.ids:
            # Ids absent from the file are registered as unread rather than
            # aborting the count half-way through.
            if not self.readItems.setdefault(entryId, False):
                self.countUnread = self.countUnread + 1
        return self.countUnread

    def _keepUnexpired(self, newEntries, newIds, expiryTime):
        """Carry cached entries younger than expiryTime hours over into
        newEntries/newIds (both modified in place).

        Entries already present in newEntries are skipped.  Entries whose
        "dateTuple" cannot be converted to a timestamp are dropped, because
        their age cannot be determined.
        """
        now = time.time()
        maxAge = float(expiryTime) * 3600.
        for entryId in self.ids:
            if entryId in newEntries or entryId not in self.entries:
                continue
            cached = self.entries[entryId]
            try:
                # NOTE(review): mktime interprets the tuple as local time;
                # confirm whether the parser delivers UTC tuples.
                articleTime = time.mktime(cached["dateTuple"])
            except (KeyError, TypeError, ValueError, OverflowError):
                continue
            if now - articleTime < maxAge:
                newEntries[entryId] = cached
                newIds.append(entryId)

    def updateFeed(self, configdir, expiryTime=24):
        """Fetch self.url, merge the result with the cache and save the feed.

        expiryTime is in hours: cached entries that the feed no longer lists
        are kept until they are older than that.  If the fetch yields no
        entries at all, nothing is changed and nothing is saved.
        """
        parsed = feedparser.parse(self.url)
        # A result without entries is treated as a failed fetch.
        if not parsed["entries"]:
            return
        newEntries = {}
        newIds = []
        for entry in parsed["entries"]:
            (dateTuple, date) = self.extractDate(entry)
            newEntry = {"title": entry.get("title", ""),
                        "content": self.extractContent(entry),
                        "date": date, "dateTuple": dateTuple,
                        "link": entry.get("link", ""), "images": []}
            entryId = self.generateUniqueId(newEntry)
            # The same id may occur twice in one fetch; list it only once so
            # it is not counted twice as unread.
            if entryId not in newEntries:
                newIds.append(entryId)
            newEntries[entryId] = newEntry
        self._keepUnexpired(newEntries, newIds, expiryTime)

        previousFlags = self.readItems
        self.entries = newEntries
        self.ids = newIds
        self.readItems = {}
        self.countUnread = 0
        for entryId in newIds:
            # Articles seen for the first time start out unread.
            alreadyRead = previousFlags.get(entryId, False)
            self.readItems[entryId] = alreadyRead
            if not alreadyRead:
                self.countUnread = self.countUnread + 1
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def extractContent(self, entry):
        """Return the longer of the entry's summary and first content value,
        falling back to its description; "" if none is present."""
        content = entry.get('summary', '') or ""
        bodies = entry.get('content')
        if bodies:
            body = bodies[0]["value"]
            if len(body) > len(content):
                content = body
        if content == "":
            content = entry.get('description', '')
        return content

    def extractDate(self, entry):
        """Return (dateTuple, formattedDate) for the entry.

        "updated_parsed" is preferred over "published_parsed"; a key whose
        value is empty/None is skipped.  ("", "") is returned when the entry
        carries no usable date.
        """
        for key in ("updated_parsed", "published_parsed"):
            dateTuple = entry.get(key)
            if dateTuple:
                date = time.strftime("%a, %d %b %Y %H:%M:%S", dateTuple)
                return (dateTuple, date)
        return ("", "")

    def setEntryRead(self, id):
        """Mark entry `id` as read; raises KeyError for an unknown id."""
        if self.readItems[id] == False:
            self.countUnread = self.countUnread - 1
            self.readItems[id] = True

    def setEntryUnread(self, id):
        """Mark entry `id` as unread; raises KeyError for an unknown id."""
        if self.readItems[id] == True:
            self.countUnread = self.countUnread + 1
            self.readItems[id] = False

    def isEntryRead(self, id):
        return self.readItems[id]

    def getTitle(self, id):
        return self.entries[id]["title"]

    def getLink(self, id):
        return self.entries[id]["link"]

    def getDate(self, id):
        return self.entries[id]["date"]

    def getUniqueId(self, index):
        """Return the entry id stored at position `index` of the id list."""
        return self.ids[index]

    def generateUniqueId(self, entry):
        """Derive an entry id from the entry's formatted date and title."""
        return getId(entry["date"] + entry["title"])

    def getUpdateTime(self):
        return self.updateTime

    def getEntries(self):
        return self.entries

    def getIds(self):
        return self.ids

    def getNextId(self, id):
        """Return the id following `id`, wrapping around at the end."""
        return self.ids[(self.ids.index(id) + 1) % self.getNumberOfEntries()]

    def getPreviousId(self, id):
        """Return the id preceding `id`, wrapping around at the start."""
        return self.ids[(self.ids.index(id) - 1) % self.getNumberOfEntries()]

    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.ids)

    def getItem(self, id):
        """Return the entry dict for `id`, or [] when the id is unknown."""
        try:
            return self.entries[id]
        except KeyError:
            return []

    def getContent(self, id):
        return self.entries[id]["content"]

    def getArticle(self, id):
        """Mark entry `id` as read and return it rendered as an HTML page."""
        self.setEntryRead(id)
        entry = self.entries[id]
        title = entry['title']
        content = entry["content"]
        link = entry['link']
        date = entry["date"]

        # NOTE(review): title, link and content are inserted unescaped;
        # confirm they are already sanitised before they reach this point.
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        text += '<style> body {-webkit-user-select: none;} </style></head>'
        text += '<body><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text
236         
class ArchivedArticles(Feed):
    """Feed-like store for articles the user archived for later reading.

    Entries use the same id-keyed dict layout as Feed, plus two extra keys:
    "downloaded" (True once the linked page has been fetched) and "time"
    (timestamp of archiving / of the successful download), which drives
    expiry.  Until the page is downloaded, "content" is a plain link.
    """

    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        """Register an article for archiving and save the feed.

        updated_parsed may be a time tuple or an already formatted date
        string.  Adding an article that is already archived is a no-op.
        """
        try:
            date = time.strftime("%a, %d %b %Y %H:%M:%S", updated_parsed)
            dateTuple = updated_parsed
        except TypeError:
            # Not a time tuple: treat it as an already formatted date string.
            date = updated_parsed
            dateTuple = ""
        entry = {}
        entry["title"] = title
        entry["link"] = link
        entry["date"] = date
        entry["dateTuple"] = dateTuple
        entry["images"] = []
        entry["downloaded"] = False
        entry["content"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        entryId = self.generateUniqueId(entry)
        if entryId in self.entries:
            # Already archived: keep the existing (possibly downloaded) copy.
            return
        self.entries[entryId] = entry
        self.ids.append(entryId)
        self.readItems[entryId] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)

    def updateFeed(self, configdir, expiryTime=24):
        """Download pending articles, drop expired ones and save the feed.

        expiryTime is in hours, measured from the entry's "time" value.
        Download failures are ignored; the article is retried on the next
        update.
        """
        expiry = float(expiryTime) * 3600
        # Iterate over a copy: expired ids are removed from self.ids below.
        for entryId in self.ids[:]:
            entry = self.entries[entryId]
            if not entry["downloaded"]:
                page = ""
                try:
                    f = urllib2.urlopen(entry["link"])
                    try:
                        page = f.read()
                    finally:
                        f.close()
                except Exception:
                    # Best effort: any network/URL problem leaves the entry
                    # pending instead of aborting the whole update.
                    page = ""
                if len(page) > 0:
                    entry["content"] = page
                    entry["downloaded"] = True
                    entry["time"] = time.time()
                    # Freshly downloaded content is presented as unread.
                    self.readItems[entryId] = False
            if time.time() - entry["time"] > expiry:
                del self.entries[entryId]
                self.ids.remove(entryId)
                self.readItems.pop(entryId, None)
        # Recount from scratch so removals and re-flagged entries both show.
        self.countUnread = 0
        for entryId in self.ids:
            if not self.readItems.setdefault(entryId, False):
                self.countUnread = self.countUnread + 1
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def getArticle(self, index):
        """Mark the entry with id `index` as read and return its content."""
        self.setEntryRead(index)
        content = self.getContent(index)
        return content
278
279
class Listing:
    """Registry of all subscribed feeds, stored in <configdir>feeds.pickle.

    self.listOfFeeds maps a feed key to a dict with "title", "url" and the
    cached "unread" / "updateTime" values; it also holds the display order
    under the reserved key "feedingit-order".  self.sortedKeys is that
    ordered list of feed keys.  Feed objects themselves are loaded from
    disk on demand and are not cached here.
    """

    def __init__(self, configdir):
        self.configdir = configdir
        if isfile(self.configdir+"feeds.pickle"):
            # NOTE: pickle must only be used on files this program wrote.
            f = open(self.configdir+"feeds.pickle", "rb")
            try:
                self.listOfFeeds = pickle.load(f)
            finally:
                f.close()
        else:
            self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
        # Discard the "font" key if present; it is not a feed.
        if "font" in self.listOfFeeds:
            del self.listOfFeeds["font"]
        if "feedingit-order" in self.listOfFeeds:
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        else:
            # No stored order yet: default to alphabetical by title.
            self.sortedKeys = sorted(self.listOfFeeds.keys(), key=self.getFeedTitle)
        self.closeCurrentlyDisplayedFeed()

    def addArchivedArticle(self, key, index):
        """Copy entry `index` of feed `key` into the "Archived Articles" feed,
        registering that feed first if it does not exist yet."""
        source = self.getFeed(key)
        title = source.getTitle(index)
        link = source.getLink(index)
        date = source.getDate(index)
        archiveKey = getId("Archived Articles")
        if archiveKey not in self.listOfFeeds:
            self.listOfFeeds[archiveKey] = {"title":"Archived Articles", "url":""}
            self.sortedKeys.append(archiveKey)
            self.saveConfig()
        archive = self.getFeed(archiveKey)
        archive.addArchivedArticle(title, link, date, self.configdir)
        self.listOfFeeds[archiveKey]["unread"] = archive.getNumberOfUnreadItems()

    def loadFeed(self, key):
        """Return the feed object for `key`: unpickled from disk if it was
        saved before, otherwise newly built from the registry data."""
        path = self.configdir+key+".d/feed"
        if isfile(path):
            f = open(path, "rb")
            try:
                feed = pickle.load(f)
            finally:
                f.close()
            return feed
        title = self.listOfFeeds[key]["title"]
        url = self.listOfFeeds[key]["url"]
        if key == getId("Archived Articles"):
            # The archive needs its own class to offer addArchivedArticle().
            return ArchivedArticles(title, url)
        return Feed(title, url)

    def updateFeeds(self, expiryTime=24):
        """Update every registered feed; expiryTime is in hours."""
        for key in self.getListOfFeeds():
            self.updateFeed(key, expiryTime)

    def updateFeed(self, key, expiryTime=24):
        """Update one feed and refresh its cached unread count / update time."""
        # getFeed() reloads the unread file, which saveFeed() always rewrites
        # and which therefore is never older than the pickled feed; this
        # keeps the update from saving stale read flags.
        feed = self.getFeed(key)
        feed.updateFeed(self.configdir, expiryTime)
        self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
        self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()

    def editFeed(self, key, title, url):
        """Change title and URL of feed `key`.

        The registry is changed in memory only (call saveConfig() to persist
        it); a feed that already exists on disk is re-saved with the new URL
        so the next update fetches from it.
        """
        self.listOfFeeds[key]["title"] = title
        self.listOfFeeds[key]["url"] = url
        feed = self.getFeed(key)
        feed.editFeed(url)
        # A feed never saved before is rebuilt from the registry on every
        # load, so there is nothing to persist for it.
        # NOTE(review): such a feed is rebuilt with the *new* title, and
        # Feed derives its directory from its name while this class looks
        # under `key` -- confirm renaming an unsaved feed is handled upstream.
        if isfile(self.configdir+key+".d/feed"):
            feed.saveFeed(self.configdir)

    def getFeed(self, key):
        """Load feed `key` and refresh its read flags from the unread file."""
        feed = self.loadFeed(key)
        feed.reloadUnread(self.configdir)
        return feed

    def getFeedUpdateTime(self, key):
        """Return the cached update time, initialising it to "Never"."""
        return self.listOfFeeds[key].setdefault("updateTime", "Never")

    def getFeedNumberOfUnreadItems(self, key):
        """Return the cached unread count, initialising it to 0."""
        return self.listOfFeeds[key].setdefault("unread", 0)

    def updateUnread(self, key, unreadItems):
        self.listOfFeeds[key]["unread"] = unreadItems

    def getFeedTitle(self, key):
        return self.listOfFeeds[key]["title"]

    def getFeedUrl(self, key):
        return self.listOfFeeds[key]["url"]

    def getListOfFeeds(self):
        """Return the feed keys in display order (the live list, not a copy)."""
        return self.sortedKeys

    def addFeed(self, title, url):
        """Register a new feed and save the registry.

        Returns False (and changes nothing) if a feed with the same title
        is already registered, True otherwise.
        """
        key = getId(title)
        if key in self.listOfFeeds:
            return False
        self.listOfFeeds[key] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
        self.sortedKeys.append(key)
        self.saveConfig()
        return True

    def removeFeed(self, key):
        """Unregister feed `key`, delete its directory and save the registry."""
        del self.listOfFeeds[key]
        self.sortedKeys.remove(key)
        feedDir = self.configdir+key+".d/"
        if isdir(feedDir):
            rmtree(feedDir)
        self.saveConfig()

    def saveConfig(self):
        """Write the registry, including the display order, to feeds.pickle."""
        self.listOfFeeds["feedingit-order"] = self.sortedKeys
        # Binary mode yields the same bytes as text mode on POSIX.
        f = open(self.configdir+"feeds.pickle", "wb")
        try:
            pickle.dump(self.listOfFeeds, f)
        finally:
            f.close()

    def moveUp(self, key):
        """Swap `key` with its predecessor; the first key swaps with the last."""
        index = self.sortedKeys.index(key)
        keys = self.sortedKeys
        # index-1 is -1 for the first element, i.e. the last position.
        keys[index], keys[index-1] = keys[index-1], keys[index]

    def moveDown(self, key):
        """Swap `key` with its successor; the last key swaps with the first."""
        index = self.sortedKeys.index(key)
        index2 = (index+1)%len(self.sortedKeys)
        keys = self.sortedKeys
        keys[index], keys[index2] = keys[index2], keys[index]

    def setCurrentlyDisplayedFeed(self, key):
        self.currentlyDisplayedFeed = key

    def closeCurrentlyDisplayedFeed(self):
        # False means "no feed is currently displayed".
        self.currentlyDisplayedFeed = False

    def getCurrentlyDisplayedFeed(self):
        return self.currentlyDisplayedFeed
435     
if __name__ == "__main__":
    # Manual debugging aid: print the stored update time of every feed whose
    # key starts with 'd8', using the hard-coded configuration directory.
    listing = Listing('/home/user/.feedingit/')
    # Iterate over a copy of the key list; the name `list` is deliberately
    # not rebound, as that would hide the builtin for the whole module.
    for key in listing.getListOfFeeds()[:]:
        if key.startswith('d8'):
            # Parenthesised single-argument form prints the same on
            # Python 2 and Python 3.
            print(listing.getFeedUpdateTime(key))