refactoring Feed, step 1
[feedingit] / src / rss.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 #  This program is distributed in the hope that it will be useful,
11 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 #  GNU Lesser General Public License for more details.
14 #
15 #  You should have received a copy of the GNU Lesser General Public License
16 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 #
18
19 # ============================================================================
20 # Name        : FeedingIt.py
21 # Author      : Yves Marcoz
22 # Version     : 0.5.0
23 # Description : Simple RSS Reader
24 # ============================================================================
25
26 from os.path import isfile
27 from os.path import isdir
28 from os import remove
29 import pickle
30 import md5
31 import feedparser
32 import time
33 import urllib2
34
35 #CONFIGDIR="/home/user/.feedingit/"
36
def getId(string):
    """Return the hexadecimal MD5 digest of *string*.

    The digest is used throughout this module as a stable identifier
    (dictionary key and on-disk file name) derived from a feed name or
    from article data.

    Unicode text is encoded as UTF-8 before hashing, so titles containing
    non-ASCII characters no longer raise UnicodeEncodeError.  For pure
    ASCII input the digest is unchanged, which keeps identifiers that were
    generated earlier valid.
    """
    # hashlib replaces the deprecated ``md5`` module (available since 2.5).
    import hashlib
    if isinstance(string, type(u"")):
        string = string.encode("utf-8")
    return hashlib.md5(string).hexdigest()
39
class Entry:
    """Plain record holding the data of one article."""

    def __init__(self, title, content, date, link):
        # The four values are stored unchanged under attributes of the
        # same name.
        self.title, self.content = title, content
        self.date, self.link = date, link
46         
47 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
48
class Feed:
    """All the data of a single feed (articles, read state) plus accessors.

    ``entries`` holds the article mappings, ``readItems`` maps an article's
    unique id (see getUniqueId) to its read flag and ``countUnread`` caches
    the number of unread articles.
    """

    # strftime pattern used for displayed dates and for unique ids.
    _DATE_FORMAT = "%a, %d %b %Y %H:%M:%S"

    def __init__(self, name, url):
        self.titles = []
        self.entries = []
        self.readItems = {}
        # Must exist before the first update: getNumberOfUnreadItems() and
        # subclasses read it.
        self.countUnread = 0
        self.name = name
        self.url = url
        self.updateTime = "Never"

    def editFeed(self, url):
        """Change the URL the feed is fetched from."""
        self.url = url

    def saveFeed(self, configdir):
        """Pickle this feed into ``configdir + getId(self.name)``."""
        out = open(configdir + getId(self.name), "w")
        try:
            pickle.dump(self, out)
        finally:
            # Close the handle even when pickling fails.
            out.close()

    def updateFeed(self, configdir, expiryTime=24):
        """Fetch the feed and merge the result with the stored articles.

        expiryTime is in hours.  Stored articles that are missing from the
        fetched data are kept only when they carry an "updated_parsed"
        time stamp younger than expiryTime.  Nothing is changed (and
        nothing is saved) when the fetch yields no entries.
        """
        parsed = feedparser.parse(self.url)
        fresh = parsed["entries"]
        # Check if the parse was successful (number of entries > 0, else
        # do nothing)
        if len(fresh) == 0:
            return

        freshIds = set([self.getUniqueId(-1, entry) for entry in fresh])
        now = time.time()
        maxAge = float(expiryTime) * 3600.
        for entry in self.getEntries():
            if not entry.has_key("updated_parsed"):
                continue
            if now - time.mktime(entry["updated_parsed"]) >= maxAge:
                continue
            if self.getUniqueId(-1, entry) not in freshIds:
                fresh.append(entry)

        self.entries = fresh
        # Carry over the known read flags; new articles start unread.
        previous = self.readItems
        self.readItems = {}
        self.countUnread = 0
        for entry in self.entries:
            uid = self.getUniqueId(-1, entry)
            self.readItems[uid] = previous.get(uid, False)
            if self.readItems[uid] == False:
                self.countUnread = self.countUnread + 1
        del parsed
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def extractContent(self, entry):
        """Return the longest text available for *entry*.

        Uses "summary", or the first "content" element when it is longer,
        and falls back to "description" (or "") when both are empty.
        """
        # Start from an empty string so entries without a "summary" key do
        # not hit an unassigned local below.
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def extractDate(self, entry):
        """Return the formatted update (or publication) date, or ""."""
        if entry.has_key("updated_parsed"):
            return time.strftime(self._DATE_FORMAT, entry["updated_parsed"])
        if entry.has_key("published_parsed"):
            return time.strftime(self._DATE_FORMAT, entry["published_parsed"])
        return ""

    def setEntryRead(self, index):
        """Mark the article at *index* as read."""
        uid = self.getUniqueId(index)
        if self.readItems[uid] == False:
            self.countUnread = self.countUnread - 1
            self.readItems[uid] = True

    def setEntryUnread(self, index):
        """Mark the article at *index* as unread."""
        uid = self.getUniqueId(index)
        if self.readItems[uid] == True:
            self.countUnread = self.countUnread + 1
            self.readItems[uid] = False

    def isEntryRead(self, index):
        return self.readItems[self.getUniqueId(index)]

    def getTitle(self, index):
        return self.entries[index]["title"]

    def getLink(self, index):
        return self.entries[index]["link"]

    def getDate(self, index):
        """Return the article's "updated_parsed" value.

        Falls back to the current local time when the article or the key
        is missing.
        """
        try:
            return self.entries[index]["updated_parsed"]
        except (AttributeError, IndexError, KeyError, TypeError):
            return time.localtime()

    def getUniqueId(self, index, entry=None):
        """Return the id of the article at *index*.

        When *index* is negative the id is computed for the given *entry*
        instead.  The id hashes the update time and title, else link and
        title, else the title alone.
        """
        if index >= 0:
            entry = self.entries[index]
        if entry.has_key("updated_parsed"):
            stamp = time.strftime(self._DATE_FORMAT, entry["updated_parsed"])
            return getId(stamp + entry["title"])
        elif entry.has_key("link"):
            return getId(entry["link"] + entry["title"])
        else:
            return getId(entry["title"])

    def getUpdateTime(self):
        return self.updateTime

    def getEntries(self):
        """Return the article list, or [] when the attribute is missing."""
        try:
            return self.entries
        except AttributeError:
            return []

    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.entries)

    def getItem(self, index):
        """Return the article at *index*, or [] when it cannot be read."""
        try:
            return self.entries[index]
        except (AttributeError, IndexError, TypeError):
            return []

    def getContent(self, index):
        """Return the text of the article at *index* (see extractContent)."""
        return self.extractContent(self.entries[index])

    def getArticle(self, index):
        """Mark the article at *index* as read and return it as an HTML page."""
        self.setEntryRead(index)
        entry = self.entries[index]
        title = entry.get('title', 'No title')
        link = entry.get('link', 'NoLink')
        date = self.extractDate(entry)
        content = self.extractContent(entry)
        # NOTE(review): title and link come from the feed and are inserted
        # without HTML escaping.
        pieces = [
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
            "<html><head><title>" + title + "</title>",
            '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n',
            '<style> body {-webkit-user-select: none;} </style></head>',
            '<body><div><a href="' + link + '">' + title + "</a>",
            "<BR /><small><i>Date: " + date + "</i></small></div>",
            "<BR /><BR />",
            content,
            "</body></html>",
        ]
        return "".join(pieces)
188
class FeedX:
    # Contains all the info about a single feed (articles, ...), and expose the data
    def __init__(self, name, url):
        self.entries = []
        self.readItems = {}
        self.countUnread = 0
        self.name = name
        self.url = url
        self.updateTime = "Never"

    def editFeed(self, url):
        """Change the URL the feed is fetched from."""
        self.url = url

    def saveFeed(self, configdir):
        """Pickle this feed into ``configdir + getId(self.name)``."""
        out = open(configdir + getId(self.name), "w")
        try:
            pickle.dump(self, out)
        finally:
            # Close the handle even when pickling fails.
            out.close()

    def updateFeed(self, configdir, expiryTime=24):
        """Fetch the feed and merge the result with the stored articles.

        expiryTime is in hours.  Stored articles that are missing from the
        fetched data are kept only when they carry an "updated_parsed"
        time stamp younger than expiryTime.  Nothing is changed (and
        nothing is saved) when the fetch yields no entries.
        """
        parsed = feedparser.parse(self.url)
        fresh = parsed["entries"]
        # Check if the parse was successful (number of entries > 0, else
        # do nothing)
        if len(fresh) == 0:
            return

        freshIds = set([self.getUniqueId(-1, entry) for entry in fresh])
        now = time.time()
        maxAge = float(expiryTime) * 3600.
        for entry in self.getEntries():
            if not entry.has_key("updated_parsed"):
                continue
            if now - time.mktime(entry["updated_parsed"]) >= maxAge:
                continue
            if self.getUniqueId(-1, entry) not in freshIds:
                fresh.append(entry)

        self.entries = fresh
        # Carry over the known read flags; new articles start unread.
        previous = self.readItems
        self.readItems = {}
        self.countUnread = 0
        for entry in self.entries:
            uid = self.getUniqueId(-1, entry)
            self.readItems[uid] = previous.get(uid, False)
            if self.readItems[uid] == False:
                self.countUnread = self.countUnread + 1
        del parsed
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def setEntryRead(self, index):
        """Mark the article at *index* as read."""
        uid = self.getUniqueId(index)
        if self.readItems[uid] == False:
            self.countUnread = self.countUnread - 1
            self.readItems[uid] = True

    def setEntryUnread(self, index):
        """Mark the article at *index* as unread."""
        uid = self.getUniqueId(index)
        if self.readItems[uid] == True:
            self.countUnread = self.countUnread + 1
            self.readItems[uid] = False

    def isEntryRead(self, index):
        return self.readItems[self.getUniqueId(index)]

    def getTitle(self, index):
        return self.entries[index]["title"]

    def getLink(self, index):
        return self.entries[index]["link"]

    def getDate(self, index):
        """Return the article's "updated_parsed" value.

        Falls back to the current local time when the article or the key
        is missing.
        """
        try:
            return self.entries[index]["updated_parsed"]
        except (AttributeError, IndexError, KeyError, TypeError):
            return time.localtime()

    def getUniqueId(self, index, entry=None):
        """Return the id of the article at *index*.

        When *index* is negative the id is computed for the given *entry*
        instead.  The id hashes the update time and title, else link and
        title, else the title alone.
        """
        if index >= 0:
            entry = self.entries[index]
        if entry.has_key("updated_parsed"):
            stamp = time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"])
            return getId(stamp + entry["title"])
        elif entry.has_key("link"):
            return getId(entry["link"] + entry["title"])
        else:
            return getId(entry["title"])

    def getUpdateTime(self):
        return self.updateTime

    def getEntries(self):
        """Return the article list, or [] when the attribute is missing."""
        try:
            return self.entries
        except AttributeError:
            return []

    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.entries)

    def getItem(self, index):
        """Return the article at *index*, or [] when it cannot be read."""
        try:
            return self.entries[index]
        except (AttributeError, IndexError, TypeError):
            return []

    def getContent(self, index):
        """Return the longest text available for the article at *index*.

        Uses "summary", or the first "content" element when it is longer,
        and falls back to "description" (or "") when both are empty.
        """
        content = ""
        entry = self.entries[index]
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def getArticle(self, index):
        """Mark the article at *index* as read and return it as an HTML page."""
        self.setEntryRead(index)
        entry = self.entries[index]
        title = entry.get('title', 'No title')
        content = self.getContent(index)
        link = entry.get('link', 'NoLink')
        if entry.has_key("updated_parsed"):
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
        else:
            date = ""
        # NOTE(review): title and link come from the feed and are inserted
        # without HTML escaping.
        pieces = [
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
            "<html><head><title>" + title + "</title>",
            '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n',
            '<style> body {-webkit-user-select: none;} </style></head>',
            '<body><div><a href="' + link + '">' + title + "</a>",
            "<BR /><small><i>Date: " + date + "</i></small></div>",
            "<BR /><BR />",
            content,
            "</body></html>",
        ]
        return "".join(pieces)
336
class ArchivedArticles(Feed):
    """Pseudo feed holding articles archived by the user.

    Its entries are plain dictionaries; updating downloads the page behind
    each entry's link instead of parsing a feed.
    """

    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        """Append a not-yet-downloaded article and save the feed.

        Until the page is downloaded the summary is a plain HTML link to
        the article.
        """
        entry = {
            "title": title,
            "link": link,
            "downloaded": False,
            "summary": '<a href="' + link + '">' + title + "</a>",
            "updated_parsed": updated_parsed,
            "time": time.time(),
        }
        self.entries.append(entry)
        self.readItems[self.getUniqueId(len(self.entries) - 1)] = False
        # Tolerate instances on which the unread counter was never set.
        self.countUnread = getattr(self, "countUnread", 0) + 1
        self.saveFeed(configdir)

    def updateFeed(self, configdir, expiryTime=24):
        """Download pending articles, drop expired ones and save the feed.

        expiryTime is in hours and is measured from each entry's "time"
        value, which is set when the entry is added and again when its
        page is downloaded.
        """
        # First pass: downloads.  Nothing is removed here, so *index* stays
        # a valid position for setEntryUnread().
        for index, entry in enumerate(self.getEntries()):
            if entry["downloaded"]:
                continue
            try:
                f = urllib2.urlopen(entry["link"])
                try:
                    page = f.read()
                finally:
                    f.close()
            except Exception:
                # Best effort: a failed download is retried on the next
                # update.
                continue
            entry["summary"] = page
            if len(page) > 0:
                entry["downloaded"] = True
                entry["time"] = time.time()
                self.setEntryUnread(index)

        # Second pass: expiry.  The survivors are collected in a new list
        # instead of removing items from the list being iterated, which
        # would skip the element following every removal.
        now = time.time()
        expiry = float(expiryTime) * 3600
        kept = [entry for entry in self.getEntries()
                if now - entry["time"] <= expiry]
        self.entries[:] = kept

        # Rebuild the read state so that removed articles no longer count
        # as unread.
        previous = self.readItems
        self.readItems = {}
        self.countUnread = 0
        for entry in kept:
            uid = self.getUniqueId(-1, entry)
            self.readItems[uid] = previous.get(uid, False)
            if not self.readItems[uid]:
                self.countUnread = self.countUnread + 1

        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def getArticle(self, index):
        """Mark the article at *index* as read and return its stored content."""
        self.setEntryRead(index)
        return self.getContent(index)
378
379
class Listing:
    # Lists all the feeds in a dictionary, and expose the data
    def __init__(self, configdir):
        """Load the feed list and every feed stored under *configdir*.

        *configdir* is used as a plain string prefix for file names.
        When no "feeds.pickle" exists the list starts with a single
        default feed.  Feeds that fail to load are dropped from the
        ordering.
        """
        self.configdir = configdir
        self.feeds = {}
        if isfile(self.configdir + "feeds.pickle"):
            self.listOfFeeds = self._readPickle(self.configdir + "feeds.pickle")
        else:
            self.listOfFeeds = {getId("Slashdot"): {"title": "Slashdot", "url": "http://rss.slashdot.org/Slashdot/slashdot"}, }
        if "font" in self.listOfFeeds:
            del self.listOfFeeds["font"]
        if "feedingit-order" in self.listOfFeeds:
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        else:
            # No stored order: sort the feeds by title.
            self.sortedKeys = sorted(self.listOfFeeds, key=self.getFeedTitle)
        # Iterate over a copy because failing keys are removed on the way.
        for key in self.sortedKeys[:]:
            try:
                self.loadFeed(key)
            except Exception:
                self.sortedKeys.remove(key)
        self.closeCurrentlyDisplayedFeed()

    def _readPickle(self, path):
        # Unpickle the object stored in *path*, always closing the file.
        # NOTE(review): pickle must only be used on trusted files.
        source = open(path)
        try:
            return pickle.load(source)
        finally:
            source.close()

    def addArchivedArticle(self, key, index):
        """Copy article *index* of feed *key* into the "Archived Articles" feed.

        The archive feed is created and registered on first use.
        """
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getLink(index)
        date = feed.getDate(index)
        archiveKey = getId("Archived Articles")
        if archiveKey not in self.listOfFeeds:
            self.listOfFeeds[archiveKey] = {"title": "Archived Articles", "url": ""}
            self.sortedKeys.append(archiveKey)
            self.feeds[archiveKey] = ArchivedArticles("Archived Articles", "")
            self.saveConfig()

        self.getFeed(archiveKey).addArchivedArticle(title, link, date, self.configdir)

    def loadFeed(self, key):
        """Load feed *key* from its pickle, or create it from the feed list."""
        if isfile(self.configdir + key):
            self.feeds[key] = self._readPickle(self.configdir + key)
        else:
            title = self.listOfFeeds[key]["title"]
            url = self.listOfFeeds[key]["url"]
            self.feeds[key] = Feed(title, url)

    def updateFeeds(self, expiryTime=24):
        """Update every feed; expiryTime is forwarded to each feed."""
        for key in self.getListOfFeeds():
            self.feeds[key].updateFeed(self.configdir, expiryTime)

    def updateFeed(self, key, expiryTime=24):
        self.feeds[key].updateFeed(self.configdir, expiryTime)

    def editFeed(self, key, title, url):
        """Change title and URL of feed *key* (not saved to disk here)."""
        self.listOfFeeds[key]["title"] = title
        self.listOfFeeds[key]["url"] = url
        self.feeds[key].editFeed(url)

    def getFeed(self, key):
        return self.feeds[key]

    def getFeedUpdateTime(self, key):
        return self.feeds[key].getUpdateTime()

    def getFeedNumberOfUnreadItems(self, key):
        return self.feeds[key].getNumberOfUnreadItems()

    def getFeedTitle(self, key):
        return self.listOfFeeds[key]["title"]

    def getFeedUrl(self, key):
        return self.listOfFeeds[key]["url"]

    def getListOfFeeds(self):
        """Return the feed keys in display order (the live list, not a copy)."""
        return self.sortedKeys

    def addFeed(self, title, url):
        """Register a new feed; return False when the title is already used."""
        key = getId(title)
        if key in self.listOfFeeds:
            return False
        self.listOfFeeds[key] = {"title": title, "url": url}
        self.sortedKeys.append(key)
        self.saveConfig()
        self.feeds[key] = Feed(title, url)
        return True

    def removeFeed(self, key):
        """Forget feed *key*, delete its pickle if present and save the list."""
        del self.listOfFeeds[key]
        self.sortedKeys.remove(key)
        del self.feeds[key]
        if isfile(self.configdir + key):
            remove(self.configdir + key)
        self.saveConfig()

    def saveConfig(self):
        """Pickle the feed list, including the current order, to "feeds.pickle"."""
        self.listOfFeeds["feedingit-order"] = self.sortedKeys
        out = open(self.configdir + "feeds.pickle", "w")
        try:
            pickle.dump(self.listOfFeeds, out)
        finally:
            out.close()

    def moveUp(self, key):
        """Swap *key* with its predecessor; the first key swaps with the last."""
        keys = self.sortedKeys
        pos = keys.index(key)
        # pos - 1 is -1 for the first element, which addresses the last one.
        keys[pos], keys[pos - 1] = keys[pos - 1], keys[pos]

    def moveDown(self, key):
        """Swap *key* with its successor; the last key swaps with the first."""
        keys = self.sortedKeys
        pos = keys.index(key)
        other = (pos + 1) % len(keys)
        keys[pos], keys[other] = keys[other], keys[pos]

    def setCurrentlyDisplayedFeed(self, key):
        self.currentlyDisplayedFeed = key

    def closeCurrentlyDisplayedFeed(self):
        # False means that no feed is currently displayed.
        self.currentlyDisplayedFeed = False

    def getCurrentlyDisplayedFeed(self):
        return self.currentlyDisplayedFeed
511     
if __name__ == "__main__":
    # Ad-hoc check: print the update time of every feed whose key starts
    # with 'd8'.
    listing = Listing('/home/user/.feedingit/')
    # Walk a copy of the ordered key list.
    for key in listing.getListOfFeeds()[:]:
        if not key.startswith('d8'):
            continue
        print(listing.getFeedUpdateTime(key))