psa: remove old event feed items
[feedingit] / src / rss.py
index 539b71f..66149d4 100644 (file)
@@ -33,6 +33,8 @@ import time
 import urllib2
 from BeautifulSoup import BeautifulSoup
 from urlparse import urljoin
+import logging
+logger = logging.getLogger(__name__)
 
 #CONFIGDIR="/home/user/.feedingit/"
 
@@ -70,7 +72,10 @@ class Feed:
         self.url = url
         self.countUnread = 0
         self.updateTime = "Never"
+        self.updateStamp = 0
         self.uniqueId = uniqueId
+        self.etag = None
+        self.modified = None
 
     def addImage(self, configdir, key, baseurl, url):
         filename = configdir+key+".d/"+getId(url)
@@ -85,7 +90,7 @@ class Feed:
                 f.close()
                 outf.close()
             except:
-                print "Could not download " + url
+                logger.error("Could not download " + url)
         else:
             #open(filename,"a").close()  # "Touch" the file
             file = open(filename,"a")
@@ -127,12 +132,23 @@ class Feed:
     def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
         # Expiry time is in hours
         if proxy == None:
-            tmp=feedparser.parse(self.url)
+            tmp=feedparser.parse(self.url, etag = self.etag, modified = self.modified)
         else:
-            tmp=feedparser.parse(self.url, handlers = [proxy])
+            tmp=feedparser.parse(self.url, etag = self.etag, modified = self.modified, handlers = [proxy])
         expiry = float(expiryTime) * 3600.
+
         # Check if the parse was succesful (number of entries > 0, else do nothing)
         if len(tmp["entries"])>0:
+           # Only update the etag and modified values when the feed returned entries
+           try:
+               self.etag = tmp["etag"]
+           except KeyError:
+               self.etag = None
+           try:
+               self.modified = tmp["modified"]
+           except KeyError:
+               self.modified = None
+           #if len(tmp["entries"])>0:
            if not isdir(configdir+self.uniqueId+".d"):
                mkdir(configdir+self.uniqueId+".d")
            try:
@@ -144,8 +160,9 @@ class Feed:
                outf.close()
                del data
            except:
-                import traceback
-                traceback.print_exc()
+               #import traceback
+               #traceback.print_exc()
+                pass
 
 
            #reversedEntries = self.getEntries()
@@ -180,7 +197,7 @@ class Feed:
                             img['src']=filename
                             tmpEntry["images"].append(filename)
                           except:
-                              print "Error downloading image %s" % img
+                              logger.error("Error downloading image %s" % img)
                    tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
@@ -219,7 +236,7 @@ class Feed:
                         tmpEntries[entryId] = self.entries[entryId]
                         tmpIds.append(entryId)
                     except:
-                        print "Error purging old articles %s" % entryId
+                        logger.error("Error purging old articles %s" % entryId)
                         self.removeEntry(entryId)
 
            self.entries = tmpEntries
@@ -240,6 +257,7 @@ class Feed:
            del tmp
            self.countUnread = tmpUnread
            self.updateTime = time.asctime()
+           self.updateStamp = currentTime
            self.saveFeed(configdir)
            from glob import glob
            from os import stat
@@ -264,7 +282,7 @@ class Feed:
                         #
                     except OSError:
                         #
-                        print 'Could not remove', file
+                        logger.error('Could not remove %s' % file)
            
 
     def extractContent(self, entry):
@@ -302,7 +320,9 @@ class Feed:
             self.readItems[id] = False
     
     def isEntryRead(self, id):
-        return self.readItems[id]
+        # Check if an entry is read; return False if the read
+        # status of an entry is unknown (id not in readItems)
+        return self.readItems.get(id, False)
     
     def getTitle(self, id):
         return self.entries[id]["title"]
@@ -330,6 +350,13 @@ class Feed:
     def getUpdateTime(self):
         return self.updateTime
     
+    def getUpdateStamp(self):
+        try:
+            return self.updateStamp
+        except:
+            self.updateStamp = 0
+            return self.updateStamp
+
     def getEntries(self):
         return self.entries
     
@@ -353,6 +380,9 @@ class Feed:
             return self.entries[id]
         except:
             return []
+        
+    def getImages(self, id):
+        return self.entries[id]["images"]
     
     def getContent(self, id):
         if self.entries[id].has_key("contentLink"):
@@ -371,20 +401,21 @@ class Feed:
                 try:
                     remove(entry["contentLink"])  #os.remove
                 except:
-                    print "File not found for deletion: %s" % entry["contentLink"]
+                    logger.error("File not found for deletion: %s"
+                                 % entry["contentLink"])
             del self.entries[id]
         else:
-            print "Entries has no %s key" % id
+            logger.error("Entries has no %s key" % id)
         if id in self.ids:
             self.ids.remove(id)
         else:
-            print "Ids has no %s key" % id
+            logger.error("Ids has no %s key" % id)
         if self.readItems.has_key(id):
             if self.readItems[id]==False:
                 self.countUnread = self.countUnread - 1
             del self.readItems[id]
         else:
-            print "ReadItems has no %s key" % id
+            logger.error("ReadItems has no %s key" % id)
         #except:
         #    print "Error removing entry %s" %id
     
@@ -464,6 +495,7 @@ class ArchivedArticles(Feed):
             #        if currentTime - entry["time"] > 2*expiry:
             #            self.removeEntry(id)
         self.updateTime = time.asctime()
+        self.updateStamp = time.time()
         self.saveFeed(configdir)
         
     def purgeReadArticles(self):
@@ -492,7 +524,7 @@ class Listing:
             self.listOfFeeds = pickle.load(file)
             file.close()
         else:
-            self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
+            self.listOfFeeds = {getId("Maemo News"):{"title":"Maemo News", "url":"http://maemo.org/news/items.xml", "unread":0, "updateTime":"Never"}, }
         if self.listOfFeeds.has_key("font"):
             del self.listOfFeeds["font"]
         if self.listOfFeeds.has_key("feedingit-order"):
@@ -532,6 +564,14 @@ class Listing:
                     del feed.imageHandler
                 except:
                     pass
+                try:
+                    feed.etag
+                except AttributeError:
+                    feed.etag = None
+                try:
+                    feed.modified
+                except AttributeError:
+                    feed.modified = None
                 #feed.reloadUnread(self.configdir)
             else:
                 #print key
@@ -549,12 +589,14 @@ class Listing:
             feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
             self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
             self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
+            self.listOfFeeds[key]["updateStamp"] = feed.getUpdateStamp()
             
     def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
         feed = self.getFeed(key)
         feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
         self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
         self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
+        self.listOfFeeds[key]["updateStamp"] = feed.getUpdateStamp()
         
     def editFeed(self, key, title, url):
         self.listOfFeeds[key]["title"] = title
@@ -569,7 +611,7 @@ class Listing:
         except:
             # If the feed file gets corrupted, we need to reset the feed.
             import traceback
-            traceback.print_exc()
+            logger.error("getFeed: %s" % traceback.format_exc())
             import dbus
             bus = dbus.SessionBus()
             remote_object = bus.get_object("org.freedesktop.Notifications", # Connection name
@@ -588,6 +630,12 @@ class Listing:
             self.listOfFeeds[key]["updateTime"] = "Never"
         return self.listOfFeeds[key]["updateTime"]
     
+    def getFeedUpdateStamp(self, key):
+        # Entries without an "updateStamp" key are given a stamp of 0
+        if not self.listOfFeeds[key].has_key("updateStamp"):
+            self.listOfFeeds[key]["updateStamp"] = 0
+        return self.listOfFeeds[key]["updateStamp"]
+
     def getFeedNumberOfUnreadItems(self, key):
         if not self.listOfFeeds[key].has_key("unread"):
             self.listOfFeeds[key]["unread"] = 0