and current_version[0] == date):
logger.debug("ALREADY DOWNLOADED %s (%s)"
% (entry["title"], entry["link"]))
+ # This article is already present in the feed listing.  Bump its
+ # "updated" timestamp so that it does not expire.
+ self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id))
+ try:
+ logger.debug("Updating already downloaded files for %s" % (id,))
+ filename = configdir + self.key + ".d/" + id + ".html"
+ # Touch the cached HTML and its images (utime with None sets the
+ # timestamps to now), presumably so mtime-based cleanup keeps them.
+ file = open(filename, "a")
+ utime(filename, None)
+ file.close()
+ images = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,)).fetchall()
+ for image in images:
+ file = open(image[0], "a")
+ utime(image[0], None)
+ file.close()
+ except Exception:
+ logger.exception("Error refreshing cached files for %s" % (id,))
+ self.db.commit()
continue
if current_version is not None:
#text = '''<div style="color: black; background-color: white;">'''
text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
text += "<html><head><title>" + title + "</title>"
- text += '<meta http-equiv="Content-Type" content="text/html; charset="UTF-8"/>\n'
+ text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
#text += '<style> body {-webkit-user-select: none;} </style>'
text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
if author != None:
return text
def getContent(self, id):
- contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
+ """
+ Return the content of the article with the specified ID. If
+ the content is not available, returns None.
+ """
+ contentLink = self.getContentLink(id)
try:
- file = open(self.entries[id]["contentLink"])
- content = file.read()
- file.close()
- except:
- content = "Content unavailable"
+ with open(contentLink, 'r') as file:
+ content = file.read()
+ except Exception:
+ logger.exception("Failed get content for %s: reading %s failed",
+ id, contentLink)
+ content = None
return content
def extractDate(self, entry):
self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
self.db.commit()
- def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
+ # Feed.updateFeed calls this function.
+ def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
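+ # If given, postFeedUpdateFunc is called once the pending rows have
+ # been processed, as postFeedUpdateFunc(self.key, currentTime, None,
+ # None, None, *postFeedUpdateFuncArgs); the Nones fill slots this
+ # archived-article update has nothing to report for.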
currentTime = 0
rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
for row in rows:
- currentTime = time.time()
- id = row[0]
- link = row[1]
- f = urllib2.urlopen(link)
- #entry["content"] = f.read()
- html = f.read()
- f.close()
- soup = BeautifulSoup(html)
- images = soup('img')
- baseurl = link
- for img in images:
- filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
- img['src']=filename
- self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+ try:
+ currentTime = time.time()
+ id = row[0]
+ link = row[1]
+ f = urllib2.urlopen(link)
+ #entry["content"] = f.read()
+ html = f.read()
+ f.close()
+ soup = BeautifulSoup(html)
+ images = soup('img')
+ baseurl = link
+ # Download each referenced image (addImage caches it locally) and
+ # point the <img> tag at the cached copy instead of the original URL.
+ for img in images:
+ filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
+ img['src'] = filename
+ self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+ self.db.commit()
+ # Write the rewritten article into this feed's cache directory.
+ contentLink = configdir + self.key + ".d/" + id + ".html"
+ file = open(contentLink, "w")
+ file.write(soup.prettify())
+ file.close()
+
+ self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
self.db.commit()
- contentLink = configdir+self.key+".d/"+id+".html"
- file = open(contentLink, "w")
- file.write(soup.prettify())
- file.close()
-
- self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
- self.db.commit()
- return (currentTime, None, None)
+ except Exception:
+ # Log and carry on; a failure on one article should not abort
+ # the rest of the archive update.
+ logger.error("Error updating archived article: %s %s"
+ % (link, traceback.format_exc(),))
+
+ if postFeedUpdateFunc is not None:
+ postFeedUpdateFunc (self.key, currentTime, None, None, None,
+ *postFeedUpdateFuncArgs)
def purgeReadArticles(self):
rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
# state.
try:
updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
- wc_init (self, True if updater else False)
+ wc_init(config, self, True if updater else False)
if wc().available() and updater:
# The list of known streams.
streams = wc().streams_list ()
logger.debug(
"Registering previously unknown channel: %s (%s)"
% (key, title,))
- # Use a default refresh interval of 6 hours.
- wc().stream_register (key, title, 6 * 60 * 60)
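+ # getUpdateInterval() is in hours (the old hard-coded value here was
+ # 6 hours); the freshness hint is assumed to be in seconds, hence the
+ # * 60 * 60 conversion.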
+ wc().stream_register(
+ key, title,
+ self.config.getUpdateInterval() * 60 * 60)
else:
# Make sure the human readable name is up to date.
if wc()[key].human_readable_name != title:
wc()[key].human_readable_name = title
stream_ids.remove (key)
+ wc()[key].freshness \
+ = self.config.getUpdateInterval() * 60 * 60
# Unregister any streams that are no longer subscribed to.
if wc().available ():
try:
del wc()[key]
- except KeyError:
+ except (KeyError, woodchuck.Error):
logger.debug("Removing unregistered feed %s failed" % (key,))
rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]