Fix img detection.

[feedingit] / src / rss_sqlite.py
diff --git a/src/rss_sqlite.py b/src/rss_sqlite.py

index 990cc45..129f500 100644 (file)
--- a/src/rss_sqlite.py
+++ b/src/rss_sqlite.py
@@ -24,6 +24,8 @@
  # Description : Simple RSS Reader
  # ============================================================================
  
+from __future__ import with_statement
+
  import sqlite3
  from os.path import isfile, isdir
  from shutil import rmtree
@@ -36,20 +38,20 @@ import urllib2
  from BeautifulSoup import BeautifulSoup
  from urlparse import urljoin
  from calendar import timegm
-from updatedbus import get_lock, release_lock
  import threading
  import traceback
-from wc import wc, wc_init
-try:
-    import woodchuck
-except:
-    woodchuck = None
+from wc import wc, wc_init, woodchuck
+import subprocess
+import dbus
+from updatedbus import update_server_object
  
  from jobmanager import JobManager
  import mainthread
  from httpprogresshandler import HTTPProgressHandler
  import random
  import sys
+import logging
+logger = logging.getLogger(__name__)
  
  def getId(string):
      return md5.new(string).hexdigest()
@@ -61,17 +63,154 @@ def download_callback(connection):
  def downloader(progress_handler=None, proxy=None):
      openers = []
  
-    if progress_handler:
-        openers.append (progress_handler)
+    if progress_handler is not None:
+        openers.append(progress_handler)
      else:
          openers.append(HTTPProgressHandler(download_callback))
  
      if proxy:
-        openers.append (proxy)
+        openers.append(proxy)
+
+    return urllib2.build_opener(*openers)
+
+def transfer_stats(sent, received, **kwargs):
+    """
+    Snapshot the byte counters sent and received (and the current
+    time) and return a continuation that you can call later.  Extra
+    keyword arguments are accepted and ignored.
+
+    The continuation takes the same arguments.  It returns a tuple of
+    the bytes sent, the bytes received and the seconds elapsed, each
+    measured relative to the snapshot taken by the original call.
+    """
+    start_time = time.time()
+    start_sent = sent
+    start_received = received
+
+    def e(sent, received, **kwargs):
+        return (sent - start_sent,
+                received - start_received,
+                time.time() - start_time)
+
+    return e
+
+# If not None, a subprocess.Popen object corresponding to an
+# update_feeds.py process.
+update_feed_process = None
+
+update_feeds_iface = None
+
+jobs_at_start = 0
+
+class BaseObject(object):
+    # Columns to cache.  Classes that inherit from this and use the
+    # cache mechanism should set this to a list of tuples, each of
+    # which contains two entries: the table and the column.  Note that
+    # both are case sensitive.
+    cached_columns = ()
+
+    def cache_invalidate(self, table=None):
+        """
+        Invalidate the cache.
+
+        If table is not None, invalidate only the specified table.
+        Otherwise, drop the whole cache.
+        """
+        if not hasattr(self, 'cache'):
+            return
+
+        if table is None:
+            del self.cache
+        else:
+            if table in self.cache:
+                del self.cache[table]
+
+    def lookup(self, table, column, id=None):
+        """
+        Return column's values (unsorted), or just row id's value (None
+        if absent).  Columns listed in cached_columns are cached.
+        """
+        if not hasattr(self, 'cache'):
+            self.cache = {}
+
+        # Cache data for at most 60 seconds.
+        now = time.time()
+        try:
+            cache = self.cache[table]
+
+            if now - cache[None] > 60:
+                # logger.debug("%s: Cache too old: clearing" % (table,))
+                del self.cache[table]
+                cache = None
+        except KeyError:
+            cache = None
+
+        if (cache is None
+            or (table, column) not in self.cached_columns):
+            # The cache is empty or the caller wants a column that we
+            # don't cache.
+            if (table, column) in self.cached_columns:
+                # logger.debug("%s: Rebuilding cache" % (table,))
+
+                do_cache = True
+
+                self.cache[table] = cache = {}
+                columns = []
+                for t, c in self.cached_columns:
+                    if table == t:
+                        cache[c] = {}
+                        columns.append(c)
+
+                columns.append('id')
+                where, params = "", ()
+            else:
+                do_cache = False
+
+                columns = (column,)
+                if id is not None:
+                    where, params = "where id = ?", (id,)
+                else:
+                    where, params = "", ()
+
+            results = self.db.execute("SELECT %s FROM %s %s" % (
+                    ','.join(columns), table, where), params)
+
+            if do_cache:
+                for r in results:
+                    values = list(r)
+                    i = values.pop()
+                    for index, value in enumerate(values):
+                        cache[columns[index]][i] = value
+
+                cache[None] = now
+            else:
+                # Uncached column: collect the first (only) selected
+                # field of each row.
+                values = [r[0] for r in results]
+                if id is None:
+                    return values
+
+                # A single row was requested; None means "not found".
+                return values[0] if values else None
+        else:
+            cache = self.cache[table]
+
+        try:
+            if id is not None:
+                value = cache[column][id]
+                # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
+                return value
+            else:
+                return cache[column].values()
+        except KeyError:
+            # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
+            return None
  
-    return urllib2.build_opener (*openers)
+class Feed(BaseObject):
+    # Columns to cache.
+    cached_columns = (('feed', 'read'),
+                      ('feed', 'title'))
  
-class Feed:
      serial_execution_lock = threading.Lock()
  
      def _getdb(self):
@@ -105,26 +244,27 @@ class Feed:
  
                  abs_url = urljoin(baseurl,url)
                  f = opener.open(abs_url)
-                outf = open(filename, "w")
-                outf.write(f.read())
-                f.close()
-                outf.close()
+                try:
+                    with open(filename, "w") as outf:
+                        for data in f:
+                            outf.write(data)
+                finally:
+                    f.close()
              except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
-                print ("Could not download image %s: %s"
-                       % (abs_url, str (exception)))
+                logger.info("Could not download image %s: %s"
+                            % (abs_url, str (exception)))
                  return None
              except:
                  exception = sys.exc_info()[0]
  
-                print "Downloading image: %s" % abs_url
-                traceback.print_exc()
-
+                logger.info("Downloading image %s: %s" %
+                            (abs_url, traceback.format_exc()))
                  try:
                      remove(filename)
                  except OSError:
                      pass
  
-                raise exception
+                return None
          else:
              #open(filename,"a").close()  # "Touch" the file
              file = open(filename,"a")
@@ -133,23 +273,64 @@ class Feed:
          return filename
  
      def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
-        def doit():
-            def it():
-                self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
-            return it
-        JobManager().execute(doit(), self.key, priority=priority)
+        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
+            def doit():
+                def it():
+                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
+                return it
+            JobManager().execute(doit(), self.key, priority=priority)
+        else:
+            def send_update_request():
+                global update_feeds_iface
+                if update_feeds_iface is None:
+                    bus=dbus.SessionBus()
+                    remote_object = bus.get_object(
+                        "org.marcoz.feedingit", # Connection name
+                        "/org/marcoz/feedingit/update" # Object's path
+                        )
+                    update_feeds_iface = dbus.Interface(
+                        remote_object, 'org.marcoz.feedingit')
+
+                try:
+                    update_feeds_iface.Update(self.key)
+                except Exception, e:
+                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
+                                 % str(e))
+                    update_feeds_iface = None
+                else:
+                    return True
+
+            if send_update_request():
+                # Success!  It seems we were able to start the update
+                # daemon via dbus (or, it was already running).
+                return
+
+            global update_feed_process
+            if (update_feed_process is None
+                or update_feed_process.poll() is not None):
+                # The update_feeds process is not running.  Start it.
+                update_feeds = os.path.join(os.path.dirname(__file__),
+                                            'update_feeds.py')
+                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
+                logger.debug("Starting update_feeds: running %s"
+                             % (str(argv),))
+                update_feed_process = subprocess.Popen(argv)
+                # Make sure the dbus calls go to the right process:
+                # rebind.
+                update_feeds_iface = None
+
+            for _ in xrange(5):
+                if send_update_request():
+                    break
+                time.sleep(1)
  
      def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
+        logger.debug("Updating %s" % url)
+
          success = False
          have_serial_execution_lock = False
          try:
-            update_lock = None
-            update_lock = get_lock("key")
-            if not update_lock:
-                # Someone else is doing an update.
-                return
-
-            download_start = time.time ()
+            update_start = time.time ()
  
              progress_handler = HTTPProgressHandler(download_callback)
  
@@ -158,9 +339,11 @@ class Feed:
                  openers.append (proxy)
              kwargs = {'handlers':openers}
              
+            feed_transfer_stats = transfer_stats(0, 0)
+
              tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
-            download_duration = time.time () - download_start
-    
+            download_duration = time.time () - update_start
+
              opener = downloader(progress_handler, proxy)
  
              if JobManager().do_quit:
@@ -172,8 +355,9 @@ class Feed:
              expiry = float(expiryTime) * 3600.
      
              currentTime = 0
-    
-            have_woodchuck = mainthread.execute (wc().available)
+            
+            updated_objects = 0
+            new_objects = 0
  
              def wc_success():
                  try:
@@ -186,13 +370,15 @@ class Feed:
                                     |woodchuck.Indicator.StreamWide),
                          transferred_down=progress_handler.stats['received'],
                          transferred_up=progress_handler.stats['sent'],
-                        transfer_time=download_start,
+                        transfer_time=update_start,
                          transfer_duration=download_duration,
-                        new_objects=len (tmp.entries),
-                        objects_inline=len (tmp.entries))
+                        new_objects=new_objects,
+                        updated_objects=updated_objects,
+                        objects_inline=new_objects + updated_objects)
                  except KeyError:
-                    print "Failed to register update with woodchuck!"
-                    pass
+                    logger.warn(
+                        "Failed to register update of %s with woodchuck!"
+                        % (self.key))
      
              http_status = tmp.get ('status', 200)
      
@@ -203,20 +389,21 @@ class Feed:
              # parse fails.  But really, everything went great!  Check for
              # this first.
              if http_status == 304:
-                print "%s: No changes to feed." % (self.key,)
-                mainthread.execute (wc_success, async=True)
+                logger.debug("%s: No changes to feed." % (self.key,))
+                mainthread.execute(wc_success, async=True)
                  success = True
-            elif len(tmp["entries"])==0 and not tmp.version:
+            elif len(tmp["entries"])==0 and not tmp.get('version', None):
                  # An error occured fetching or parsing the feed.  (Version
                  # will be either None if e.g. the connection timed our or
                  # '' if the data is not a proper feed)
-                print ("Error fetching %s: version is: %s: error: %s"
-                       % (url, str (tmp.version),
-                          str (tmp.get ('bozo_exception', 'Unknown error'))))
-                print tmp
-                if have_woodchuck:
-                    def e():
-                        print "%s: stream update failed!" % self.key
+                logger.error(
+                    "Error fetching %s: version is: %s: error: %s"
+                    % (url, str (tmp.get('version', 'unset')),
+                       str (tmp.get ('bozo_exception', 'Unknown error'))))
+                logger.debug(tmp)
+                def register_stream_update_failed(http_status):
+                    def doit():
+                        logger.debug("%s: stream update failed!" % self.key)
      
                          try:
                              # It's not easy to get the feed's title from here.
@@ -233,7 +420,12 @@ class Feed:
                          if 500 <= http_status and http_status < 600:
                              ec = woodchuck.TransferStatus.TransientNetwork
                          wc()[self.key].update_failed(ec)
-                    mainthread.execute (e, async=True)
+                    return doit
+                if wc().available:
+                    mainthread.execute(
+                        register_stream_update_failed(
+                            http_status=http_status),
+                        async=True)
              else:
                 currentTime = time.time()
                 # The etag and modified value should only be updated if the content was not null
@@ -255,8 +447,8 @@ class Feed:
                     outf.close()
                     del data
                 except (urllib2.HTTPError, urllib2.URLError), exception:
-                   print ("Could not download favicon %s: %s"
-                          % (abs_url, str (exception)))
+                   logger.debug("Could not download favicon %s: %s"
+                                % (abs_url, str (exception)))
      
                 self.serial_execution_lock.acquire ()
                 have_serial_execution_lock = True
@@ -264,19 +456,18 @@ class Feed:
                 #reversedEntries = self.getEntries()
                 #reversedEntries.reverse()
      
-               ids = self.getIds()
-    
                 tmp["entries"].reverse()
                 for entry in tmp["entries"]:
                     # Yield so as to make the main thread a bit more
                     # responsive.
                     time.sleep(0)
      
+                   entry_transfer_stats = transfer_stats(
+                       *feed_transfer_stats(**progress_handler.stats)[0:2])
+
                     if JobManager().do_quit:
                         raise KeyboardInterrupt
  
-                   received_base = progress_handler.stats['received']
-                   sent_base = progress_handler.stats['sent']
                     object_size = 0
  
                     date = self.extractDate(entry)
@@ -296,22 +487,44 @@ class Feed:
                         entry["id"] = None
                     content = self.extractContent(entry)
                     object_size = len (content)
-                   received_base -= len (content)
                     tmpEntry = {"title":entry["title"], "content":content,
                                  "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                     id = self.generateUniqueId(tmpEntry)
                     
+                   current_version = self.db.execute(
+                       'select date, ROWID from feed where id=?',
+                       (id,)).fetchone()
+                   if (current_version is not None
+                       and current_version[0] == date):
+                       logger.debug("ALREADY DOWNLOADED %s (%s)"
+                                    % (entry["title"], entry["link"]))
+                       continue                       
+
+                   if current_version is not None:
+                       # The version was updated.  Mark it as unread.
+                       logger.debug("UPDATED: %s (%s)"
+                                    % (entry["title"], entry["link"]))
+                       updated_objects += 1
+                   else:
+                       logger.debug("NEW: %s (%s)"
+                                    % (entry["title"], entry["link"]))
+                       new_objects += 1
+
                     #articleTime = time.mktime(self.entries[id]["dateTuple"])
                     soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                     images = soup('img')
                     baseurl = tmpEntry["link"]
-                   #if not id in ids:
                     if imageCache and len(images) > 0:
                         self.serial_execution_lock.release ()
                         have_serial_execution_lock = False
                         for img in images:
-                            filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
-                            if filename:
+                           if not img.has_key('src'):
+                               continue
+
+                           filename = self.addImage(
+                               configdir, self.key, baseurl, img['src'],
+                               opener=opener)
+                           if filename:
                                  img['src']="file://%s" %filename
                                  count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                  if count == 0:
@@ -321,9 +534,8 @@ class Feed:
                                  try:
                                      object_size += os.path.getsize (filename)
                                  except os.error, exception:
-                                    print ("Error getting size of %s: %s"
-                                           % (filename, exception))
-                                    pass
+                                    logger.error ("Error getting size of %s: %s"
+                                                  % (filename, exception))
                         self.serial_execution_lock.acquire ()
                         have_serial_execution_lock = True
      
@@ -331,63 +543,84 @@ class Feed:
                     file = open(tmpEntry["contentLink"], "w")
                     file.write(soup.prettify())
                     file.close()
-                   if id in ids:
-                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
-                       self.db.commit()
-                   else:
-                       values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
-                       self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
-                       self.db.commit()
-#                   else:
-#                       try:
-#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
-#                           self.db.commit()
-#                           filename = configdir+self.key+".d/"+id+".html"
-#                           file = open(filename,"a")
-#                           utime(filename, None)
-#                           file.close()
-#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
-#                           for image in images:
-#                                file = open(image[0],"a")
-#                                utime(image[0], None)
-#                                file.close()
-#                       except:
-#                           pass
-    
+
+                   values = {'id': id,
+                             'title': tmpEntry["title"],
+                             'contentLink': tmpEntry["contentLink"],
+                             'date': tmpEntry["date"],
+                             'updated': currentTime,
+                             'link': tmpEntry["link"],
+                             'read': 0}
+
+                   if current_version is not None:
+                       # This is an update.  Ensure that the existing
+                       # entry is replaced.
+                       values['ROWID'] = current_version[1]
+
+                   cols, values = zip(*values.items())
+                   self.db.execute(
+                       "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
+                       % (','.join(cols), ','.join(('?',) * len(values))),
+                       values)
+                   self.db.commit()
+
                     # Register the object with Woodchuck and mark it as
                     # downloaded.
-                   if have_woodchuck:
-                       def e():
+                   def register_object_transferred(
+                           id, title, publication_time,
+                           sent, received, object_size):
+                       def doit():
+                           logger.debug("Registering transfer of object %s"
+                                        % title)
                             try:
                                 obj = wc()[self.key].object_register(
                                     object_identifier=id,
-                                   human_readable_name=tmpEntry["title"])
+                                   human_readable_name=title)
                             except woodchuck.ObjectExistsError:
                                 obj = wc()[self.key][id]
                             else:
-                               # If the entry does not contain a publication
-                               # time, the attribute won't exist.
-                               pubtime = entry.get ('date_parsed', None)
-                               if pubtime:
-                                   obj.publication_time = time.mktime (pubtime)
-        
-                               received = (progress_handler.stats['received']
-                                           - received_base)
-                               sent = progress_handler.stats['sent'] - sent_base
-                               obj.transferred (
-                                   indicator=(woodchuck.Indicator.ApplicationVisual
-                                              |woodchuck.Indicator.StreamWide),
+                               obj.publication_time = publication_time
+                               obj.transferred(
+                                   indicator=(
+                                       woodchuck.Indicator.ApplicationVisual
+                                       |woodchuck.Indicator.StreamWide),
                                     transferred_down=received,
                                     transferred_up=sent,
                                     object_size=object_size)
-                       mainthread.execute(e, async=True)
+                       return doit
+                   if wc().available:
+                       # If the entry does not contain a publication
+                       # time, the attribute won't exist.
+                       pubtime = entry.get('date_parsed', None)
+                       if pubtime:
+                           publication_time = time.mktime (pubtime)
+                       else:
+                           publication_time = None
+
+                       sent, received, _ \
+                           = entry_transfer_stats(**progress_handler.stats)
+                       # sent and received are for objects (in
+                       # particular, images) associated with this
+                       # item.  We also want to attribute the data
+                       # transferred for the item's content.  This is
+                       # a good first approximation.
+                       received += len(content)
+
+                       mainthread.execute(
+                           register_object_transferred(
+                               id=id,
+                               title=tmpEntry["title"],
+                               publication_time=publication_time,
+                               sent=sent, received=received,
+                               object_size=object_size),
+                           async=True)
                 self.db.commit()
  
-               print ("%s: Update successful: transferred: %d/%d; objects: %d)"
-                      % (self.key,
-                         progress_handler.stats['sent'],
-                         progress_handler.stats['received'],
-                         len (tmp.entries)))
+               sent, received, _ \
+                   = feed_transfer_stats(**progress_handler.stats)
+               logger.debug (
+                   "%s: Update successful: transferred: %d/%d; objects: %d)"
+                   % (url, sent, received, len (tmp.entries)))
                 mainthread.execute (wc_success, async=True)
                 success = True
  
@@ -419,30 +652,27 @@ class Feed:
                          #
                      except OSError, exception:
                          #
-                        print 'Could not remove %s: %s' % (file, str (exception))
-            print ("updated %s: %fs in download, %fs in processing"
-                   % (self.key, download_duration,
-                      time.time () - process_start))
+                        logger.error('Could not remove %s: %s'
+                                     % (file, str (exception)))
+            logger.debug("updated %s: %fs in download, %fs in processing"
+                         % (self.key, download_duration,
+                            time.time () - process_start))
          except:
-            print "Updating %s: %s" % (self.key, sys.exc_info()[0])
-            traceback.print_exc()
+            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
          finally:
              self.db.commit ()
  
              if have_serial_execution_lock:
                  self.serial_execution_lock.release ()
  
-            if update_lock is not None:
-                release_lock (update_lock)
-
              updateTime = 0
              try:
                  rows = self.db.execute("SELECT MAX(date) FROM feed;")
                  for row in rows:
                      updateTime=row[0]
-            except:
-                print "Fetching update time."
-                traceback.print_exc()
+            except Exception, e:
+                logger.error("Fetching update time: %s: %s"
+                             % (str(e), traceback.format_exc()))
              finally:
                  if not success:
                      etag = None
@@ -456,31 +686,36 @@ class Feed:
                      postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                          title, *postFeedUpdateFuncArgs)
  
+        self.cache_invalidate()
+
      def setEntryRead(self, id):
          self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
          self.db.commit()
  
-        def e():
-            if wc().available():
-                try:
-                    wc()[self.key][id].used()
-                except KeyError:
-                    pass
+        def doit():
+            try:
+                wc()[self.key][id].used()
+            except KeyError:
+                pass
+        if wc().available():
+            mainthread.execute(doit, async=True)
+        self.cache_invalidate('feed')
  
      def setEntryUnread(self, id):
          self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
          self.db.commit()     
+        self.cache_invalidate('feed')
          
      def markAllAsRead(self):
          self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
          self.db.commit()
+        self.cache_invalidate('feed')
  
      def isEntryRead(self, id):
-        read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
-        return read_status==1  # Returns True if read==1, and False if read==0
+        return self.lookup('feed', 'read', id) == 1
      
      def getTitle(self, id):
-        return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
+        return self.lookup('feed', 'title', id)
      
      def getContentLink(self, id):
          return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
@@ -536,15 +771,17 @@ class Feed:
          #ids.reverse()
          return ids
      
-    def getNextId(self, id):
+    def getNextId(self, id, forward=True):
+        if forward:
+            delta = 1
+        else:
+            delta = -1
          ids = self.getIds()
          index = ids.index(id)
-        return ids[(index+1)%len(ids)]
+        return ids[(index + delta) % len(ids)]
          
      def getPreviousId(self, id):
-        ids = self.getIds()
-        index = ids.index(id)
-        return ids[(index-1)%len(ids)]
+        return self.getNextId(id, forward=False)
      
      def getNumberOfUnreadItems(self):
          return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
@@ -612,20 +849,20 @@ class Feed:
              try:
                  remove(contentLink)
              except OSError, exception:
-                print "Deleting %s: %s" % (contentLink, str (exception))
+                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
          self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
          self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
          self.db.commit()
  
-        def e():
-            if wc().available():
-                try:
-                    wc()[self.key][id].files_deleted (
-                        woodchuck.DeletionResponse.Deleted)
-                    del wc()[self.key][id]
-                except KeyError:
-                    pass
-        mainthread.execute (e, async=True)
+        def doit():
+            try:
+                wc()[self.key][id].files_deleted (
+                    woodchuck.DeletionResponse.Deleted)
+                del wc()[self.key][id]
+            except KeyError:
+                pass
+        if wc().available():
+            mainthread.execute (doit, async=True)
   
  class ArchivedArticles(Feed):    
      def addArchivedArticle(self, title, link, date, configdir):
@@ -679,7 +916,13 @@ class ArchivedArticles(Feed):
                  pass
          self.removeEntry(id)
  
-class Listing:
+class Listing(BaseObject):
+    # Columns to cache.
+    cached_columns = (('feeds', 'updateTime'),
+                      ('feeds', 'unread'),
+                      ('feeds', 'title'),
+                      ('categories', 'title'))
+
      def _getdb(self):
          try:
              db = self.tls.db
@@ -723,34 +966,39 @@ class Listing:
  
          # Check that Woodchuck's state is up to date with respect our
          # state.
-        wc_init (self)
-        if wc().available():
-            # The list of known streams.
-            streams = wc().streams_list ()
-            stream_ids = [s.identifier for s in streams]
-
-            # Register any unknown streams.  Remove known streams from
-            # STREAMS_IDS.
-            for key in self.getListOfFeeds():
-                title = self.getFeedTitle(key)
-                # XXX: We should also check whether the list of
-                # articles/objects in each feed/stream is up to date.
-                if key not in stream_ids:
-                    print ("Registering previously unknown channel: %s (%s)"
-                           % (key, title,))
-                    # Use a default refresh interval of 6 hours.
-                    wc().stream_register (key, title, 6 * 60 * 60)
-                else:
-                    # Make sure the human readable name is up to date.
-                    if wc()[key].human_readable_name != title:
-                        wc()[key].human_readable_name = title
-                    stream_ids.remove (key)
-                    
-
-            # Unregister any streams that are no longer subscribed to.
-            for id in stream_ids:
-                print ("Unregistering %s" % (id,))
-                w.stream_unregister (id)
+        try:
+            # Only the update_feeds.py process synchronizes the stream list.
+            updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
+            wc_init (self, updater)
+            if wc().available() and updater:
+                # The list of known streams.
+                streams = wc().streams_list ()
+                stream_ids = [s.identifier for s in streams]
+
+                # Register any unknown streams.  Remove known streams from
+                # STREAM_IDS.
+                for key in self.getListOfFeeds():
+                    title = self.getFeedTitle(key)
+                    # XXX: We should also check whether the list of
+                    # articles/objects in each feed/stream is up to date.
+                    if key not in stream_ids:
+                        logger.debug(
+                            "Registering previously unknown channel: %s (%s)"
+                            % (key, title,))
+                        # Use a default refresh interval of 6 hours.
+                        wc().stream_register (key, title, 6 * 60 * 60)
+                    else:
+                        # Make sure the human readable name is up to date.
+                        if wc()[key].human_readable_name != title:
+                            wc()[key].human_readable_name = title
+                        stream_ids.remove (key)
+
+                # Unregister any streams that are no longer subscribed to.
+                for id in stream_ids:
+                    logger.debug("Unregistering %s" % (id,))
+                    wc().stream_unregister (id)
+        except Exception:
+            logger.exception("Registering streams with Woodchuck")
  
      def importOldFormatFeeds(self):
          """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
@@ -789,7 +1037,8 @@ class Listing:
                              pass
                  self.updateUnread(id)
              except:
-                traceback.print_exc()
+                logger.error("importOldFormatFeeds: %s"
+                             % (traceback.format_exc(),))
          remove(self.configdir+"feeds.pickle")
                  
          
@@ -847,7 +1096,28 @@ class Listing:
              self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
                              (title, key))
          self.db.commit()
+        self.cache_invalidate('feeds')
          self.updateUnread(key)
+
+        update_server_object().ArticleCountUpdated()
+
+        stats = JobManager().stats()
+        global jobs_at_start
+        completed = stats['jobs-completed'] - jobs_at_start
+        in_progress = stats['jobs-in-progress']
+        queued = stats['jobs-queued']
+
+        try:
+            percent = (100 * ((completed + in_progress / 2.))
+                       / (completed + in_progress + queued))
+        except ZeroDivisionError:
+            percent = 100
+
+        update_server_object().UpdateProgress(
+            percent, completed, in_progress, queued, 0, 0, 0, key)
+
+        if in_progress == 0 and queued == 0:
+            jobs_at_start = stats['jobs-completed']
          
      def getFeed(self, key):
          if key == "ArchivedArticles":
@@ -860,25 +1130,57 @@ class Listing:
          else:
              self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
          self.db.commit()
+        self.cache_invalidate('feeds')
  
          if wc().available():
              try:
                  wc()[key].human_readable_name = title
              except KeyError:
-                print "Feed %s (%s) unknown." % (key, title)
-                pass
+                logger.debug("Feed %s (%s) unknown." % (key, title))
          
      def getFeedUpdateTime(self, key):
-        return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
+        """Return a human-readable age for the feed's last update."""
+        update_time = self.lookup('feeds', 'updateTime', key)
+        if not update_time:
+            return "Never"
+
+        delta = time.time() - update_time
+
+        delta_hours = delta / (60. * 60.)
+        if delta_hours < .1:
+            return "A few minutes ago"
+        if delta_hours < .75:
+            return "Less than an hour ago"
+        if delta_hours < 1.5:
+            return "About an hour ago"
+        if delta_hours < 18:
+            return "About %d hours ago" % (int(delta_hours + 0.5),)
+
+        delta_days = delta_hours / 24.
+        if delta_days < 1.5:
+            return "About a day ago"
+        if delta_days < 18:
+            return "%d days ago" % (int(delta_days + 0.5),)
+
+        delta_weeks = delta_days / 7.
+        if delta_weeks <= 8:
+            return "%d weeks ago" % int(delta_weeks + 0.5)
+
+        delta_months = delta_days / 30.
+        if delta_months <= 30:
+            return "%d months ago" % int(delta_months + 0.5)
+        # Too old for a relative phrase: show the date in local time.
+        return time.strftime("%x", time.localtime(update_time))
          
      def getFeedNumberOfUnreadItems(self, key):
-        return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
+        return self.lookup('feeds', 'unread', key)
          
      def getFeedTitle(self, key):
-        (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
+        title = self.lookup('feeds', 'title', key)
          if title:
              return title
-        return url
+
+        return self.getFeedUrl(key)
          
      def getFeedUrl(self, key):
          return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
@@ -898,16 +1200,11 @@ class Listing:
          return keys
      
      def getListOfCategories(self):
-        rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
-        keys = []
-        for row in rows:
-            if row[0]:
-                keys.append(row[0])
-        return keys
+        return [row[0] for row in self.db.execute(
+                "SELECT id FROM categories ORDER BY rank;")]
      
      def getCategoryTitle(self, id):
-        row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
-        return row[0]
+        return self.lookup('categories', 'title', id)
      
      def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
          if   order == "Most unread":
@@ -947,6 +1244,7 @@ class Listing:
          feed = self.getFeed(key)
          self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
          self.db.commit()
+        self.cache_invalidate('feeds')
  
      def addFeed(self, title, url, id=None, category=1):
          if not id:
@@ -988,7 +1286,7 @@ class Listing:
              try:
                  del wc()[key]
              except KeyError:
-                print "Removing unregistered feed %s failed" % (key,)
+                logger.debug("Removing unregistered feed %s failed" % (key,))
  
          rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
          self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))