Fix typo accessing woodchuck handle.
src/rss_sqlite.py
index 6551d93..867e1af 100644
@@ -54,6 +54,9 @@ import logging
 logger = logging.getLogger(__name__)
 
 def getId(string):
+    if issubclass(string.__class__, unicode):
+        string = string.encode('utf8', 'replace')
+
     return md5.new(string).hexdigest()
 
 def download_callback(connection):
@@ -73,6 +76,27 @@ def downloader(progress_handler=None, proxy=None):
 
     return urllib2.build_opener(*openers)
 
+def transfer_stats(sent, received, **kwargs):
+    """
+    This function takes two arguments: SENT, the number of bytes sent
+    so far, and RECEIVED, the number of bytes received so far.  It
+    returns a continuation that you can call later.
+
+    The continuation takes the same two arguments and returns a tuple
+    of the number of bytes sent and received since the original call,
+    and the time elapsed since then.
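+
+    A sketch of the intended usage (the byte counts are made up):
+
+        stats = transfer_stats(0, 0)
+        # ... perform some transfers ...
+        sent, received, duration = stats(sent=512, received=65536)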
+    """
+    start_time = time.time()
+    start_sent = sent
+    start_received = received
+
+    def e(sent, received, **kwargs):
+        return (sent - start_sent,
+                received - start_received,
+                time.time() - start_time)
+
+    return e
+
 # If not None, a subprocess.Popen object corresponding to a
 # update_feeds.py process.
 update_feed_process = None
@@ -81,7 +105,115 @@ update_feeds_iface = None
 
 jobs_at_start = 0
 
-class Feed:
+class BaseObject(object):
+    # Columns to cache.  Classes that inherit from this and use the
+    # cache mechanism should set this to a list of tuples, each of
+    # which contains two entries: the table and the column.  Note that
+    # both are case sensitive.
+    cached_columns = ()
+
+    def cache_invalidate(self, table=None):
+        """
+        Invalidate the cache.
+
+        If table is not None, invalidate only the specified table.
+        Otherwise, drop the whole cache.
+        """
+        if not hasattr(self, 'cache'):
+            return
+
+        if table is None:
+            del self.cache
+        else:
+            if table in self.cache:
+                del self.cache[table]
+
+    def lookup(self, table, column, id=None):
+        """
+        Look up a single value (if ID is given) or a whole column.
+        Uses a cache for the columns listed in cached_columns.  Note:
+        when no ID is given, the column's values are returned in no
+        particular order.
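+
+        For example (with a hypothetical SOME_ID), assuming
+        ('feed', 'read') is listed in cached_columns:
+
+            read = self.lookup('feed', 'read', some_id)  # one value
+            all_read = self.lookup('feed', 'read')       # whole column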
+        """
+        if not hasattr(self, 'cache'):
+            self.cache = {}
+
+        # Cache data for at most 60 seconds.
+        now = time.time()
+        try:
+            cache = self.cache[table]
+
+            if time.time() - cache[None] > 60:
+                # logger.debug("%s: Cache too old: clearing" % (table,))
+                del self.cache[table]
+                cache = None
+        except KeyError:
+            cache = None
+
+        if (cache is None
+            or (table, column) not in self.cached_columns):
+            # The cache is empty or the caller wants a column that we
+            # don't cache.
+            if (table, column) in self.cached_columns:
+                # logger.debug("%s: Rebuilding cache" % (table,))
+
+                do_cache = True
+
+                self.cache[table] = cache = {}
+                columns = []
+                for t, c in self.cached_columns:
+                    if table == t:
+                        cache[c] = {}
+                        columns.append(c)
+
+                columns.append('id')
+                where = ""
+            else:
+                do_cache = False
+
+                columns = (column,)
+                if id is not None:
+                    where = "where id = '%s'" % id
+                else:
+                    where = ""
+
+            results = self.db.execute(
+                "SELECT %s FROM %s %s" % (','.join(columns), table, where))
+
+            if do_cache:
+                for r in results:
+                    values = list(r)
+                    i = values.pop()
+                    for index, value in enumerate(values):
+                        cache[columns[index]][i] = value
+
+                cache[None] = now
+            else:
+                values = []
+                for r in results:
+                    if id is not None:
+                        return r[0]
+
+                    values.append(r[0])
+
+                return values
+        else:
+            cache = self.cache[table]
+
+        try:
+            if id is not None:
+                value = cache[column][id]
+                # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
+                return value
+            else:
+                return cache[column].values()
+        except KeyError:
+            # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
+            return None
+
+class Feed(BaseObject):
+    # Columns to cache.
+    cached_columns = (('feed', 'read'),
+                      ('feed', 'title'))
+
     serial_execution_lock = threading.Lock()
 
     def _getdb(self):
@@ -97,14 +229,24 @@ class Feed:
         self.key = key
         self.configdir = configdir
         self.dir = "%s/%s.d" %(self.configdir, self.key)
-        self.tls = threading.local ()
+        self.tls = threading.local()
 
         if not isdir(self.dir):
             mkdir(self.dir)
-        if not isfile("%s/%s.db" %(self.dir, self.key)):
-            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
+        filename = "%s/%s.db" % (self.dir, self.key)
+        if not isfile(filename):
+            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, contentHash text, date float, updated float, link text, read int);")
             self.db.execute("CREATE TABLE images (id text, imagePath text);")
             self.db.commit()
+        else:
+            try:
+                self.db.execute("ALTER TABLE feed ADD COLUMN contentHash text")
+                self.db.commit()
+            except sqlite3.OperationalError, e:
+                if 'duplicate column name' in str(e):
+                    pass
+                else:
+                    logger.exception("Add column contentHash to %s", filename)
 
     def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
         filename = configdir+key+".d/"+getId(url)
@@ -135,7 +277,7 @@ class Feed:
                 except OSError:
                     pass
 
-                raise exception
+                return None
         else:
             #open(filename,"a").close()  # "Touch" the file
             file = open(filename,"a")
@@ -196,10 +338,12 @@ class Feed:
                 time.sleep(1)
 
     def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
+        logger.debug("Updating %s" % url)
+
         success = False
         have_serial_execution_lock = False
         try:
-            download_start = time.time ()
+            update_start = time.time ()
 
             progress_handler = HTTPProgressHandler(download_callback)
 
@@ -208,9 +352,11 @@ class Feed:
                 openers.append (proxy)
             kwargs = {'handlers':openers}
             
+            feed_transfer_stats = transfer_stats(0, 0)
+
             tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
-            download_duration = time.time () - download_start
-    
+            download_duration = time.time () - update_start
+
             opener = downloader(progress_handler, proxy)
 
             if JobManager().do_quit:
@@ -222,7 +368,10 @@ class Feed:
             expiry = float(expiryTime) * 3600.
     
             currentTime = 0
-    
+            
+            updated_objects = 0
+            new_objects = 0
+
             def wc_success():
                 try:
                     wc().stream_register (self.key, "", 6 * 60 * 60)
@@ -234,10 +383,11 @@ class Feed:
                                    |woodchuck.Indicator.StreamWide),
                         transferred_down=progress_handler.stats['received'],
                         transferred_up=progress_handler.stats['sent'],
-                        transfer_time=download_start,
+                        transfer_time=update_start,
                         transfer_duration=download_duration,
-                        new_objects=len (tmp.entries),
-                        objects_inline=len (tmp.entries))
+                        new_objects=new_objects,
+                        updated_objects=updated_objects,
+                        objects_inline=new_objects + updated_objects)
                 except KeyError:
                     logger.warn(
                         "Failed to register update of %s with woodchuck!"
@@ -255,13 +405,13 @@ class Feed:
                 logger.debug("%s: No changes to feed." % (self.key,))
                 mainthread.execute(wc_success, async=True)
                 success = True
-            elif len(tmp["entries"])==0 and not tmp.version:
+            elif len(tmp["entries"])==0 and not tmp.get('version', None):
                 # An error occured fetching or parsing the feed.  (Version
                 # will be either None if e.g. the connection timed our or
                 # '' if the data is not a proper feed)
                 logger.error(
                     "Error fetching %s: version is: %s: error: %s"
-                    % (url, str (tmp.version),
+                    % (url, str (tmp.get('version', 'unset')),
                        str (tmp.get ('bozo_exception', 'Unknown error'))))
                 logger.debug(tmp)
                 def register_stream_update_failed(http_status):
@@ -319,19 +469,18 @@ class Feed:
                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()
     
-               ids = self.getIds()
-    
                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)
     
+                   entry_transfer_stats = transfer_stats(
+                       *feed_transfer_stats(**progress_handler.stats)[0:2])
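+                   # The continuation created above is baselined at the
+                   # feed-wide totals so far, so its deltas cover only
+                   # this entry's downloads (content and images).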
+
                    if JobManager().do_quit:
                        raise KeyboardInterrupt
 
-                   received_base = progress_handler.stats['received']
-                   sent_base = progress_handler.stats['sent']
                    object_size = 0
 
                    date = self.extractDate(entry)
@@ -350,21 +499,67 @@ class Feed:
                    if(not(entry.has_key("id"))):
                        entry["id"] = None
                    content = self.extractContent(entry)
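+                   # Hash the extracted content; it is compared below
+                   # against the stored hash to detect real updates.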
+                   contentHash = getId(content)
                    object_size = len (content)
-                   received_base -= len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)
                    
+                   current_version = self.db.execute(
+                       'select date, ROWID, contentHash from feed where id=?',
+                       (id,)).fetchone()
+                   if (current_version is not None
+                       # To detect updates, don't compare by date:
+                       # compare by content.
+                       #
+                       # - If an article update is just a date change
+                       #   and the content remains the same, we don't
+                       #   want to register an update.
+                       #
+                       # - If an article's content changes but not the
+                       #   date, we want to recognize an update.
+                       and current_version[2] == contentHash):
+                       logger.debug("ALREADY DOWNLOADED %s (%s)"
+                                    % (entry["title"], entry["link"]))
+                       # This article is already present in the feed
+                       # listing.  Update the "updated" time so it
+                       # doesn't expire.
+                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;",(currentTime,id))
+                       try: 
+                           logger.debug("Updating already downloaded files for %s" %(id))
+                           filename = configdir+self.key+".d/"+id+".html"
+                           file = open(filename,"a")
+                           utime(filename, None)
+                           file.close()
+                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
+                           for image in images:
+                                file = open(image[0],"a")
+                                utime(image[0], None)
+                                file.close()
+                       except:
+                           logger.debug("Error in refreshing images for %s" % (id))
+                       self.db.commit()
+                       continue                       
+
+                   if current_version is not None:
+                       # The version was updated.  Mark it as unread.
+                       logger.debug("UPDATED: %s (%s)"
+                                    % (entry["title"], entry["link"]))
+                       updated_objects += 1
+                   else:
+                       logger.debug("NEW: %s (%s)"
+                                    % (entry["title"], entry["link"]))
+                       new_objects += 1
+
                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
-                   #if not id in ids:
                    if imageCache and len(images) > 0:
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
+                           if not img.has_key('src'):
+                               continue
+
                            filename = self.addImage(
                                configdir, self.key, baseurl, img['src'],
                                opener=opener)
@@ -387,29 +582,28 @@ class Feed:
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
-                   if id in ids:
-                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
-                       self.db.commit()
-                   else:
-                       values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
-                       self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
-                       self.db.commit()
-#                   else:
-#                       try:
-#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
-#                           self.db.commit()
-#                           filename = configdir+self.key+".d/"+id+".html"
-#                           file = open(filename,"a")
-#                           utime(filename, None)
-#                           file.close()
-#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
-#                           for image in images:
-#                                file = open(image[0],"a")
-#                                utime(image[0], None)
-#                                file.close()
-#                       except:
-#                           pass
-    
+
+                   values = {'id': id,
+                             'title': tmpEntry["title"],
+                             'contentLink': tmpEntry["contentLink"],
+                             'contentHash': contentHash,
+                             'date': tmpEntry["date"],
+                             'updated': currentTime,
+                             'link': tmpEntry["link"],
+                             'read': 0}
+
+                   if current_version is not None:
+                       # This is an update.  Ensure that the existing
+                       # entry is replaced.
+                       values['ROWID'] = current_version[1]
+
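+                   # Split the dict into parallel column-name and value
+                   # sequences for the parameterized INSERT below.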
+                   cols, values = zip(*values.items())
+                   self.db.execute(
+                       "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
+                       % (','.join(cols), ','.join(('?',) * len(values))),
+                       values)
+                   self.db.commit()
+
                    # Register the object with Woodchuck and mark it as
                    # downloaded.
                    def register_object_transferred(
@@ -443,9 +637,14 @@ class Feed:
                        else:
                            publication_time = None
 
-                       sent = progress_handler.stats['sent'] - sent_base
-                       received = (progress_handler.stats['received']
-                                   - received_base)
+                       sent, received, _ \
+                           = entry_transfer_stats(**progress_handler.stats)
+                       # sent and received are for objects (in
+                       # particular, images) associated with this
+                       # item.  We also want to attribute the data
+                       # transferred for the item's content.  This is
+                       # a good first approximation.
+                       received += len(content)
 
                        mainthread.execute(
                            register_object_transferred(
@@ -457,12 +656,11 @@ class Feed:
                            async=True)
                self.db.commit()
 
+               sent, received, _ \
+                   = feed_transfer_stats(**progress_handler.stats)
                logger.debug (
                    "%s: Update successful: transferred: %d/%d; objects: %d)"
-                   % (self.key,
-                      progress_handler.stats['sent'],
-                      progress_handler.stats['received'],
-                      len (tmp.entries)))
+                   % (url, sent, received, len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True
 
@@ -528,6 +726,8 @@ class Feed:
                     postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                         title, *postFeedUpdateFuncArgs)
 
+        self.cache_invalidate()
+
     def setEntryRead(self, id):
         self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
         self.db.commit()
@@ -539,25 +739,30 @@ class Feed:
                 pass
         if wc().available():
             mainthread.execute(doit, async=True)
+        self.cache_invalidate('feed')
 
     def setEntryUnread(self, id):
         self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
         self.db.commit()     
+        self.cache_invalidate('feed')
         
     def markAllAsRead(self):
         self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
         self.db.commit()
+        self.cache_invalidate('feed')
 
     def isEntryRead(self, id):
-        read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
-        return read_status==1  # Returns True if read==1, and False if read==0
+        return self.lookup('feed', 'read', id) == 1
     
     def getTitle(self, id):
-        return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
+        return self.lookup('feed', 'title', id)
     
     def getContentLink(self, id):
         return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
     
+    def getContentHash(self, id):
+        return self.db.execute("SELECT contentHash FROM feed WHERE id=?;", (id,) ).fetchone()[0]
+    
     def getExternalLink(self, id):
         return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
     
@@ -653,13 +858,18 @@ class Feed:
         return text
    
     def getContent(self, id):
-        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
+        """
+        Return the content of the article with the specified ID.  If
+        the content is not available, returns None.
+        """
+        contentLink = self.getContentLink(id)
         try:
-            file = open(self.entries[id]["contentLink"])
-            content = file.read()
-            file.close()
-        except:
-            content = "Content unavailable"
+            with open(contentLink, 'r') as file:
+                content = file.read()
+        except Exception:
+            logger.exception("Failed get content for %s: reading %s failed",
+                             id, contentLink)
+            content = None
         return content
     
     def extractDate(self, entry):
@@ -709,33 +919,41 @@ class ArchivedArticles(Feed):
         self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
         self.db.commit()
 
-    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
+    # Feed.updateFeed calls this function.
+    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
         currentTime = 0
         rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
         for row in rows:
-            currentTime = time.time()
-            id = row[0]
-            link = row[1]
-            f = urllib2.urlopen(link)
-            #entry["content"] = f.read()
-            html = f.read()
-            f.close()
-            soup = BeautifulSoup(html)
-            images = soup('img')
-            baseurl = link
-            for img in images:
-                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
-                img['src']=filename
-                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+            try:
+                currentTime = time.time()
+                id = row[0]
+                link = row[1]
+                f = urllib2.urlopen(link)
+                #entry["content"] = f.read()
+                html = f.read()
+                f.close()
+                soup = BeautifulSoup(html)
+                images = soup('img')
+                baseurl = link
+                for img in images:
+                    filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
+                    img['src']=filename
+                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+                    self.db.commit()
+                contentLink = configdir+self.key+".d/"+id+".html"
+                file = open(contentLink, "w")
+                file.write(soup.prettify())
+                file.close()
+                
+                self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
                 self.db.commit()
-            contentLink = configdir+self.key+".d/"+id+".html"
-            file = open(contentLink, "w")
-            file.write(soup.prettify())
-            file.close()
-            
-            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
-            self.db.commit()
-        return (currentTime, None, None)
+            except:
+                logger.error("Error updating Archived Article: %s %s"
+                             % (link,traceback.format_exc(),))
+
+        if postFeedUpdateFunc is not None:
+            postFeedUpdateFunc (self.key, currentTime, None, None, None,
+                                *postFeedUpdateFuncArgs)
     
     def purgeReadArticles(self):
         rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
@@ -754,7 +972,13 @@ class ArchivedArticles(Feed):
                 pass
         self.removeEntry(id)
 
-class Listing:
+class Listing(BaseObject):
+    # Columns to cache.
+    cached_columns = (('feeds', 'updateTime'),
+                      ('feeds', 'unread'),
+                      ('feeds', 'title'),
+                      ('categories', 'title'))
+
     def _getdb(self):
         try:
             db = self.tls.db
@@ -798,36 +1022,42 @@ class Listing:
 
         # Check that Woodchuck's state is up to date with respect our
         # state.
-        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
-        wc_init (self, True if updater else False)
-        if wc().available() and updater:
-            # The list of known streams.
-            streams = wc().streams_list ()
-            stream_ids = [s.identifier for s in streams]
-
-            # Register any unknown streams.  Remove known streams from
-            # STREAMS_IDS.
-            for key in self.getListOfFeeds():
-                title = self.getFeedTitle(key)
-                # XXX: We should also check whether the list of
-                # articles/objects in each feed/stream is up to date.
-                if key not in stream_ids:
-                    logger.debug(
-                        "Registering previously unknown channel: %s (%s)"
-                        % (key, title,))
-                    # Use a default refresh interval of 6 hours.
-                    wc().stream_register (key, title, 6 * 60 * 60)
-                else:
-                    # Make sure the human readable name is up to date.
-                    if wc()[key].human_readable_name != title:
-                        wc()[key].human_readable_name = title
-                    stream_ids.remove (key)
-                    
-
-            # Unregister any streams that are no longer subscribed to.
-            for id in stream_ids:
-                logger.debug("Unregistering %s" % (id,))
-                w.stream_unregister (id)
+        try:
+            updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
+            wc_init(config, self, True if updater else False)
+            if wc().available() and updater:
+                # The list of known streams.
+                streams = wc().streams_list ()
+                stream_ids = [s.identifier for s in streams]
+    
+                # Register any unknown streams.  Remove known streams from
+                # STREAMS_IDS.
+                for key in self.getListOfFeeds():
+                    title = self.getFeedTitle(key)
+                    # XXX: We should also check whether the list of
+                    # articles/objects in each feed/stream is up to date.
+                    if key not in stream_ids:
+                        logger.debug(
+                            "Registering previously unknown channel: %s (%s)"
+                            % (key, title,))
+                        wc().stream_register(
+                            key, title,
+                            self.config.getUpdateInterval() * 60 * 60)
+                    else:
+                        # Make sure the human readable name is up to date.
+                        if wc()[key].human_readable_name != title:
+                            wc()[key].human_readable_name = title
+                        stream_ids.remove (key)
+                        wc()[key].freshness \
+                            = self.config.getUpdateInterval() * 60 * 60
+                        
+    
+                # Unregister any streams that are no longer subscribed to.
+                for id in stream_ids:
+                    logger.debug("Unregistering %s" % (id,))
+                    wc().stream_unregister (id)
+        except Exception:
+            logger.exception("Registering streams with Woodchuck")
 
     def importOldFormatFeeds(self):
         """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
@@ -925,6 +1155,7 @@ class Listing:
             self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
                             (title, key))
         self.db.commit()
+        self.cache_invalidate('feeds')
         self.updateUnread(key)
 
         update_server_object().ArticleCountUpdated()
@@ -935,8 +1166,11 @@ class Listing:
         in_progress = stats['jobs-in-progress']
         queued = stats['jobs-queued']
 
-        percent = (100 * ((completed + in_progress / 2.))
-                   / (completed + in_progress + queued))
+        try:
+            percent = (100 * ((completed + in_progress / 2.))
+                       / (completed + in_progress + queued))
+        except ZeroDivisionError:
+            percent = 100
 
         update_server_object().UpdateProgress(
             percent, completed, in_progress, queued, 0, 0, 0, key)
@@ -955,6 +1189,7 @@ class Listing:
         else:
             self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
         self.db.commit()
+        self.cache_invalidate('feeds')
 
         if wc().available():
             try:
@@ -963,8 +1198,7 @@ class Listing:
                 logger.debug("Feed %s (%s) unknown." % (key, title))
         
     def getFeedUpdateTime(self, key):
-        update_time = self.db.execute(
-            "SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0]
+        update_time = self.lookup('feeds', 'updateTime', key)
 
         if not update_time:
             return "Never"
@@ -998,13 +1232,14 @@ class Listing:
         return time.strftime("%x", time.gmtime(update_time))
         
     def getFeedNumberOfUnreadItems(self, key):
-        return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
+        return self.lookup('feeds', 'unread', key)
         
     def getFeedTitle(self, key):
-        (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
+        title = self.lookup('feeds', 'title', key)
         if title:
             return title
-        return url
+
+        return self.getFeedUrl(key)
         
     def getFeedUrl(self, key):
         return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
@@ -1024,16 +1259,20 @@ class Listing:
         return keys
     
     def getListOfCategories(self):
-        rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
-        keys = []
-        for row in rows:
-            if row[0]:
-                keys.append(row[0])
-        return keys
+        return list(row[0] for row in self.db.execute(
+                "SELECT id FROM categories ORDER BY rank;"))
     
     def getCategoryTitle(self, id):
-        row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
-        return row[0]
+        return self.lookup('categories', 'title', id)
+    
+    def getCategoryUnread(self, id):
+        count = 0
+        for key in self.getListOfFeeds(category=id):
+            try: 
+                count = count + self.getFeedNumberOfUnreadItems(key)
+            except:
+                pass
+        return count
     
     def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
         if   order == "Most unread":
@@ -1073,6 +1312,7 @@ class Listing:
         feed = self.getFeed(key)
         self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
         self.db.commit()
+        self.cache_invalidate('feeds')
 
     def addFeed(self, title, url, id=None, category=1):
         if not id:
@@ -1095,6 +1335,7 @@ class Listing:
                                      human_readable_name=title,
                                      freshness=6*60*60)
 
+            self.cache_invalidate('feeds')
             return True
         else:
             return False
@@ -1108,12 +1349,13 @@ class Listing:
             id=1
         self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
         self.db.commit()
+        self.cache_invalidate('categories')
     
     def removeFeed(self, key):
         if wc().available ():
             try:
                 del wc()[key]
-            except KeyError:
+            except (KeyError, woodchuck.Error):
                 logger.debug("Removing unregistered feed %s failed" % (key,))
 
         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
@@ -1123,6 +1365,7 @@ class Listing:
 
         if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
+        self.cache_invalidate('feeds')
            
     def removeCategory(self, key):
         if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
@@ -1131,6 +1374,7 @@ class Listing:
             self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
             self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
             self.db.commit()
+            self.cache_invalidate('categories')
         
     #def saveConfig(self):
     #    self.listOfFeeds["feedingit-order"] = self.sortedKeys