From 720c7eaf4b808cecc9285cafec98df612034fda9 Mon Sep 17 00:00:00 2001 From: "Neal H. Walfield" Date: Wed, 7 Sep 2011 01:03:32 +0200 Subject: [PATCH] Don't reprocess downloaded articles that are already up to date. --- src/rss_sqlite.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/rss_sqlite.py b/src/rss_sqlite.py index 0319999..077fce0 100644 --- a/src/rss_sqlite.py +++ b/src/rss_sqlite.py @@ -348,7 +348,10 @@ class Feed(BaseObject): expiry = float(expiryTime) * 3600. currentTime = 0 - + + updated_objects = 0 + new_objects = 0 + def wc_success(): try: wc().stream_register (self.key, "", 6 * 60 * 60) @@ -362,8 +365,9 @@ class Feed(BaseObject): transferred_up=progress_handler.stats['sent'], transfer_time=update_start, transfer_duration=download_duration, - new_objects=len (tmp.entries), - objects_inline=len (tmp.entries)) + new_objects=new_objects, + updated_objects=updated_objects, + objects_inline=new_objects + updated_objects) except KeyError: logger.warn( "Failed to register update of %s with woodchuck!" @@ -482,6 +486,26 @@ class Feed(BaseObject): "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]} id = self.generateUniqueId(tmpEntry) + current_version \ + = self.db.execute('select date from feed where id=?', + (id,)).fetchone() + if (current_version is not None + and current_version[0] == date): + logger.debug("ALREADY DOWNLOADED %s (%s)" + % (entry["title"], entry["link"])) + continue + + if current_version is not None: + # The version was updated. Mark it as unread. + logger.debug("UPDATED: %s (%s)" + % (entry["title"], entry["link"])) + self.setEntryUnread(id) + updated_objects += 1 + else: + logger.debug("NEW: %s (%s)" + % (entry["title"], entry["link"])) + new_objects += 1 + #articleTime = time.mktime(self.entries[id]["dateTuple"]) soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"]) images = soup('img') -- 1.7.9.5