From: Neal H. Walfield Date: Tue, 6 Sep 2011 22:16:45 +0000 (+0200) Subject: rss_sqlite: When updating a feed: improve statistics reporting. X-Git-Url: http://vcs.maemo.org/git/?p=feedingit;a=commitdiff_plain;h=2fe8ecf075d0b91edb29273b96aabec62e0a71eb rss_sqlite: When updating a feed: improve statistics reporting. --- diff --git a/src/rss_sqlite.py b/src/rss_sqlite.py index 5735de2..0319999 100644 --- a/src/rss_sqlite.py +++ b/src/rss_sqlite.py @@ -73,6 +73,27 @@ def downloader(progress_handler=None, proxy=None): return urllib2.build_opener(*openers) +def transfer_stats(sent, received, **kwargs): + """ + This function takes two arguments: sent is the number of bytes + sent so far, received is the number of bytes received. The + function returns a continuation that you can call later. + + The continuation takes the same two arguments. It returns a tuple + of the number of bytes sent, the number of bytes received and the + time since the original function was invoked. + """ + start_time = time.time() + start_sent = sent + start_received = received + + def e(sent, received, **kwargs): + return (sent - start_sent, + received - start_received, + time.time() - start_time) + + return e + # If not None, a subprocess.Popen object corresponding to a # update_feeds.py process. 
update_feed_process = None @@ -297,10 +318,12 @@ class Feed(BaseObject): time.sleep(1) def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs): + logger.debug("Updating %s" % url) + success = False have_serial_execution_lock = False try: - download_start = time.time () + update_start = time.time () progress_handler = HTTPProgressHandler(download_callback) @@ -309,9 +332,11 @@ class Feed(BaseObject): openers.append (proxy) kwargs = {'handlers':openers} + feed_transfer_stats = transfer_stats(0, 0) + tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs) - download_duration = time.time () - download_start - + download_duration = time.time () - update_start + opener = downloader(progress_handler, proxy) if JobManager().do_quit: @@ -335,7 +360,7 @@ class Feed(BaseObject): |woodchuck.Indicator.StreamWide), transferred_down=progress_handler.stats['received'], transferred_up=progress_handler.stats['sent'], - transfer_time=download_start, + transfer_time=update_start, transfer_duration=download_duration, new_objects=len (tmp.entries), objects_inline=len (tmp.entries)) @@ -428,11 +453,12 @@ class Feed(BaseObject): # responsive. 
time.sleep(0) + entry_transfer_stats = transfer_stats( + *feed_transfer_stats(**progress_handler.stats)[0:2]) + if JobManager().do_quit: raise KeyboardInterrupt - received_base = progress_handler.stats['received'] - sent_base = progress_handler.stats['sent'] object_size = 0 date = self.extractDate(entry) @@ -452,7 +478,6 @@ class Feed(BaseObject): entry["id"] = None content = self.extractContent(entry) object_size = len (content) - received_base -= len (content) tmpEntry = {"title":entry["title"], "content":content, "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]} id = self.generateUniqueId(tmpEntry) @@ -544,9 +569,14 @@ class Feed(BaseObject): else: publication_time = None - sent = progress_handler.stats['sent'] - sent_base - received = (progress_handler.stats['received'] - - received_base) + sent, received, _ \ + = entry_transfer_stats(**progress_handler.stats) + # sent and received are for objects (in + # particular, images) associated with this + # item. We also want to attribute the data + # transferred for the item's content. This is + # a good first approximation. + received += len(content) mainthread.execute( register_object_transferred( @@ -558,12 +588,11 @@ class Feed(BaseObject): async=True) self.db.commit() + sent, received, _ \ + = feed_transfer_stats(**progress_handler.stats) logger.debug ( "%s: Update successful: transferred: %d/%d; objects: %d)" - % (self.key, - progress_handler.stats['sent'], - progress_handler.stats['received'], - len (tmp.entries))) + % (url, sent, received, len (tmp.entries))) mainthread.execute (wc_success, async=True) success = True