# Description : Simple RSS Reader
# ============================================================================
+from __future__ import with_statement
+
import sqlite3
from os.path import isfile, isdir
from shutil import rmtree
logger = logging.getLogger(__name__)
def getId(string):
+ if issubclass(string.__class__, unicode):
+ string = string.encode('utf8', 'replace')
+
return md5.new(string).hexdigest()
def download_callback(connection):
def downloader(progress_handler=None, proxy=None):
openers = []
- if progress_handler:
- openers.append (progress_handler)
+ if progress_handler is not None:
+ openers.append(progress_handler)
else:
openers.append(HTTPProgressHandler(download_callback))
if proxy:
- openers.append (proxy)
+ openers.append(proxy)
+
- return urllib2.build_opener (*openers)
+ return urllib2.build_opener(*openers)
+
+def transfer_stats(sent, received, **kwargs):
+ """
+ This function takes two arguments: sent is the number of bytes
+ sent so far, received is the number of bytes received. The
+ function returns a continuation that you can call later.
+
+ The continuation takes the same two arguments and returns a tuple
+ of the number of bytes sent, the number of bytes received, and the
+ time elapsed since the original call.
+ """
+ start_time = time.time()
+ start_sent = sent
+ start_received = received
+
+ def e(sent, received, **kwargs):
+ return (sent - start_sent,
+ received - start_received,
+ time.time() - start_time)
+
+ return e
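+
+ # Illustrative usage only, mirroring how _updateFeed uses transfer_stats
+ # below: take a snapshot when the transfer starts, then call the returned
+ # continuation with the running totals to get the deltas for that phase:
+ #
+ #     feed_transfer_stats = transfer_stats(0, 0)
+ #     ... download the feed and its images ...
+ #     sent, received, duration = feed_transfer_stats(**progress_handler.stats)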
# If not None, a subprocess.Popen object corresponding to an
# update_feeds.py process.
jobs_at_start = 0
-class Feed:
+class BaseObject(object):
+ # Columns to cache. Classes that inherit from this and use the
+ # cache mechanism should set this to a list of tuples, each of
+ # which contains two entries: the table and the column. Note that
+ # both are case sensitive.
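+ #
+ # For example, the Feed class below caches the 'read' and 'title'
+ # columns of the 'feed' table:
+ #
+ #     cached_columns = (('feed', 'read'),
+ #                       ('feed', 'title'))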
+ cached_columns = ()
+
+ def cache_invalidate(self, table=None):
+ """
+ Invalidate the cache.
+
+ If table is not None, invalidate only the specified table.
+ Otherwise, drop the whole cache.
+ """
+ if not hasattr(self, 'cache'):
+ return
+
+ if table is None:
+ del self.cache
+ else:
+ if table in self.cache:
+ del self.cache[table]
+
+ def lookup(self, table, column, id=None):
+ """
+ Look up a column or value. Uses a cache for columns in
+ cached_columns. Note: the column is returned unsorted.
+ """
+ if not hasattr(self, 'cache'):
+ self.cache = {}
+
+ # Cache data for at most 60 seconds.
+ now = time.time()
+ try:
+ cache = self.cache[table]
+
+ if time.time() - cache[None] > 60:
+ # logger.debug("%s: Cache too old: clearing" % (table,))
+ del self.cache[table]
+ cache = None
+ except KeyError:
+ cache = None
+
+ if (cache is None
+ or (table, column) not in self.cached_columns):
+ # The cache is empty or the caller wants a column that we
+ # don't cache.
+ if (table, column) in self.cached_columns:
+ # logger.debug("%s: Rebuilding cache" % (table,))
+
+ do_cache = True
+
+ self.cache[table] = cache = {}
+ columns = []
+ for t, c in self.cached_columns:
+ if table == t:
+ cache[c] = {}
+ columns.append(c)
+
+ columns.append('id')
+ where = ""
+ else:
+ do_cache = False
+
+ columns = (column,)
+ if id is not None:
+ where = "where id = '%s'" % id
+ else:
+ where = ""
+
+ results = self.db.execute(
+ "SELECT %s FROM %s %s" % (','.join(columns), table, where))
+
+ if do_cache:
+ for r in results:
+ values = list(r)
+ i = values.pop()
+ for index, value in enumerate(values):
+ cache[columns[index]][i] = value
+
+ cache[None] = now
+ else:
+ rows = []
+ for r in results:
+ if id is not None:
+ return r[0]
+
+ rows.append(r[0])
+
+ return rows
+ else:
+ cache = self.cache[table]
+
+ try:
+ if id is not None:
+ value = cache[column][id]
+ # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
+ return value
+ else:
+ return cache[column].values()
+ except KeyError:
+ # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
+ return None
+
+class Feed(BaseObject):
+ # Columns to cache.
+ cached_columns = (('feed', 'read'),
+ ('feed', 'title'))
+
serial_execution_lock = threading.Lock()
def _getdb(self):
self.key = key
self.configdir = configdir
self.dir = "%s/%s.d" %(self.configdir, self.key)
- self.tls = threading.local ()
+ self.tls = threading.local()
if not isdir(self.dir):
mkdir(self.dir)
- if not isfile("%s/%s.db" %(self.dir, self.key)):
- self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
+ filename = "%s/%s.db" % (self.dir, self.key)
+ if not isfile(filename):
+ self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, contentHash text, date float, updated float, link text, read int);")
self.db.execute("CREATE TABLE images (id text, imagePath text);")
self.db.commit()
+ else:
+ try:
+ self.db.execute("ALTER TABLE feed ADD COLUMN contentHash text")
+ self.db.commit()
+ except sqlite3.OperationalError, e:
+ if 'duplicate column name' in str(e):
+ pass
+ else:
+ logger.exception("Add column contentHash to %s", filename)
def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
filename = configdir+key+".d/"+getId(url)
# Fall back to a default opener when the caller only supplies a proxy
# (ArchivedArticles passes proxy= but no opener=).
if not opener:
opener = downloader(proxy=proxy)
abs_url = urljoin(baseurl,url)
f = opener.open(abs_url)
- outf = open(filename, "w")
- outf.write(f.read())
- f.close()
- outf.close()
+ try:
+ with open(filename, "w") as outf:
+ for data in f:
+ outf.write(data)
+ finally:
+ f.close()
except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
logger.info("Could not download image %s: %s"
% (abs_url, str (exception)))
except OSError:
pass
- raise exception
+ return None
else:
#open(filename,"a").close() # "Touch" the file
file = open(filename,"a")
time.sleep(1)
def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
+ logger.debug("Updating %s" % url)
+
success = False
have_serial_execution_lock = False
try:
- download_start = time.time ()
+ update_start = time.time()
progress_handler = HTTPProgressHandler(download_callback)
openers.append (proxy)
kwargs = {'handlers':openers}
+ feed_transfer_stats = transfer_stats(0, 0)
+
tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
- download_duration = time.time () - download_start
-
+ download_duration = time.time() - update_start
+
opener = downloader(progress_handler, proxy)
if JobManager().do_quit:
expiry = float(expiryTime) * 3600.
currentTime = 0
-
- have_woodchuck = mainthread.execute (wc().available)
+
+ updated_objects = 0
+ new_objects = 0
def wc_success():
try:
|woodchuck.Indicator.StreamWide),
transferred_down=progress_handler.stats['received'],
transferred_up=progress_handler.stats['sent'],
- transfer_time=download_start,
+ transfer_time=update_start,
transfer_duration=download_duration,
- new_objects=len (tmp.entries),
- objects_inline=len (tmp.entries))
+ new_objects=new_objects,
+ updated_objects=updated_objects,
+ objects_inline=new_objects + updated_objects)
except KeyError:
logger.warn(
"Failed to register update of %s with woodchuck!"
# this first.
if http_status == 304:
logger.debug("%s: No changes to feed." % (self.key,))
- mainthread.execute (wc_success, async=True)
+ mainthread.execute(wc_success, async=True)
success = True
- elif len(tmp["entries"])==0 and not tmp.version:
+ elif len(tmp["entries"])==0 and not tmp.get('version', None):
# An error occured fetching or parsing the feed. (Version
# will be either None if e.g. the connection timed our or
# '' if the data is not a proper feed)
logger.error(
"Error fetching %s: version is: %s: error: %s"
- % (url, str (tmp.version),
+ % (url, str (tmp.get('version', 'unset')),
str (tmp.get ('bozo_exception', 'Unknown error'))))
logger.debug(tmp)
- if have_woodchuck:
- def e():
+ def register_stream_update_failed(http_status):
+ def doit():
logger.debug("%s: stream update failed!" % self.key)
try:
if 500 <= http_status and http_status < 600:
ec = woodchuck.TransferStatus.TransientNetwork
wc()[self.key].update_failed(ec)
- mainthread.execute (e, async=True)
+ return doit
+ if wc().available:
+ mainthread.execute(
+ register_stream_update_failed(
+ http_status=http_status),
+ async=True)
else:
currentTime = time.time()
# The etag and modified value should only be updated if the content was not null
#reversedEntries = self.getEntries()
#reversedEntries.reverse()
- ids = self.getIds()
-
tmp["entries"].reverse()
for entry in tmp["entries"]:
# Yield so as to make the main thread a bit more
# responsive.
time.sleep(0)
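+ # Snapshot the feed-level transfer totals so that the bytes moved
+ # while handling this entry (its images in particular) can later be
+ # attributed to it when it is registered with Woodchuck.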
+ entry_transfer_stats = transfer_stats(
+ *feed_transfer_stats(**progress_handler.stats)[0:2])
+
if JobManager().do_quit:
raise KeyboardInterrupt
- received_base = progress_handler.stats['received']
- sent_base = progress_handler.stats['sent']
object_size = 0
date = self.extractDate(entry)
if(not(entry.has_key("id"))):
entry["id"] = None
content = self.extractContent(entry)
+ contentHash = getId(content)
object_size = len (content)
- received_base -= len (content)
tmpEntry = {"title":entry["title"], "content":content,
"date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
id = self.generateUniqueId(tmpEntry)
+ current_version = self.db.execute(
+ 'SELECT date, ROWID, contentHash FROM feed WHERE id=?',
+ (id,)).fetchone()
+ if (current_version is not None
+ # To detect updates, don't compare by date:
+ # compare by content.
+ #
+ # - If an article update is just a date change
+ # and the content remains the same, we don't
+ # want to register an update.
+ #
+ # - If an article's content changes but not the
+ # date, we want to recognize an update.
+ and current_version[2] == contentHash):
+ logger.debug("ALREADY DOWNLOADED %s (%s)"
+ % (entry["title"], entry["link"]))
+ # This article is already in the feed listing; refresh its
+ # "updated" time so that it doesn't expire.
+ self.db.execute("UPDATE feed SET updated=? WHERE id=?;",(currentTime,id))
+ try:
+ logger.debug("Updating already downloaded files for %s" %(id))
+ filename = configdir+self.key+".d/"+id+".html"
+ file = open(filename,"a")
+ utime(filename, None)
+ file.close()
+ images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
+ for image in images:
+ file = open(image[0],"a")
+ utime(image[0], None)
+ file.close()
+ except Exception:
+ logger.debug("Error in refreshing images for %s" % (id))
+ self.db.commit()
+ continue
+
+ if current_version is not None:
+ # The version was updated. Mark it as unread.
+ logger.debug("UPDATED: %s (%s)"
+ % (entry["title"], entry["link"]))
+ updated_objects += 1
+ else:
+ logger.debug("NEW: %s (%s)"
+ % (entry["title"], entry["link"]))
+ new_objects += 1
+
#articleTime = time.mktime(self.entries[id]["dateTuple"])
soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
images = soup('img')
baseurl = tmpEntry["link"]
- #if not id in ids:
if imageCache and len(images) > 0:
self.serial_execution_lock.release ()
have_serial_execution_lock = False
for img in images:
- filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
- if filename:
+ if not img.has_key('src'):
+ continue
+
+ filename = self.addImage(
+ configdir, self.key, baseurl, img['src'],
+ opener=opener)
+ if filename:
img['src']="file://%s" %filename
count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
if count == 0:
file = open(tmpEntry["contentLink"], "w")
file.write(soup.prettify())
file.close()
- if id in ids:
- self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
- self.db.commit()
- else:
- values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
- self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
- self.db.commit()
-# else:
-# try:
-# self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
-# self.db.commit()
-# filename = configdir+self.key+".d/"+id+".html"
-# file = open(filename,"a")
-# utime(filename, None)
-# file.close()
-# images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
-# for image in images:
-# file = open(image[0],"a")
-# utime(image[0], None)
-# file.close()
-# except:
-# pass
-
+
+ values = {'id': id,
+ 'title': tmpEntry["title"],
+ 'contentLink': tmpEntry["contentLink"],
+ 'contentHash': contentHash,
+ 'date': tmpEntry["date"],
+ 'updated': currentTime,
+ 'link': tmpEntry["link"],
+ 'read': 0}
+
+ if current_version is not None:
+ # This is an update. Ensure that the existing
+ # entry is replaced.
+ values['ROWID'] = current_version[1]
+
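+ # zip(*values.items()) splits the dict into a tuple of column names and
+ # a matching tuple of values, keeping the SQL column list and the bound
+ # parameters aligned. Supplying the existing ROWID makes INSERT OR
+ # REPLACE overwrite that row instead of inserting a new one.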
+ cols, values = zip(*values.items())
+ self.db.execute(
+ "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
+ % (','.join(cols), ','.join(('?',) * len(values))),
+ values)
+ self.db.commit()
+
# Register the object with Woodchuck and mark it as
# downloaded.
- if have_woodchuck:
- def e():
+ def register_object_transferred(
+ id, title, publication_time,
+ sent, received, object_size):
+ def doit():
+ logger.debug("Registering transfer of object %s"
+ % title)
try:
obj = wc()[self.key].object_register(
object_identifier=id,
- human_readable_name=tmpEntry["title"])
+ human_readable_name=title)
except woodchuck.ObjectExistsError:
obj = wc()[self.key][id]
else:
- # If the entry does not contain a publication
- # time, the attribute won't exist.
- pubtime = entry.get ('date_parsed', None)
- if pubtime:
- obj.publication_time = time.mktime (pubtime)
-
- received = (progress_handler.stats['received']
- - received_base)
- sent = progress_handler.stats['sent'] - sent_base
- obj.transferred (
- indicator=(woodchuck.Indicator.ApplicationVisual
- |woodchuck.Indicator.StreamWide),
+ obj.publication_time = publication_time
+ obj.transferred(
+ indicator=(
+ woodchuck.Indicator.ApplicationVisual
+ |woodchuck.Indicator.StreamWide),
transferred_down=received,
transferred_up=sent,
object_size=object_size)
- mainthread.execute(e, async=True)
+ return doit
+ if wc().available:
+ # If the entry does not contain a publication
+ # time, the attribute won't exist.
+ pubtime = entry.get('date_parsed', None)
+ if pubtime:
+ publication_time = time.mktime (pubtime)
+ else:
+ publication_time = None
+
+ sent, received, _ \
+ = entry_transfer_stats(**progress_handler.stats)
+ # sent and received are for objects (in
+ # particular, images) associated with this
+ # item. We also want to attribute the data
+ # transferred for the item's content. This is
+ # a good first approximation.
+ received += len(content)
+
+ mainthread.execute(
+ register_object_transferred(
+ id=id,
+ title=tmpEntry["title"],
+ publication_time=publication_time,
+ sent=sent, received=received,
+ object_size=object_size),
+ async=True)
self.db.commit()
+ sent, received, _ \
+ = feed_transfer_stats(**progress_handler.stats)
logger.debug (
"%s: Update successful: transferred: %d/%d; objects: %d)"
- % (self.key,
- progress_handler.stats['sent'],
- progress_handler.stats['received'],
- len (tmp.entries)))
+ % (url, sent, received, len (tmp.entries)))
mainthread.execute (wc_success, async=True)
success = True
postFeedUpdateFunc (self.key, updateTime, etag, modified,
title, *postFeedUpdateFuncArgs)
+ self.cache_invalidate()
+
def setEntryRead(self, id):
self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
self.db.commit()
- def e():
- if wc().available():
- try:
- wc()[self.key][id].used()
- except KeyError:
- pass
+ def doit():
+ try:
+ wc()[self.key][id].used()
+ except KeyError:
+ pass
+ if wc().available():
+ mainthread.execute(doit, async=True)
+ self.cache_invalidate('feed')
def setEntryUnread(self, id):
self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
self.db.commit()
+ self.cache_invalidate('feed')
def markAllAsRead(self):
self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
self.db.commit()
+ self.cache_invalidate('feed')
def isEntryRead(self, id):
- read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
- return read_status==1 # Returns True if read==1, and False if read==0
+ return self.lookup('feed', 'read', id) == 1
def getTitle(self, id):
- return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
+ return self.lookup('feed', 'title', id)
def getContentLink(self, id):
return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
+ def getContentHash(self, id):
+ return self.db.execute("SELECT contentHash FROM feed WHERE id=?;", (id,) ).fetchone()[0]
+
def getExternalLink(self, id):
return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
#ids.reverse()
return ids
- def getNextId(self, id):
+ def getNextId(self, id, forward=True):
+ if forward:
+ delta = 1
+ else:
+ delta = -1
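+ # The modulo below makes navigation wrap around: stepping forward from
+ # the last article returns the first, and stepping back from the first
+ # returns the last.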
ids = self.getIds()
index = ids.index(id)
- return ids[(index+1)%len(ids)]
+ return ids[(index + delta) % len(ids)]
def getPreviousId(self, id):
- ids = self.getIds()
- index = ids.index(id)
- return ids[(index-1)%len(ids)]
+ return self.getNextId(id, forward=False)
def getNumberOfUnreadItems(self):
return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
return text
def getContent(self, id):
- contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
+ """
+ Return the content of the article with the specified ID. If
+ the content is not available, returns None.
+ """
+ contentLink = self.getContentLink(id)
try:
- file = open(self.entries[id]["contentLink"])
- content = file.read()
- file.close()
- except:
- content = "Content unavailable"
+ with open(contentLink, 'r') as file:
+ content = file.read()
+ except Exception:
+ logger.exception("Failed get content for %s: reading %s failed",
+ id, contentLink)
+ content = None
return content
def extractDate(self, entry):
self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
self.db.commit()
- def e():
- if wc().available():
- try:
- wc()[self.key][id].files_deleted (
- woodchuck.DeletionResponse.Deleted)
- del wc()[self.key][id]
- except KeyError:
- pass
- mainthread.execute (e, async=True)
+ def doit():
+ try:
+ wc()[self.key][id].files_deleted (
+ woodchuck.DeletionResponse.Deleted)
+ del wc()[self.key][id]
+ except KeyError:
+ pass
+ if wc().available():
+ mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
def addArchivedArticle(self, title, link, date, configdir):
self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
self.db.commit()
- def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
+ # Feed.UpdateFeed calls this function.
+ def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
currentTime = 0
rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
for row in rows:
- currentTime = time.time()
- id = row[0]
- link = row[1]
- f = urllib2.urlopen(link)
- #entry["content"] = f.read()
- html = f.read()
- f.close()
- soup = BeautifulSoup(html)
- images = soup('img')
- baseurl = link
- for img in images:
- filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
- img['src']=filename
- self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+ try:
+ currentTime = time.time()
+ id = row[0]
+ link = row[1]
+ f = urllib2.urlopen(link)
+ #entry["content"] = f.read()
+ html = f.read()
+ f.close()
+ soup = BeautifulSoup(html)
+ images = soup('img')
+ baseurl = link
+ for img in images:
+ filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
+ img['src']=filename
+ self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+ self.db.commit()
+ contentLink = configdir+self.key+".d/"+id+".html"
+ file = open(contentLink, "w")
+ file.write(soup.prettify())
+ file.close()
+
+ self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
self.db.commit()
- contentLink = configdir+self.key+".d/"+id+".html"
- file = open(contentLink, "w")
- file.write(soup.prettify())
- file.close()
-
- self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
- self.db.commit()
- return (currentTime, None, None)
+ except Exception:
+ logger.error("Error updating Archived Article: %s %s"
+ % (link, traceback.format_exc()))
+
+ if postFeedUpdateFunc is not None:
+ postFeedUpdateFunc (self.key, currentTime, None, None, None,
+ *postFeedUpdateFuncArgs)
def purgeReadArticles(self):
rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
pass
self.removeEntry(id)
-class Listing:
+class Listing(BaseObject):
+ # Columns to cache.
+ cached_columns = (('feeds', 'updateTime'),
+ ('feeds', 'unread'),
+ ('feeds', 'title'),
+ ('categories', 'title'))
+
def _getdb(self):
try:
db = self.tls.db
# Check that Woodchuck's state is up to date with respect our
# state.
- updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
- wc_init (self, True if updater else False)
- if wc().available() and updater:
- # The list of known streams.
- streams = wc().streams_list ()
- stream_ids = [s.identifier for s in streams]
-
- # Register any unknown streams. Remove known streams from
- # STREAMS_IDS.
- for key in self.getListOfFeeds():
- title = self.getFeedTitle(key)
- # XXX: We should also check whether the list of
- # articles/objects in each feed/stream is up to date.
- if key not in stream_ids:
- logger.debug(
- "Registering previously unknown channel: %s (%s)"
- % (key, title,))
- # Use a default refresh interval of 6 hours.
- wc().stream_register (key, title, 6 * 60 * 60)
- else:
- # Make sure the human readable name is up to date.
- if wc()[key].human_readable_name != title:
- wc()[key].human_readable_name = title
- stream_ids.remove (key)
-
-
- # Unregister any streams that are no longer subscribed to.
- for id in stream_ids:
- logger.debug("Unregistering %s" % (id,))
- w.stream_unregister (id)
+ try:
+ updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
+ wc_init(config, self, True if updater else False)
+ if wc().available() and updater:
+ # The list of known streams.
+ streams = wc().streams_list ()
+ stream_ids = [s.identifier for s in streams]
+
+ # Register any unknown streams. Remove known streams from
+ # STREAMS_IDS.
+ for key in self.getListOfFeeds():
+ title = self.getFeedTitle(key)
+ # XXX: We should also check whether the list of
+ # articles/objects in each feed/stream is up to date.
+ if key not in stream_ids:
+ logger.debug(
+ "Registering previously unknown channel: %s (%s)"
+ % (key, title,))
+ wc().stream_register(
+ key, title,
+ self.config.getUpdateInterval() * 60 * 60)
+ else:
+ # Make sure the human readable name is up to date.
+ if wc()[key].human_readable_name != title:
+ wc()[key].human_readable_name = title
+ stream_ids.remove (key)
+ wc()[key].freshness \
+ = self.config.getUpdateInterval() * 60 * 60
+
+
+ # Unregister any streams that are no longer subscribed to.
+ for id in stream_ids:
+ logger.debug("Unregistering %s" % (id,))
+ wc().stream_unregister (id)
+ except Exception:
+ logger.exception("Registering streams with Woodchuck")
def importOldFormatFeeds(self):
"""This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
(title, key))
self.db.commit()
+ self.cache_invalidate('feeds')
self.updateUnread(key)
update_server_object().ArticleCountUpdated()
in_progress = stats['jobs-in-progress']
queued = stats['jobs-queued']
- percent = (100 * ((completed + in_progress / 2.))
- / (completed + in_progress + queued))
+ try:
+ percent = (100 * ((completed + in_progress / 2.))
+ / (completed + in_progress + queued))
+ except ZeroDivisionError:
+ percent = 100
update_server_object().UpdateProgress(
percent, completed, in_progress, queued, 0, 0, 0, key)
else:
self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
self.db.commit()
+ self.cache_invalidate('feeds')
if wc().available():
try:
logger.debug("Feed %s (%s) unknown." % (key, title))
def getFeedUpdateTime(self, key):
- return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
+ update_time = self.lookup('feeds', 'updateTime', key)
+
+ if not update_time:
+ return "Never"
+
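+ # Render the age as a rough, human-readable phrase. For example, an
+ # update 30 minutes old reads "Less than an hour ago" and one from
+ # three days ago reads "3 days ago"; anything much older falls back
+ # to the locale date string below.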
+ delta = time.time() - update_time
+
+ delta_hours = delta / (60. * 60.)
+ if delta_hours < .1:
+ return "A few minutes ago"
+ if delta_hours < .75:
+ return "Less than an hour ago"
+ if delta_hours < 1.5:
+ return "About an hour ago"
+ if delta_hours < 18:
+ return "About %d hours ago" % (int(delta_hours + 0.5),)
+
+ delta_days = delta_hours / 24.
+ if delta_days < 1.5:
+ return "About a day ago"
+ if delta_days < 18:
+ return "%d days ago" % (int(delta_days + 0.5),)
+
+ delta_weeks = delta_days / 7.
+ if delta_weeks <= 8:
+ return "%d weeks ago" % int(delta_weeks + 0.5)
+
+ delta_months = delta_days / 30.
+ if delta_months <= 30:
+ return "%d months ago" % int(delta_months + 0.5)
+
+ return time.strftime("%x", time.gmtime(update_time))
def getFeedNumberOfUnreadItems(self, key):
- return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
+ return self.lookup('feeds', 'unread', key)
def getFeedTitle(self, key):
- (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
+ title = self.lookup('feeds', 'title', key)
if title:
return title
- return url
+
+ return self.getFeedUrl(key)
def getFeedUrl(self, key):
return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
return keys
def getListOfCategories(self):
- rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
- keys = []
- for row in rows:
- if row[0]:
- keys.append(row[0])
- return keys
+ return list(row[0] for row in self.db.execute(
+ "SELECT id FROM categories ORDER BY rank;"))
def getCategoryTitle(self, id):
- row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
- return row[0]
+ return self.lookup('categories', 'title', id)
+
+ def getCategoryUnread(self, id):
+ count = 0
+ for key in self.getListOfFeeds(category=id):
+ try:
+ count = count + self.getFeedNumberOfUnreadItems(key)
+ except Exception:
+ pass
+ return count
def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
if order == "Most unread":
feed = self.getFeed(key)
self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
self.db.commit()
+ self.cache_invalidate('feeds')
def addFeed(self, title, url, id=None, category=1):
if not id:
human_readable_name=title,
freshness=6*60*60)
+ self.cache_invalidate('feeds')
return True
else:
return False
id=1
self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
self.db.commit()
+ self.cache_invalidate('categories')
def removeFeed(self, key):
if wc().available ():
try:
del wc()[key]
- except KeyError:
+ except (KeyError, woodchuck.Error):
logger.debug("Removing unregistered feed %s failed" % (key,))
rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
if isdir(self.configdir+key+".d/"):
rmtree(self.configdir+key+".d/")
+ self.cache_invalidate('feeds')
def removeCategory(self, key):
if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
self.db.commit()
+ self.cache_invalidate('categories')
#def saveConfig(self):
# self.listOfFeeds["feedingit-order"] = self.sortedKeys