and current_version[0] == date):
logger.debug("ALREADY DOWNLOADED %s (%s)"
% (entry["title"], entry["link"]))
+ # This article is already present in the feed listing.  Bump its
+ # "updated" timestamp so that it does not expire.
+ self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id))
+ try:
+ logger.debug("Updating already downloaded files for %s" % (id,))
+ filename = configdir + self.key + ".d/" + id + ".html"
+ # Touch the cached HTML and its images (utime with None sets the
+ # timestamps to now), presumably so mtime-based cleanup keeps them.
+ file = open(filename, "a")
+ utime(filename, None)
+ file.close()
+ images = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,)).fetchall()
+ for image in images:
+ file = open(image[0], "a")
+ utime(image[0], None)
+ file.close()
+ except Exception:
+ logger.exception("Error refreshing cached files for %s" % (id,))
+ self.db.commit()
continue
if current_version is not None:
#text = '''<div style="color: black; background-color: white;">'''
text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
text += "<html><head><title>" + title + "</title>"
- text += '<meta http-equiv="Content-Type" content="text/html; charset="UTF-8"/>\n'
+ text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
#text += '<style> body {-webkit-user-select: none;} </style>'
text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
if author != None:
return text
def getContent(self, id):
- contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
+ """
+ Return the content of the article with the specified ID. If
+ the content is not available, returns None.
+ """
+ contentLink = self.getContentLink(id)
try:
- file = open(self.entries[id]["contentLink"])
- content = file.read()
- file.close()
- except:
- content = "Content unavailable"
+ with open(contentLink, 'r') as file:
+ content = file.read()
+ except Exception:
+ logger.exception("Failed get content for %s: reading %s failed",
+ id, contentLink)
+ content = None
return content
def extractDate(self, entry):
self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
self.db.commit()
- def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
+ # Feed.updateFeed calls this function.
+ def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
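+ # If given, postFeedUpdateFunc is called once the pending rows have
+ # been processed, as postFeedUpdateFunc(self.key, currentTime, None,
+ # None, None, *postFeedUpdateFuncArgs); the Nones fill slots this
+ # archived-article update has nothing to report for.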
currentTime = 0
rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
for row in rows:
- currentTime = time.time()
- id = row[0]
- link = row[1]
- f = urllib2.urlopen(link)
- #entry["content"] = f.read()
- html = f.read()
- f.close()
- soup = BeautifulSoup(html)
- images = soup('img')
- baseurl = link
- for img in images:
- filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
- img['src']=filename
- self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+ try:
+ currentTime = time.time()
+ id = row[0]
+ link = row[1]
+ f = urllib2.urlopen(link)
+ #entry["content"] = f.read()
+ html = f.read()
+ f.close()
+ soup = BeautifulSoup(html)
+ images = soup('img')
+ baseurl = link
+ # Download each referenced image (addImage caches it locally) and
+ # point the <img> tag at the cached copy instead of the original URL.
+ for img in images:
+ filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
+ img['src'] = filename
+ self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+ self.db.commit()
+ # Write the rewritten article into this feed's cache directory.
+ contentLink = configdir + self.key + ".d/" + id + ".html"
+ file = open(contentLink, "w")
+ file.write(soup.prettify())
+ file.close()
+
+ self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
self.db.commit()
- contentLink = configdir+self.key+".d/"+id+".html"
- file = open(contentLink, "w")
- file.write(soup.prettify())
- file.close()
-
- self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
- self.db.commit()
- return (currentTime, None, None)
+ except Exception:
+ # Log and carry on; a failure on one article should not abort
+ # the rest of the archive update.
+ logger.error("Error updating archived article: %s %s"
+ % (link, traceback.format_exc(),))
+
+ if postFeedUpdateFunc is not None:
+ postFeedUpdateFunc (self.key, currentTime, None, None, None,
+ *postFeedUpdateFuncArgs)
def purgeReadArticles(self):
rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
# state.
try:
updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
- wc_init (self, True if updater else False)
+ wc_init(config, self, True if updater else False)
if wc().available() and updater:
# The list of known streams.
streams = wc().streams_list ()
logger.debug(
"Registering previously unknown channel: %s (%s)"
% (key, title,))
- # Use a default refresh interval of 6 hours.
- wc().stream_register (key, title, 6 * 60 * 60)
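+ # getUpdateInterval() is in hours (the old hard-coded value here was
+ # 6 hours); the freshness hint is assumed to be in seconds, hence the
+ # * 60 * 60 conversion.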
+ wc().stream_register(
+ key, title,
+ self.config.getUpdateInterval() * 60 * 60)
else:
# Make sure the human readable name is up to date.
if wc()[key].human_readable_name != title:
wc()[key].human_readable_name = title
stream_ids.remove (key)
+ wc()[key].freshness \
+ = self.config.getUpdateInterval() * 60 * 60
# Unregister any streams that are no longer subscribed to.
if wc().available ():
try:
del wc()[key]
- except KeyError:
+ except (KeyError, woodchuck.Error):
logger.debug("Removing unregistered feed %s failed" % (key,))
rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]