logger.debug("%s: No changes to feed." % (self.key,))
mainthread.execute(wc_success, async=True)
success = True
- elif len(tmp["entries"])==0 and not tmp.version:
+ elif len(tmp["entries"])==0 and not tmp.get('version', None):
# An error occurred fetching or parsing the feed. (Version
# will be either None if e.g. the connection timed out, or
# '' if the data is not a proper feed.)
logger.error(
"Error fetching %s: version is: %s: error: %s"
- % (url, str (tmp.version),
+ % (url, str (tmp.get('version', 'unset')),
str (tmp.get ('bozo_exception', 'Unknown error'))))
logger.debug(tmp)
def register_stream_update_failed(http_status):
#reversedEntries = self.getEntries()
#reversedEntries.reverse()
- ids = self.getIds()
-
tmp["entries"].reverse()
for entry in tmp["entries"]:
# Yield so as to make the main thread a bit more responsive.
"date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
id = self.generateUniqueId(tmpEntry)
- current_version \
- = self.db.execute('select date from feed where id=?',
- (id,)).fetchone()
+ current_version = self.db.execute(
+ 'select date, ROWID from feed where id=?',
+ (id,)).fetchone()
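+ # current_version is a (date, ROWID) tuple if the entry is
+ # already in the database, or None if it is new.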
if (current_version is not None
and current_version[0] == date):
logger.debug("ALREADY DOWNLOADED %s (%s)"
# The version was updated. Mark it as unread.
logger.debug("UPDATED: %s (%s)"
% (entry["title"], entry["link"]))
- self.setEntryUnread(id)
updated_objects += 1
else:
logger.debug("NEW: %s (%s)"
soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
images = soup('img')
baseurl = tmpEntry["link"]
- #if not id in ids:
if imageCache and len(images) > 0:
self.serial_execution_lock.release ()
have_serial_execution_lock = False
for img in images:
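+ # Some entries contain <img> tags without a src attribute;
+ # skip them, since img['src'] below would raise a KeyError.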
+ if not img.has_key('src'):
+ continue
+
filename = self.addImage(
configdir, self.key, baseurl, img['src'],
opener=opener)
file = open(tmpEntry["contentLink"], "w")
file.write(soup.prettify())
file.close()
- if id in ids:
- self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
- self.db.commit()
- else:
- values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
- self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
- self.db.commit()
-# else:
-# try:
-# self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
-# self.db.commit()
-# filename = configdir+self.key+".d/"+id+".html"
-# file = open(filename,"a")
-# utime(filename, None)
-# file.close()
-# images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
-# for image in images:
-# file = open(image[0],"a")
-# utime(image[0], None)
-# file.close()
-# except:
-# pass
-
+
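+ # Map feed-table column names to the values for this entry's row.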
+ values = {'id': id,
+ 'title': tmpEntry["title"],
+ 'contentLink': tmpEntry["contentLink"],
+ 'date': tmpEntry["date"],
+ 'updated': currentTime,
+ 'link': tmpEntry["link"],
+ 'read': 0}
+
+ if current_version is not None:
+ # This is an update. Ensure that the existing
+ # entry is replaced.
+ values['ROWID'] = current_version[1]
+
+ cols, values = zip(*values.items())
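+ # INSERT OR REPLACE deletes any conflicting row (here, the one
+ # with the same ROWID) before inserting, so reusing the existing
+ # ROWID rewrites the entry in place.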
+ self.db.execute(
+ "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
+ % (','.join(cols), ','.join(('?',) * len(values))),
+ values)
+ self.db.commit()
+
# Register the object with Woodchuck and mark it as
# downloaded.
def register_object_transferred(
# Check that Woodchuck's state is up to date with respect to
# our state.
- updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
- wc_init (self, True if updater else False)
- if wc().available() and updater:
- # The list of known streams.
- streams = wc().streams_list ()
- stream_ids = [s.identifier for s in streams]
-
- # Register any unknown streams. Remove known streams from
- # STREAMS_IDS.
- for key in self.getListOfFeeds():
- title = self.getFeedTitle(key)
- # XXX: We should also check whether the list of
- # articles/objects in each feed/stream is up to date.
- if key not in stream_ids:
- logger.debug(
- "Registering previously unknown channel: %s (%s)"
- % (key, title,))
- # Use a default refresh interval of 6 hours.
- wc().stream_register (key, title, 6 * 60 * 60)
- else:
- # Make sure the human readable name is up to date.
- if wc()[key].human_readable_name != title:
- wc()[key].human_readable_name = title
- stream_ids.remove (key)
-
-
- # Unregister any streams that are no longer subscribed to.
- for id in stream_ids:
- logger.debug("Unregistering %s" % (id,))
- w.stream_unregister (id)
+ try:
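+ # Woodchuck synchronization is best effort: any failure is
+ # logged by the except clause below and the update continues.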
+ updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
+ wc_init (self, True if updater else False)
+ if wc().available() and updater:
+ # The list of known streams.
+ streams = wc().streams_list ()
+ stream_ids = [s.identifier for s in streams]
+
+ # Register any unknown streams. Remove known streams from
+ # stream_ids.
+ for key in self.getListOfFeeds():
+ title = self.getFeedTitle(key)
+ # XXX: We should also check whether the list of
+ # articles/objects in each feed/stream is up to date.
+ if key not in stream_ids:
+ logger.debug(
+ "Registering previously unknown channel: %s (%s)"
+ % (key, title,))
+ # Use a default refresh interval of 6 hours.
+ wc().stream_register (key, title, 6 * 60 * 60)
+ else:
+ # Make sure the human readable name is up to date.
+ if wc()[key].human_readable_name != title:
+ wc()[key].human_readable_name = title
+ stream_ids.remove (key)
+
+
+ # Unregister any streams that are no longer subscribed to.
+ for id in stream_ids:
+ logger.debug("Unregistering %s" % (id,))
+ wc().stream_unregister (id)
+ except Exception:
+ logger.exception("Registering streams with Woodchuck")
def importOldFormatFeeds(self):
"""This function loads feeds that are saved in an outdated format, and converts them to sqlite"""