1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # ============================================================================
22 # Author : Yves Marcoz
24 # Description : Simple RSS Reader
25 # ============================================================================
27 from __future__ import with_statement
30 from os.path import isfile, isdir
31 from shutil import rmtree
32 from os import mkdir, remove, utime
38 from BeautifulSoup import BeautifulSoup
39 from urlparse import urljoin
40 from calendar import timegm
43 from wc import wc, wc_init, woodchuck
46 from updatedbus import update_server_object
48 from jobmanager import JobManager
50 from httpprogresshandler import HTTPProgressHandler
54 logger = logging.getLogger(__name__)
def getId(string):
    """Return a stable hex digest of *string*.

    Used as a filesystem-safe identifier for articles and cached images
    (see Feed.addImage, which calls getId(url) to name the image file).
    NOTE(review): the `def` header of this function is missing from this
    dump; the name is reconstructed from its call sites -- confirm against
    the complete file.
    """
    return md5.new(string).hexdigest()
def download_callback(connection):
    """Abort an in-flight transfer when the application is shutting down.

    Installed as the progress callback of HTTPProgressHandler; raises
    KeyboardInterrupt to unwind the download.  *connection* is required by
    the callback signature but not used here.
    """
    manager = JobManager()
    if manager.do_quit:
        raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    """Build a urllib2 opener with progress reporting and an optional proxy.

    If *progress_handler* is supplied it is installed as-is; otherwise a
    default HTTPProgressHandler is installed whose callback aborts the
    transfer once the job manager wants to quit.  *proxy*, when given, is
    expected to be a urllib2 handler (e.g. urllib2.ProxyHandler).
    """
    # Fix: 'openers' was appended to without ever being initialised, and
    # the 'proxy' parameter was ignored, in the visible code.
    openers = []
    if progress_handler is not None:
        openers.append(progress_handler)
    else:
        openers.append(HTTPProgressHandler(download_callback))
    if proxy:
        openers.append(proxy)
    return urllib2.build_opener(*openers)
def transfer_stats(sent, received, **kwargs):
    """Start measuring a transfer.

    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.

    Extra keyword arguments (e.g. the full progress-handler stats dict)
    are accepted and ignored by both calls.
    """
    start_time = time.time()
    # Fix: 'start_sent' was never bound (NameError inside the closure) and
    # the continuation was never returned, in the visible code.
    start_sent = sent
    start_received = received

    def e(sent, received, **kwargs):
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)

    return e
97 # If not None, a subprocess.Popen object corresponding to a
98 # update_feeds.py process.
99 update_feed_process = None
101 update_feeds_iface = None
105 class BaseObject(object):
106 # Columns to cache. Classes that inherit from this and use the
107 # cache mechanism should set this to a list of tuples, each of
108 # which contains two entries: the table and the column. Note that
109 # both are case sensitive.
112 def cache_invalidate(self, table=None):
114 Invalidate the cache.
116 If table is not None, invalidate only the specified table.
117 Otherwise, drop the whole cache.
119 if not hasattr(self, 'cache'):
125 if table in self.cache:
126 del self.cache[table]
128 def lookup(self, table, column, id=None):
130 Look up a column or value. Uses a cache for columns in
131 cached_columns. Note: the column is returned unsorted.
133 if not hasattr(self, 'cache'):
136 # Cache data for at most 60 seconds.
139 cache = self.cache[table]
141 if time.time() - cache[None] > 60:
142 # logger.debug("%s: Cache too old: clearing" % (table,))
143 del self.cache[table]
149 or (table, column) not in self.cached_columns):
150 # The cache is empty or the caller wants a column that we
152 if (table, column) in self.cached_columns:
153 # logger.debug("%s: Rebuilding cache" % (table,))
157 self.cache[table] = cache = {}
159 for t, c in self.cached_columns:
171 where = "where id = '%s'" % id
175 results = self.db.execute(
176 "SELECT %s FROM %s %s" % (','.join(columns), table, where))
182 for index, value in enumerate(values):
183 cache[columns[index]][i] = value
192 results.append(values[0])
196 cache = self.cache[table]
200 value = cache[column][id]
201 # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
204 return cache[column].values()
206 # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
209 class Feed(BaseObject):
211 cached_columns = (('feed', 'read'),
214 serial_execution_lock = threading.Lock()
219 except AttributeError:
220 db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
223 db = property(_getdb)
225 def __init__(self, configdir, key):
227 self.configdir = configdir
228 self.dir = "%s/%s.d" %(self.configdir, self.key)
229 self.tls = threading.local ()
231 if not isdir(self.dir):
233 if not isfile("%s/%s.db" %(self.dir, self.key)):
234 self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
235 self.db.execute("CREATE TABLE images (id text, imagePath text);")
238 def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
239 filename = configdir+key+".d/"+getId(url)
240 if not isfile(filename):
243 opener = downloader(proxy=proxy)
245 abs_url = urljoin(baseurl,url)
246 f = opener.open(abs_url)
248 with open(filename, "w") as outf:
253 except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
254 logger.info("Could not download image %s: %s"
255 % (abs_url, str (exception)))
258 exception = sys.exc_info()[0]
260 logger.info("Downloading image %s: %s" %
261 (abs_url, traceback.format_exc()))
269 #open(filename,"a").close() # "Touch" the file
270 file = open(filename,"a")
271 utime(filename, None)
275 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
276 if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
279 self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
281 JobManager().execute(doit(), self.key, priority=priority)
283 def send_update_request():
284 global update_feeds_iface
285 if update_feeds_iface is None:
286 bus=dbus.SessionBus()
287 remote_object = bus.get_object(
288 "org.marcoz.feedingit", # Connection name
289 "/org/marcoz/feedingit/update" # Object's path
291 update_feeds_iface = dbus.Interface(
292 remote_object, 'org.marcoz.feedingit')
295 update_feeds_iface.Update(self.key)
297 logger.error("Invoking org.marcoz.feedingit.Update: %s"
299 update_feeds_iface = None
303 if send_update_request():
304 # Success! It seems we were able to start the update
305 # daemon via dbus (or, it was already running).
308 global update_feed_process
309 if (update_feed_process is None
310 or update_feed_process.poll() is not None):
311 # The update_feeds process is not running. Start it.
312 update_feeds = os.path.join(os.path.dirname(__file__),
314 argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
315 logger.debug("Starting update_feeds: running %s"
317 update_feed_process = subprocess.Popen(argv)
318 # Make sure the dbus calls go to the right process:
320 update_feeds_iface = None
323 if send_update_request():
327 def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
328 logger.debug("Updating %s" % url)
331 have_serial_execution_lock = False
333 update_start = time.time ()
335 progress_handler = HTTPProgressHandler(download_callback)
337 openers = [progress_handler]
339 openers.append (proxy)
340 kwargs = {'handlers':openers}
342 feed_transfer_stats = transfer_stats(0, 0)
344 tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
345 download_duration = time.time () - update_start
347 opener = downloader(progress_handler, proxy)
349 if JobManager().do_quit:
350 raise KeyboardInterrupt
352 process_start = time.time()
354 # Expiry time is in hours
355 expiry = float(expiryTime) * 3600.
364 wc().stream_register (self.key, "", 6 * 60 * 60)
365 except woodchuck.ObjectExistsError:
368 wc()[self.key].updated (
369 indicator=(woodchuck.Indicator.ApplicationVisual
370 |woodchuck.Indicator.StreamWide),
371 transferred_down=progress_handler.stats['received'],
372 transferred_up=progress_handler.stats['sent'],
373 transfer_time=update_start,
374 transfer_duration=download_duration,
375 new_objects=new_objects,
376 updated_objects=updated_objects,
377 objects_inline=new_objects + updated_objects)
380 "Failed to register update of %s with woodchuck!"
383 http_status = tmp.get ('status', 200)
385 # Check if the parse was succesful. If the http status code
386 # is 304, then the download was successful, but there is
387 # nothing new. Indeed, no content is returned. This make a
388 # 304 look like an error because there are no entries and the
389 # parse fails. But really, everything went great! Check for
391 if http_status == 304:
392 logger.debug("%s: No changes to feed." % (self.key,))
393 mainthread.execute(wc_success, async=True)
395 elif len(tmp["entries"])==0 and not tmp.get('version', None):
396 # An error occured fetching or parsing the feed. (Version
397 # will be either None if e.g. the connection timed our or
398 # '' if the data is not a proper feed)
400 "Error fetching %s: version is: %s: error: %s"
401 % (url, str (tmp.get('version', 'unset')),
402 str (tmp.get ('bozo_exception', 'Unknown error'))))
404 def register_stream_update_failed(http_status):
406 logger.debug("%s: stream update failed!" % self.key)
409 # It's not easy to get the feed's title from here.
410 # At the latest, the next time the application is
411 # started, we'll fix up the human readable name.
412 wc().stream_register (self.key, "", 6 * 60 * 60)
413 except woodchuck.ObjectExistsError:
415 ec = woodchuck.TransferStatus.TransientOther
416 if 300 <= http_status and http_status < 400:
417 ec = woodchuck.TransferStatus.TransientNetwork
418 if 400 <= http_status and http_status < 500:
419 ec = woodchuck.TransferStatus.FailureGone
420 if 500 <= http_status and http_status < 600:
421 ec = woodchuck.TransferStatus.TransientNetwork
422 wc()[self.key].update_failed(ec)
426 register_stream_update_failed(
427 http_status=http_status),
430 currentTime = time.time()
431 # The etag and modified value should only be updated if the content was not null
437 modified = tmp["modified"]
441 abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
442 f = opener.open(abs_url)
445 outf = open(self.dir+"/favicon.ico", "w")
449 except (urllib2.HTTPError, urllib2.URLError), exception:
450 logger.debug("Could not download favicon %s: %s"
451 % (abs_url, str (exception)))
453 self.serial_execution_lock.acquire ()
454 have_serial_execution_lock = True
456 #reversedEntries = self.getEntries()
457 #reversedEntries.reverse()
459 tmp["entries"].reverse()
460 for entry in tmp["entries"]:
461 # Yield so as to make the main thread a bit more
465 entry_transfer_stats = transfer_stats(
466 *feed_transfer_stats(**progress_handler.stats)[0:2])
468 if JobManager().do_quit:
469 raise KeyboardInterrupt
473 date = self.extractDate(entry)
477 entry["title"] = "No Title"
485 entry["author"] = None
486 if(not(entry.has_key("id"))):
488 content = self.extractContent(entry)
489 object_size = len (content)
490 tmpEntry = {"title":entry["title"], "content":content,
491 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
492 id = self.generateUniqueId(tmpEntry)
494 current_version = self.db.execute(
495 'select date, ROWID from feed where id=?',
497 if (current_version is not None
498 and current_version[0] == date):
499 logger.debug("ALREADY DOWNLOADED %s (%s)"
500 % (entry["title"], entry["link"]))
501 ## This article is already present in the feed listing. Update the "updated" time, so it doesn't expire
502 self.db.execute("UPDATE feed SET updated=? WHERE id=?;",(currentTime,id))
504 logger.debug("Updating already downloaded files for %s" %(id))
505 filename = configdir+self.key+".d/"+id+".html"
506 file = open(filename,"a")
507 utime(filename, None)
509 images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
511 file = open(image[0],"a")
512 utime(image[0], None)
515 logger.debug("Error in refreshing images for %s" % (id))
519 if current_version is not None:
520 # The version was updated. Mark it as unread.
521 logger.debug("UPDATED: %s (%s)"
522 % (entry["title"], entry["link"]))
525 logger.debug("NEW: %s (%s)"
526 % (entry["title"], entry["link"]))
529 #articleTime = time.mktime(self.entries[id]["dateTuple"])
530 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
532 baseurl = tmpEntry["link"]
533 if imageCache and len(images) > 0:
534 self.serial_execution_lock.release ()
535 have_serial_execution_lock = False
537 if not img.has_key('src'):
540 filename = self.addImage(
541 configdir, self.key, baseurl, img['src'],
544 img['src']="file://%s" %filename
545 count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
547 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
551 object_size += os.path.getsize (filename)
552 except os.error, exception:
553 logger.error ("Error getting size of %s: %s"
554 % (filename, exception))
555 self.serial_execution_lock.acquire ()
556 have_serial_execution_lock = True
558 tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
559 file = open(tmpEntry["contentLink"], "w")
560 file.write(soup.prettify())
564 'title': tmpEntry["title"],
565 'contentLink': tmpEntry["contentLink"],
566 'date': tmpEntry["date"],
567 'updated': currentTime,
568 'link': tmpEntry["link"],
571 if current_version is not None:
572 # This is an update. Ensure that the existing
574 values['ROWID'] = current_version[1]
576 cols, values = zip(*values.items())
578 "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
579 % (','.join(cols), ','.join(('?',) * len(values))),
583 # Register the object with Woodchuck and mark it as
585 def register_object_transferred(
586 id, title, publication_time,
587 sent, received, object_size):
589 logger.debug("Registering transfer of object %s"
592 obj = wc()[self.key].object_register(
593 object_identifier=id,
594 human_readable_name=title)
595 except woodchuck.ObjectExistsError:
596 obj = wc()[self.key][id]
598 obj.publication_time = publication_time
601 woodchuck.Indicator.ApplicationVisual
602 |woodchuck.Indicator.StreamWide),
603 transferred_down=received,
605 object_size=object_size)
608 # If the entry does not contain a publication
609 # time, the attribute won't exist.
610 pubtime = entry.get('date_parsed', None)
612 publication_time = time.mktime (pubtime)
614 publication_time = None
617 = entry_transfer_stats(**progress_handler.stats)
618 # sent and received are for objects (in
619 # particular, images) associated with this
620 # item. We also want to attribute the data
621 # transferred for the item's content. This is
622 # a good first approximation.
623 received += len(content)
626 register_object_transferred(
628 title=tmpEntry["title"],
629 publication_time=publication_time,
630 sent=sent, received=received,
631 object_size=object_size),
636 = feed_transfer_stats(**progress_handler.stats)
638 "%s: Update successful: transferred: %d/%d; objects: %d)"
639 % (url, sent, received, len (tmp.entries)))
640 mainthread.execute (wc_success, async=True)
643 rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
645 self.removeEntry(row[0])
647 from glob import glob
649 for file in glob(configdir+self.key+".d/*"):
653 # put the two dates into matching format
655 lastmodDate = stats[8]
657 expDate = time.time()-expiry*3
658 # check if image-last-modified-date is outdated
660 if expDate > lastmodDate:
664 #print 'Removing', file
666 # XXX: Tell woodchuck.
667 remove(file) # commented out for testing
669 except OSError, exception:
671 logger.error('Could not remove %s: %s'
672 % (file, str (exception)))
673 logger.debug("updated %s: %fs in download, %fs in processing"
674 % (self.key, download_duration,
675 time.time () - process_start))
677 logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
681 if have_serial_execution_lock:
682 self.serial_execution_lock.release ()
686 rows = self.db.execute("SELECT MAX(date) FROM feed;")
690 logger.error("Fetching update time: %s: %s"
691 % (str(e), traceback.format_exc()))
698 title = tmp.feed.title
699 except (AttributeError, UnboundLocalError), exception:
701 if postFeedUpdateFunc is not None:
702 postFeedUpdateFunc (self.key, updateTime, etag, modified,
703 title, *postFeedUpdateFuncArgs)
705 self.cache_invalidate()
# Mark article *id* as read, tell Woodchuck the object was "used" (on the
# main thread), and drop the cached 'feed' columns so isEntryRead() sees
# the change.
# NOTE(review): interior lines are missing from this dump (presumably a
# db.commit(), the 'def doit():' wrapper and the wc() availability/try
# guard around the .used() call) -- confirm against the complete file.
707 def setEntryRead(self, id):
708 self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
713 wc()[self.key][id].used()
717 mainthread.execute(doit, async=True)
718 self.cache_invalidate('feed')
# Mark article *id* as unread and invalidate the cached 'feed' columns.
# NOTE(review): one line is missing from this dump between the UPDATE and
# the cache invalidation -- presumably a self.db.commit(); confirm.
720 def setEntryUnread(self, id):
721 self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
723 self.cache_invalidate('feed')
# Flip every unread article in this feed to read in one statement, then
# invalidate the cached 'feed' columns.
# NOTE(review): one line is missing from this dump after the UPDATE --
# presumably a self.db.commit(); confirm against the complete file.
725 def markAllAsRead(self):
726 self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
728 self.cache_invalidate('feed')
730 def isEntryRead(self, id):
731 return self.lookup('feed', 'read', id) == 1
733 def getTitle(self, id):
734 return self.lookup('feed', 'title', id)
736 def getContentLink(self, id):
737 return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
739 def getExternalLink(self, id):
740 return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
742 def getDate(self, id):
743 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
744 return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
746 def getDateTuple(self, id):
747 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
748 return time.localtime(dateStamp)
750 def getDateStamp(self, id):
751 return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
753 def generateUniqueId(self, entry):
755 Generate a stable identifier for the article. For the same
756 entry, this should result in the same identifier. If
757 possible, the identifier should remain the same even if the
760 # Prefer the entry's id, which is supposed to be globally
762 key = entry.get('id', None)
764 # Next, try the link to the content.
765 key = entry.get('link', None)
767 # Ok, the title and the date concatenated are likely to be
769 key = entry.get('title', None) + entry.get('date', None)
771 # Hmm, the article's content will at least guarantee no
772 # false negatives (i.e., missing articles)
773 key = entry.get('content', None)
775 # If all else fails, just use a random number.
776 key = str (random.random ())
779 def getIds(self, onlyUnread=False):
781 rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
783 rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
790 def getNextId(self, id, forward=True):
796 index = ids.index(id)
797 return ids[(index + delta) % len(ids)]
799 def getPreviousId(self, id):
800 return self.getNextId(id, forward=False)
802 def getNumberOfUnreadItems(self):
803 return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
805 def getNumberOfEntries(self):
806 return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
808 def getArticle(self, entry):
809 #self.setEntryRead(id)
810 #entry = self.entries[id]
811 title = entry['title']
812 #content = entry.get('content', entry.get('summary_detail', {}))
813 content = entry["content"]
816 author = entry['author']
817 date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
819 #text = '''<div style="color: black; background-color: white;">'''
820 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
821 text += "<html><head><title>" + title + "</title>"
822 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
823 #text += '<style> body {-webkit-user-select: none;} </style>'
824 text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
826 text += "<BR /><small><i>Author: " + author + "</i></small>"
827 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
828 text += "<BR /><BR />"
830 text += "</body></html>"
833 def getContent(self, id):
835 Return the content of the article with the specified ID. If
836 the content is not available, returns None.
838 contentLink = self.getContentLink(id)
840 with open(contentLink, 'r') as file:
841 content = file.read()
843 logger.exception("Failed get content for %s: reading %s failed",
848 def extractDate(self, entry):
849 if entry.has_key("updated_parsed"):
850 return timegm(entry["updated_parsed"])
851 elif entry.has_key("published_parsed"):
852 return timegm(entry["published_parsed"])
856 def extractContent(self, entry):
858 if entry.has_key('summary'):
859 content = entry.get('summary', '')
860 if entry.has_key('content'):
861 if len(entry.content[0].value) > len(content):
862 content = entry.content[0].value
864 content = entry.get('description', '')
867 def removeEntry(self, id):
868 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
872 except OSError, exception:
873 logger.error("Deleting %s: %s" % (contentLink, str (exception)))
874 self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
875 self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
880 wc()[self.key][id].files_deleted (
881 woodchuck.DeletionResponse.Deleted)
882 del wc()[self.key][id]
886 mainthread.execute (doit, async=True)
888 class ArchivedArticles(Feed):
889 def addArchivedArticle(self, title, link, date, configdir):
890 id = self.generateUniqueId({"date":date, "title":title})
891 values = (id, title, link, date, 0, link, 0)
892 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
895 # Feed.UpdateFeed calls this function.
896 def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
898 rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
901 currentTime = time.time()
904 f = urllib2.urlopen(link)
905 #entry["content"] = f.read()
908 soup = BeautifulSoup(html)
912 filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
914 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
916 contentLink = configdir+self.key+".d/"+id+".html"
917 file = open(contentLink, "w")
918 file.write(soup.prettify())
921 self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
924 logger.error("Error updating Archived Article: %s %s"
925 % (link,traceback.format_exc(),))
927 if postFeedUpdateFunc is not None:
928 postFeedUpdateFunc (self.key, currentTime, None, None, None,
929 *postFeedUpdateFuncArgs)
931 def purgeReadArticles(self):
932 rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
935 self.removeArticle(row[0])
937 def removeArticle(self, id):
938 rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
941 count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
948 class Listing(BaseObject):
950 cached_columns = (('feeds', 'updateTime'),
953 ('categories', 'title'))
958 except AttributeError:
959 db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
962 db = property(_getdb)
964 # Lists all the feeds in a dictionary, and expose the data
965 def __init__(self, config, configdir):
967 self.configdir = configdir
969 self.tls = threading.local ()
972 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
974 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
975 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
976 self.addCategory("Default Category")
977 if isfile(self.configdir+"feeds.pickle"):
978 self.importOldFormatFeeds()
980 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
982 from string import find, upper
983 if find(upper(table[0]), "WIDGET")<0:
984 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
985 self.db.execute("UPDATE feeds SET widget=1;")
987 if find(upper(table[0]), "CATEGORY")<0:
988 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
989 self.addCategory("Default Category")
990 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
991 self.db.execute("UPDATE feeds SET category=1;")
996 # Check that Woodchuck's state is up to date with respect our
999 updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
1000 wc_init(config, self, True if updater else False)
1001 if wc().available() and updater:
1002 # The list of known streams.
1003 streams = wc().streams_list ()
1004 stream_ids = [s.identifier for s in streams]
1006 # Register any unknown streams. Remove known streams from
1008 for key in self.getListOfFeeds():
1009 title = self.getFeedTitle(key)
1010 # XXX: We should also check whether the list of
1011 # articles/objects in each feed/stream is up to date.
1012 if key not in stream_ids:
1014 "Registering previously unknown channel: %s (%s)"
1016 wc().stream_register(
1018 self.config.getUpdateInterval() * 60 * 60)
1020 # Make sure the human readable name is up to date.
1021 if wc()[key].human_readable_name != title:
1022 wc()[key].human_readable_name = title
1023 stream_ids.remove (key)
1024 wc()[key].freshness \
1025 = self.config.getUpdateInterval() * 60 * 60
1028 # Unregister any streams that are no longer subscribed to.
1029 for id in stream_ids:
1030 logger.debug("Unregistering %s" % (id,))
1031 w.stream_unregister (id)
1033 logger.exception("Registering streams with Woodchuck")
# Convert feeds saved by the legacy pickle-based store into the sqlite
# schema, then delete the old feeds.pickle file.
# NOTE(review): many interior lines of this method are missing from this
# dump (the rank counter, the read_status assignment, the per-item loop
# header, the surrounding try/except) -- only comments are added here.
1035 def importOldFormatFeeds(self):
1036 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
# 'rss' is presumably the legacy module whose Listing reads feeds.pickle.
1038 listing = rss.Listing(self.configdir)
1040 for id in listing.getListOfFeeds():
1043 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
1044 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
# Copy every article of the old feed into the new per-feed database.
1047 feed = listing.getFeed(id)
1048 new_feed = self.getFeed(id)
1050 items = feed.getIds()[:]
1053 if feed.isEntryRead(item):
1057 date = timegm(feed.getDateTuple(item))
1058 title = feed.getTitle(item)
1059 newId = new_feed.generateUniqueId({"date":date, "title":title})
# BUG(review): tuple(time.time()) raises TypeError -- a float is not
# iterable.  The 'updated' column holds a float elsewhere, so this was
# almost certainly meant to be plain time.time().  Flagged only; fix
# separately.
1060 values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
1061 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
1062 new_feed.db.commit()
# Carry over any cached images associated with the article.
1064 images = feed.getImages(item)
1065 for image in images:
1066 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
1067 new_feed.db.commit()
1070 self.updateUnread(id)
# except-branch of a try block that is not visible in this dump.
1072 logger.error("importOldFormatFeeds: %s"
1073 % (traceback.format_exc(),))
# The legacy store is removed only after (attempted) conversion.
1074 remove(self.configdir+"feeds.pickle")
1077 def addArchivedArticle(self, key, index):
1078 feed = self.getFeed(key)
1079 title = feed.getTitle(index)
1080 link = feed.getExternalLink(index)
1081 date = feed.getDate(index)
1082 count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
1084 self.addFeed("Archived Articles", "", id="ArchivedArticles")
1086 archFeed = self.getFeed("ArchivedArticles")
1087 archFeed.addArchivedArticle(title, link, date, self.configdir)
1088 self.updateUnread("ArchivedArticles")
1090 def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
1092 if expiryTime is None:
1093 expiryTime = self.config.getExpiry()
1095 # Default to 24 hours
1098 (use_proxy, proxy) = self.config.getProxy()
1101 if imageCache is None:
1102 imageCache = self.config.getImageCache()
1104 feed = self.getFeed(key)
1105 (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
1107 modified = time.struct_time(eval(modified))
1111 self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
1112 priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
1114 def _queuePostFeedUpdate(self, *args, **kwargs):
1115 mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
1117 def _postFeedUpdate(self, key, updateTime, etag, modified, title):
1121 modified=str(tuple(modified))
1123 self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
1125 self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
1127 if title is not None:
1128 self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
1131 self.cache_invalidate('feeds')
1132 self.updateUnread(key)
1134 update_server_object().ArticleCountUpdated()
1136 stats = JobManager().stats()
1137 global jobs_at_start
1138 completed = stats['jobs-completed'] - jobs_at_start
1139 in_progress = stats['jobs-in-progress']
1140 queued = stats['jobs-queued']
1143 percent = (100 * ((completed + in_progress / 2.))
1144 / (completed + in_progress + queued))
1145 except ZeroDivisionError:
1148 update_server_object().UpdateProgress(
1149 percent, completed, in_progress, queued, 0, 0, 0, key)
1151 if in_progress == 0 and queued == 0:
1152 jobs_at_start = stats['jobs-completed']
1154 def getFeed(self, key):
1155 if key == "ArchivedArticles":
1156 return ArchivedArticles(self.configdir, key)
1157 return Feed(self.configdir, key)
1159 def editFeed(self, key, title, url, category=None):
1161 self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
1163 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
1165 self.cache_invalidate('feeds')
1167 if wc().available():
1169 wc()[key].human_readable_name = title
1171 logger.debug("Feed %s (%s) unknown." % (key, title))
1173 def getFeedUpdateTime(self, key):
1174 update_time = self.lookup('feeds', 'updateTime', key)
1179 delta = time.time() - update_time
1181 delta_hours = delta / (60. * 60.)
1182 if delta_hours < .1:
1183 return "A few minutes ago"
1184 if delta_hours < .75:
1185 return "Less than an hour ago"
1186 if delta_hours < 1.5:
1187 return "About an hour ago"
1188 if delta_hours < 18:
1189 return "About %d hours ago" % (int(delta_hours + 0.5),)
1191 delta_days = delta_hours / 24.
1192 if delta_days < 1.5:
1193 return "About a day ago"
1195 return "%d days ago" % (int(delta_days + 0.5),)
1197 delta_weeks = delta_days / 7.
1198 if delta_weeks <= 8:
1199 return "%d weeks ago" % int(delta_weeks + 0.5)
1201 delta_months = delta_days / 30.
1202 if delta_months <= 30:
1203 return "%d months ago" % int(delta_months + 0.5)
1205 return time.strftime("%x", time.gmtime(update_time))
1207 def getFeedNumberOfUnreadItems(self, key):
1208 return self.lookup('feeds', 'unread', key)
1210 def getFeedTitle(self, key):
1211 title = self.lookup('feeds', 'title', key)
1215 return self.getFeedUrl(key)
1217 def getFeedUrl(self, key):
1218 return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1220 def getFeedCategory(self, key):
1221 return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1223 def getListOfFeeds(self, category=None):
1225 rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
1227 rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
def getListOfCategories(self):
    """Return every category id, ordered by its rank."""
    rows = self.db.execute("SELECT id FROM categories ORDER BY rank;")
    return [row[0] for row in rows]
def getCategoryTitle(self, id):
    """Look up and return the title of the given category."""
    title = self.lookup('categories', 'title', id)
    return title
def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
    """Return feed ids in *category*, sorted according to *order*.

    :param order: one of "Most unread", "Least unread", "Most recent",
        "Least recent"; any other value (e.g. "Manual") sorts by the
        user-defined rank.
    :param onlyUnread: when True, only feeds with unread items.
    :param category: category id to restrict the listing to.
    :return: list of feed ids (NULL ids are skipped).
    """
    orderings = {
        "Most unread": "ORDER BY unread DESC",
        "Least unread": "ORDER BY unread",
        "Most recent": "ORDER BY updateTime DESC",
        "Least recent": "ORDER BY updateTime",
    }
    # "Manual" and any unknown value fall back to the manual rank.
    tmp = orderings.get(order, "ORDER BY rank")
    # Parameterized instead of "%s" string interpolation.
    if onlyUnread:
        sql = "SELECT id FROM feeds WHERE unread>0 AND category=? " + tmp
    else:
        sql = "SELECT id FROM feeds WHERE category=? " + tmp
    rows = self.db.execute(sql, (category,))
    return [row[0] for row in rows if row[0]]
def getFavicon(self, key):
    """Return the path of the feed's cached favicon, or False if none.

    The favicon, when downloaded, lives in the feed's per-feed cache
    directory ``<configdir><key>.d/``.
    """
    filename = "%s%s.d/favicon.ico" % (self.configdir, key)
    if isfile(filename):
        return filename
    # No favicon has been cached for this feed.
    return False
def updateUnread(self, key):
    """Recompute the feed's unread count and persist it in ``feeds``."""
    feed = self.getFeed(key)
    self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
    # Persist immediately so other readers of the DB see the new count.
    self.db.commit()
    self.cache_invalidate('feeds')
def addFeed(self, title, url, id=None, category=1):
    """Subscribe to a new feed.

    :param title: human-readable feed title.
    :param url: feed URL.
    :param id: feed id; derived from the URL when not supplied.
    :param category: category id the feed is placed in (default 1).
    :return: True if the feed was added, False if it already existed.
    """
    if not id:
        id = getId(url)
    count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
    if count:
        # Already subscribed; nothing to do.
        return False

    # Append the new feed at the end of the manual ordering.
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    if max_rank is None:
        max_rank = 0
    values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
    self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
    self.db.commit()

    # Ask for the feed object, it will create the necessary tables
    self.getFeed(id)

    if wc().available():
        # Register the stream with Woodchuck.  Update approximately
        # every six hours.
        wc().stream_register(stream_identifier=id,
                             human_readable_name=title,
                             freshness=6*60*60)
    return True
def addCategory(self, title):
    """Create a new category with the given title at the end of the ranking."""
    rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
    if rank is None:
        # Empty table: MAX(rank)+1 yields NULL; start ranking at 1.
        rank = 1
    id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
    if id is None:
        id = 1
    self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
    self.db.commit()
def removeFeed(self, key):
    """Unsubscribe a feed: drop its Woodchuck registration, its DB row,
    its slot in the manual ranking, and its on-disk cache directory.
    """
    if wc().available():
        # Best effort: the feed may never have been registered.
        try:
            del wc()[key]
        except (KeyError, woodchuck.Error):
            # Fixed: `except KeyError, woodchuck.Error:` caught only
            # KeyError and rebound the name woodchuck.Error.
            logger.debug("Removing unregistered feed %s failed" % (key,))

    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
    self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
    # Close the gap the removed feed leaves in the ranking.
    self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
    self.db.commit()

    if isdir(self.configdir+key+".d/"):
        rmtree(self.configdir+key+".d/")
def removeCategory(self, key):
    """Delete a category, reassigning its feeds to the default category (1).

    The last remaining category is never removed.
    """
    if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
        # Close the gap in the category ranking.
        self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
        # Orphaned feeds fall back to the default category.
        self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
        self.db.commit()
1339 #def saveConfig(self):
1340 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
1341 # file = open(self.configdir+"feeds.pickle", "w")
1342 # pickle.dump(self.listOfFeeds, file)
def moveUp(self, key):
    """Move the feed one position up (towards rank 0) in the manual order.

    Restored: the `if rank > 0:` guard was lost in the mangled source;
    without it the top feed's move would corrupt the ranking.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    if rank > 0:
        # Swap ranks with the feed currently one position above.
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
        self.db.commit()
def moveCategoryUp(self, key):
    """Move the category one position up (towards rank 0).

    Restored: the `if rank > 0:` guard was lost in the mangled source.
    """
    rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    if rank > 0:
        # Swap ranks with the category currently one position above.
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
        self.db.commit()
def moveDown(self, key):
    """Move the feed one position down in the manual order, unless last.

    Restored: the `if rank < max_rank:` guard was lost in the mangled
    source — `max_rank` was computed but never used, and the bottom
    feed's move would corrupt the ranking.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    if rank < max_rank:
        # Swap ranks with the feed currently one position below.
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
        self.db.commit()
def moveCategoryDown(self, key):
    """Move the category one position down in the ranking, unless last.

    Restored: the `if rank < max_rank:` guard was lost in the mangled
    source — `max_rank` was computed but never used.
    """
    rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
    if rank < max_rank:
        # Swap ranks with the category currently one position below.
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
        self.db.commit()