1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # ============================================================================
22 # Author : Yves Marcoz
24 # Description : Simple RSS Reader
25 # ============================================================================
27 from __future__ import with_statement
30 from os.path import isfile, isdir
31 from shutil import rmtree
32 from os import mkdir, remove, utime
38 from BeautifulSoup import BeautifulSoup
39 from urlparse import urljoin
40 from calendar import timegm
43 from wc import wc, wc_init, woodchuck
46 from updatedbus import update_server_object
48 from jobmanager import JobManager
50 from httpprogresshandler import HTTPProgressHandler
# Module-level logger for this file.
logger = logging.getLogger(__name__)

# NOTE(review): the enclosing `def getId(string):` header is elided from
# this listing; the visible body returns an md5 hex digest used as a
# stable filesystem/database identifier.
    return md5.new(string).hexdigest()
def download_callback(connection):
    """Abort the in-flight transfer when the application is shutting down.

    Called periodically during downloads; raises KeyboardInterrupt to
    unwind the transfer once JobManager().do_quit has been set.
    """
    if not JobManager().do_quit:
        return
    raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    """Return a urllib2 opener suitable for feed/image downloads.

    Uses the supplied progress_handler when given; otherwise installs a
    fresh HTTPProgressHandler wired to download_callback so transfers
    abort cleanly on shutdown.  A proxy handler is added when supplied.

    Fix: the visible code referenced `openers` before assignment and
    appended the default handler unconditionally; restore the list
    initialisation, the else-branch and the proxy handling.
    """
    openers = []

    if progress_handler is not None:
        openers.append(progress_handler)
    else:
        openers.append(HTTPProgressHandler(download_callback))

    if proxy:
        openers.append(proxy)

    return urllib2.build_opener(*openers)
def transfer_stats(sent, received, **kwargs):
    """
    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.

    Fix: the visible code never recorded `start_sent` (the closure
    would raise NameError) and fell off the end returning None instead
    of the continuation; record the baseline and return `e`.
    """
    start_time = time.time()
    start_sent = sent
    start_received = received

    def e(sent, received, **kwargs):
        # Deltas relative to the values captured at creation time.
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)

    return e
# If not None, a subprocess.Popen object corresponding to a
# update_feeds.py process.
update_feed_process = None

# Cached dbus.Interface proxy for org.marcoz.feedingit; reset to None
# whenever a call on it fails so it is re-created lazily.
update_feeds_iface = None
class BaseObject(object):
    """Mixin giving subclasses a short-lived (60 second) per-table cache
    of selected columns from their SQLite database."""

    # Columns to cache.  Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column.  Note that
    # both are case sensitive.

    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        # NOTE(review): this listing is elided — the early return when no
        # cache exists and the `table is None` branch are missing here.
        if not hasattr(self, 'cache'):
        if table in self.cache:
            del self.cache[table]

    def lookup(self, table, column, id=None):
        """
        Look up a column or value.  Uses a cache for columns in
        cached_columns.  Note: the column is returned unsorted.

        NOTE(review): heavily elided listing — try/except frames, the
        cache-rebuild loop internals and several returns are missing;
        verify structure against the full source.
        """
        if not hasattr(self, 'cache'):
        # Cache data for at most 60 seconds.
            cache = self.cache[table]
            # Expire a table cache that is older than 60 seconds.
            if time.time() - cache[None] > 60:
                # logger.debug("%s: Cache too old: clearing" % (table,))
                del self.cache[table]
            or (table, column) not in self.cached_columns):
            # The cache is empty or the caller wants a column that we
            if (table, column) in self.cached_columns:
                # logger.debug("%s: Rebuilding cache" % (table,))
                self.cache[table] = cache = {}
                for t, c in self.cached_columns:
                    where = "where id = '%s'" % id
                results = self.db.execute(
                    "SELECT %s FROM %s %s" % (','.join(columns), table, where))
                for index, value in enumerate(values):
                    cache[columns[index]][i] = value
                    results.append(values[0])
            cache = self.cache[table]
            value = cache[column][id]
            # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
            return cache[column].values()
        # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
class Feed(BaseObject):
    """One RSS/Atom subscription, backed by a per-feed SQLite database
    (<configdir>/<key>.db) and a directory of cached HTML and images."""

    # Columns cached via BaseObject.lookup().
    # NOTE(review): the tuple is truncated in this listing; the full
    # source lists further (table, column) pairs.
    cached_columns = (('feed', 'read'),

    # Serializes feed-update critical sections across worker threads.
    serial_execution_lock = threading.Lock()

    # NOTE(review): the `def _getdb(self):` header and its try-body are
    # elided here; `db` is a lazily opened, per-thread SQLite connection
    # stored on self.tls.
    except AttributeError:
        db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
    db = property(_getdb)
    def __init__(self, configdir, key):
        """Open (creating on first use) the feed's directory and database.

        NOTE(review): elided in this listing — `self.key = key`, the
        mkdir() call under the isdir check, and the commit after table
        creation.
        """
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        self.tls = threading.local ()

        if not isdir(self.dir):
        if not isfile("%s/%s.db" %(self.dir, self.key)):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        """Download the image at `url` (resolved against `baseurl`) into
        the feed directory under its md5-derived name; if the file already
        exists, just touch it so expiry is postponed.

        NOTE(review): elided in this listing — the try/except frames, the
        `if not opener` guard, the copy loop writing `f` into `outf`, and
        the return statements.
        """
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
                opener = downloader(proxy=proxy)

                abs_url = urljoin(baseurl,url)
                f = opener.open(abs_url)
                with open(filename, "w") as outf:
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                exception = sys.exc_info()[0]

                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
            #open(filename,"a").close() # "Touch" the file
            file = open(filename,"a")
            utime(filename, None)
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Queue an update of this feed.

        When running inside update_feeds.py, the work is queued on the
        local JobManager; otherwise the update daemon is poked over D-Bus,
        and started via subprocess if it is not yet running.

        NOTE(review): connecting lines are elided from this listing — the
        `doit()` generator/wrapper, try/except frames around the D-Bus
        calls, closing parentheses and the else branch.
        """
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
                self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
            JobManager().execute(doit(), self.key, priority=priority)

            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    # Lazily (re-)create the D-Bus proxy.
                    bus=dbus.SessionBus()
                    remote_object = bus.get_object(
                            "org.marcoz.feedingit", # Connection name
                            "/org/marcoz/feedingit/update" # Object's path
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')
                    update_feeds_iface.Update(self.key)
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                    # Force the proxy to be re-created on the next attempt.
                    update_feeds_iface = None

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                update_feeds_iface = None

            # Retry now that the daemon should be up.
            if send_update_request():
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Worker that actually downloads and processes the feed `url`.

        Fetches with feedparser (honouring etag/modified for conditional
        GET), stores new/updated articles and their images, reports
        transfer statistics to Woodchuck, expires old articles, and
        finally invokes postFeedUpdateFunc with the resulting
        updateTime/etag/modified/title.

        NOTE(review): this listing is heavily elided — try/except frames,
        loop headers, `success` bookkeeping and several assignments are
        missing; the indentation below is reconstructed and must be
        checked against the full source.
        """
        logger.debug("Updating %s" % url)
        have_serial_execution_lock = False
        update_start = time.time ()

        progress_handler = HTTPProgressHandler(download_callback)
        openers = [progress_handler]
        openers.append (proxy)
        kwargs = {'handlers':openers}

        # Continuation measuring bytes transferred for the whole feed.
        feed_transfer_stats = transfer_stats(0, 0)
        tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
        download_duration = time.time () - update_start

        opener = downloader(progress_handler, proxy)

        if JobManager().do_quit:
            raise KeyboardInterrupt

        process_start = time.time()

        # Expiry time is in hours
        expiry = float(expiryTime) * 3600.

        # (Nested wc_success helper: report a successful stream update to
        # Woodchuck; its `def`/try lines are elided from this listing.)
        wc().stream_register (self.key, "", 6 * 60 * 60)
        except woodchuck.ObjectExistsError:
        wc()[self.key].updated (
            indicator=(woodchuck.Indicator.ApplicationVisual
                       |woodchuck.Indicator.StreamWide),
            transferred_down=progress_handler.stats['received'],
            transferred_up=progress_handler.stats['sent'],
            transfer_time=update_start,
            transfer_duration=download_duration,
            new_objects=new_objects,
            updated_objects=updated_objects,
            objects_inline=new_objects + updated_objects)
        "Failed to register update of %s with woodchuck!"

        http_status = tmp.get ('status', 200)

        # Check if the parse was succesful.  If the http status code
        # is 304, then the download was successful, but there is
        # nothing new.  Indeed, no content is returned.  This make a
        # 304 look like an error because there are no entries and the
        # parse fails.  But really, everything went great!  Check for
        # this first.
        if http_status == 304:
            logger.debug("%s: No changes to feed." % (self.key,))
            mainthread.execute(wc_success, async=True)
        elif len(tmp["entries"])==0 and not tmp.get('version', None):
            # An error occured fetching or parsing the feed.  (Version
            # will be either None if e.g. the connection timed our or
            # '' if the data is not a proper feed)
                "Error fetching %s: version is: %s: error: %s"
                % (url, str (tmp.get('version', 'unset')),
                   str (tmp.get ('bozo_exception', 'Unknown error'))))

            def register_stream_update_failed(http_status):
                logger.debug("%s: stream update failed!" % self.key)

                # It's not easy to get the feed's title from here.
                # At the latest, the next time the application is
                # started, we'll fix up the human readable name.
                wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                # Map the HTTP status onto a Woodchuck failure class.
                ec = woodchuck.TransferStatus.TransientOther
                if 300 <= http_status and http_status < 400:
                    ec = woodchuck.TransferStatus.TransientNetwork
                if 400 <= http_status and http_status < 500:
                    ec = woodchuck.TransferStatus.FailureGone
                if 500 <= http_status and http_status < 600:
                    ec = woodchuck.TransferStatus.TransientNetwork
                wc()[self.key].update_failed(ec)

            register_stream_update_failed(
                http_status=http_status),

            # (Successful parse branch; its `else:` header is elided.)
            currentTime = time.time()
            # The etag and modified value should only be updated if the content was not null
            modified = tmp["modified"]

            # Best-effort favicon download for this feed.
            abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
            f = opener.open(abs_url)
            outf = open(self.dir+"/favicon.ico", "w")
            except (urllib2.HTTPError, urllib2.URLError), exception:
                logger.debug("Could not download favicon %s: %s"
                             % (abs_url, str (exception)))

            self.serial_execution_lock.acquire ()
            have_serial_execution_lock = True

            #reversedEntries = self.getEntries()
            #reversedEntries.reverse()

            # Process oldest entries first.
            tmp["entries"].reverse()
            for entry in tmp["entries"]:
                # Yield so as to make the main thread a bit more
                # responsive.

                # Per-entry byte counts, relative to the feed totals.
                entry_transfer_stats = transfer_stats(
                    *feed_transfer_stats(**progress_handler.stats)[0:2])

                if JobManager().do_quit:
                    raise KeyboardInterrupt

                date = self.extractDate(entry)
                entry["title"] = "No Title"
                entry["author"] = None
                if(not(entry.has_key("id"))):
                content = self.extractContent(entry)
                object_size = len (content)
                tmpEntry = {"title":entry["title"], "content":content,
                            "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                id = self.generateUniqueId(tmpEntry)

                current_version = self.db.execute(
                    'select date, ROWID from feed where id=?',
                if (current_version is not None
                    and current_version[0] == date):
                    logger.debug("ALREADY DOWNLOADED %s (%s)"
                                 % (entry["title"], entry["link"]))
                    ## This article is already present in the feed listing. Update the "updated" time, so it doesn't expire
                    self.db.execute("UPDATE feed SET updated=? WHERE id=?;",(currentTime,id))
                    logger.debug("Updating already downloaded files for %s" %(id))
                    # Touch the cached HTML and images so expiry is pushed back.
                    filename = configdir+self.key+".d/"+id+".html"
                    file = open(filename,"a")
                    utime(filename, None)
                    images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                    file = open(image[0],"a")
                    utime(image[0], None)
                    logger.debug("Error in refreshing images for %s" % (id))

                if current_version is not None:
                    # The version was updated.  Mark it as unread.
                    logger.debug("UPDATED: %s (%s)"
                                 % (entry["title"], entry["link"]))
                    self.setEntryUnread(id)
                    logger.debug("NEW: %s (%s)"
                                 % (entry["title"], entry["link"]))

                #articleTime = time.mktime(self.entries[id]["dateTuple"])
                soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                baseurl = tmpEntry["link"]
                if imageCache and len(images) > 0:
                    # Download images without holding the serial lock.
                    self.serial_execution_lock.release ()
                    have_serial_execution_lock = False
                    if not img.has_key('src'):
                    filename = self.addImage(
                        configdir, self.key, baseurl, img['src'],
                    img['src']="file://%s" %filename
                    count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                    object_size += os.path.getsize (filename)
                    except os.error, exception:
                        logger.error ("Error getting size of %s: %s"
                                      % (filename, exception))
                    self.serial_execution_lock.acquire ()
                    have_serial_execution_lock = True

                # Write the rendered article page to disk.
                tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                file = open(tmpEntry["contentLink"], "w")
                file.write(soup.prettify())

                # Row values for the INSERT OR REPLACE below (dict header
                # elided in this listing).
                    'title': tmpEntry["title"],
                    'contentLink': tmpEntry["contentLink"],
                    'date': tmpEntry["date"],
                    'updated': currentTime,
                    'link': tmpEntry["link"],
                if current_version is not None:
                    # This is an update.  Ensure that the existing
                    # row is replaced rather than duplicated.
                    values['ROWID'] = current_version[1]

                cols, values = zip(*values.items())
                    "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
                    % (','.join(cols), ','.join(('?',) * len(values))),

                # Register the object with Woodchuck and mark it as
                # downloaded.
                def register_object_transferred(
                    id, title, publication_time,
                    sent, received, object_size):
                    logger.debug("Registering transfer of object %s"
                    obj = wc()[self.key].object_register(
                        object_identifier=id,
                        human_readable_name=title)
                    except woodchuck.ObjectExistsError:
                        obj = wc()[self.key][id]
                    obj.publication_time = publication_time
                        woodchuck.Indicator.ApplicationVisual
                        |woodchuck.Indicator.StreamWide),
                        transferred_down=received,
                        object_size=object_size)

                # If the entry does not contain a publication
                # time, the attribute won't exist.
                pubtime = entry.get('date_parsed', None)
                publication_time = time.mktime (pubtime)
                publication_time = None

                    = entry_transfer_stats(**progress_handler.stats)
                # sent and received are for objects (in
                # particular, images) associated with this
                # item.  We also want to attribute the data
                # transferred for the item's content.  This is
                # a good first approximation.
                received += len(content)

                register_object_transferred(
                    title=tmpEntry["title"],
                    publication_time=publication_time,
                    sent=sent, received=received,
                    object_size=object_size),

            # Feed-level statistics and the success report.
                = feed_transfer_stats(**progress_handler.stats)
                "%s: Update successful: transferred: %d/%d; objects: %d)"
                % (url, sent, received, len (tmp.entries)))
            mainthread.execute (wc_success, async=True)

            # Expire old articles: unread articles get twice the expiry.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            self.removeEntry(row[0])

            # Sweep the feed directory for stale files (images, HTML).
            from glob import glob
            for file in glob(configdir+self.key+".d/*"):
                # put the two dates into matching format
                lastmodDate = stats[8]
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                if expDate > lastmodDate:
                    #print 'Removing', file
                    # XXX: Tell woodchuck.
                    remove(file) # commented out for testing
                    except OSError, exception:
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
        logger.debug("updated %s: %fs in download, %fs in processing"
                     % (self.key, download_duration,
                        time.time () - process_start))
        logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        if have_serial_execution_lock:
            self.serial_execution_lock.release ()

        # Derive the feed's new updateTime from the newest stored article.
        rows = self.db.execute("SELECT MAX(date) FROM feed;")
        logger.error("Fetching update time: %s: %s"
                     % (str(e), traceback.format_exc()))
        title = tmp.feed.title
        except (AttributeError, UnboundLocalError), exception:
        if postFeedUpdateFunc is not None:
            postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                title, *postFeedUpdateFuncArgs)

        self.cache_invalidate()
    def setEntryRead(self, id):
        """Mark the article `id` as read and tell Woodchuck it was used."""
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
        # NOTE(review): elided here — the commit, the `def doit():`
        # wrapper around the Woodchuck call and its availability guard.
            wc()[self.key][id].used()
        # Woodchuck calls must run on the main thread.
        mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')
    def setEntryUnread(self, id):
        """Mark the article `id` as unread and drop the cached read flags."""
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        # NOTE(review): the commit (and any Woodchuck notification) appears
        # to be elided between these lines.
        self.cache_invalidate('feed')
    def markAllAsRead(self):
        """Mark every unread article in this feed as read."""
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        # NOTE(review): the commit appears to be elided here.
        self.cache_invalidate('feed')
732 def isEntryRead(self, id):
733 return self.lookup('feed', 'read', id) == 1
735 def getTitle(self, id):
736 return self.lookup('feed', 'title', id)
738 def getContentLink(self, id):
739 return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
741 def getExternalLink(self, id):
742 return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
744 def getDate(self, id):
745 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
746 return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
748 def getDateTuple(self, id):
749 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
750 return time.localtime(dateStamp)
752 def getDateStamp(self, id):
753 return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        entry changes superficially.

        NOTE(review): the `if key is None:` fall-through guards between
        the candidates and the final `return getId(...)` are elided from
        this listing.
        """
        # Prefer the entry's id, which is supposed to be globally
        # unique.
        key = entry.get('id', None)
        # Next, try the link to the content.
        key = entry.get('link', None)
        # Ok, the title and the date concatenated are likely to be
        # fairly stable.
        # NOTE(review): if either 'title' or 'date' is missing this
        # concatenates None and raises TypeError — confirm the guards in
        # the full source.
        key = entry.get('title', None) + entry.get('date', None)
        # Hmm, the article's content will at least guarantee no
        # false negatives (i.e., missing articles)
        key = entry.get('content', None)
        # If all else fails, just use a random number.
        key = str (random.random ())
    def getIds(self, onlyUnread=False):
        """Return article ids newest-first; optionally only unread ones.

        NOTE(review): the if/else keywords around the two queries and the
        final list-building return are elided from this listing.
        """
        rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
    def getNextId(self, id, forward=True):
        """Return the id adjacent to `id` in date order, wrapping around.

        NOTE(review): the lines fetching `ids` and deriving `delta`
        (+1/-1 from `forward`) are elided from this listing.
        """
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]
801 def getPreviousId(self, id):
802 return self.getNextId(id, forward=False)
804 def getNumberOfUnreadItems(self):
805 return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
807 def getNumberOfEntries(self):
808 return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
    def getArticle(self, entry):
        """Render the entry dict as a standalone XHTML page (string).

        NOTE(review): elided in this listing — the assignment of `link`,
        the line appending the article content, and the final
        `return text`.
        """
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        # Assemble the page: doctype, head, header links, then the body.
        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += "</body></html>"
    def getContent(self, id):
        """Return the stored HTML for article `id`, or a placeholder when
        the file cannot be read.

        NOTE(review): the try/except around the file read and the return
        are elided.  Also, the read uses self.entries[id]["contentLink"]
        instead of the freshly queried `contentLink` — looks like a
        leftover from the pre-sqlite format; verify against the full
        source.
        """
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        file = open(self.entries[id]["contentLink"])
        content = file.read()
        content = "Content unavailable"
    def extractDate(self, entry):
        """Return the entry's timestamp (UTC seconds), preferring
        updated_parsed over published_parsed.

        NOTE(review): the fallback branch for entries carrying neither
        field is elided from this listing.
        """
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])
    def extractContent(self, entry):
        """Return the richest available body text for the entry: the
        longer of summary and content[0].value.

        NOTE(review): the initialisation of `content`, the else branch
        using 'description' and the final return are elided from this
        listing.
        """
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        content = entry.get('description', '')
    def removeEntry(self, id):
        """Delete article `id`: its HTML file, its db rows and images, and
        notify Woodchuck that the files are gone.

        NOTE(review): elided in this listing — the try around removing the
        content file, the commit, the `def doit():` wrapper and the
        wc().available() guard.
        """
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        except OSError, exception:
            logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
            wc()[self.key][id].files_deleted (
                woodchuck.DeletionResponse.Deleted)
            del wc()[self.key][id]
        # Woodchuck calls must run on the main thread.
        mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
    """Pseudo-feed holding user-archived articles; pages are fetched from
    their original sites rather than from an RSS document."""

    def addArchivedArticle(self, title, link, date, configdir):
        """Insert a new, not-yet-downloaded (updated=0), unread row."""
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        # NOTE(review): the commit is elided here.

    # Overrides Feed.updateFeed: "updating" means downloading the archived
    # pages (rows with updated=0) and their images.
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        # NOTE(review): heavily elided — loop headers, the page read, soup
        # image iteration, file close and commits are missing here.
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        currentTime = time.time()
        f = urllib2.urlopen(link)
        #entry["content"] = f.read()
        soup = BeautifulSoup(html)
        filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
        self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
        contentLink = configdir+self.key+".d/"+id+".html"
        file = open(contentLink, "w")
        file.write(soup.prettify())
        self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
        # Mirrors Feed._updateFeed's (updateTime, etag, modified) result.
        return (currentTime, None, None)

    def purgeReadArticles(self):
        """Remove every archived article already marked read.
        NOTE(review): the row loop header is elided here."""
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        self.removeArticle(row[0])

    def removeArticle(self, id):
        """Remove an archived article, deleting only images not shared
        with other articles.  NOTE(review): heavily elided listing."""
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
class Listing(BaseObject):
    """Top-level registry of feeds and categories, backed by feeds.db."""

    # Columns cached via BaseObject.lookup().
    # NOTE(review): the tuple is truncated in this listing; further
    # (table, column) pairs exist in the full source.
    cached_columns = (('feeds', 'updateTime'),
                      ('categories', 'title'))

    # NOTE(review): the `def _getdb(self):` header and its try-body are
    # elided; `db` is a lazily opened, per-thread SQLite connection.
    except AttributeError:
        db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
    db = property(_getdb)
    # Lists all the feeds in a dictionary, and expose the data
    def __init__(self, config, configdir):
        """Open feeds.db (creating/migrating the schema on first run) and
        reconcile the subscription list with Woodchuck.

        NOTE(review): several lines are elided in this listing — among
        them `self.config = config`, the new-database check around the
        CREATE TABLEs, commits and the try/except frames.
        """
        self.configdir = configdir
        self.tls = threading.local ()

        table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
            # Fresh install: create the schema and seed default content.
            self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
            self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
            self.addCategory("Default Category")
            if isfile(self.configdir+"feeds.pickle"):
                self.importOldFormatFeeds()
                self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
            # Migrate pre-widget / pre-category schemas in place.
            from string import find, upper
            if find(upper(table[0]), "WIDGET")<0:
                self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                self.db.execute("UPDATE feeds SET widget=1;")
            if find(upper(table[0]), "CATEGORY")<0:
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                self.db.execute("UPDATE feeds SET category=1;")

        # Check that Woodchuck's state is up to date with respect our
        # own subscription list.
        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
        wc_init (self, True if updater else False)
        if wc().available() and updater:
            # The list of known streams.
            streams = wc().streams_list ()
            stream_ids = [s.identifier for s in streams]

            # Register any unknown streams.  Remove known streams from
            # the list as we see them.
            for key in self.getListOfFeeds():
                title = self.getFeedTitle(key)
                # XXX: We should also check whether the list of
                # articles/objects in each feed/stream is up to date.
                if key not in stream_ids:
                        "Registering previously unknown channel: %s (%s)"
                    # Use a default refresh interval of 6 hours.
                    wc().stream_register (key, title, 6 * 60 * 60)
                    # Make sure the human readable name is up to date.
                    if wc()[key].human_readable_name != title:
                        wc()[key].human_readable_name = title
                    stream_ids.remove (key)

            # Unregister any streams that are no longer subscribed to.
            for id in stream_ids:
                logger.debug("Unregistering %s" % (id,))
                # NOTE(review): `w` is not defined in the visible code —
                # presumably this should be wc(); confirm against the
                # full source.
                w.stream_unregister (id)
            logger.exception("Registering streams with Woodchuck")
    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
        # NOTE(review): heavily elided — the `import rss`, rank handling,
        # item loop headers, try/except frames and commits are missing
        # from this listing.
        listing = rss.Listing(self.configdir)
        for id in listing.getListOfFeeds():
            values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
            feed = listing.getFeed(id)
            new_feed = self.getFeed(id)

            items = feed.getIds()[:]
            if feed.isEntryRead(item):
                date = timegm(feed.getDateTuple(item))
                title = feed.getTitle(item)
                newId = new_feed.generateUniqueId({"date":date, "title":title})
                # NOTE(review): `tuple(time.time())` below would raise
                # TypeError (a float is not iterable) — looks like it
                # should be time.time(); verify against the full source.
                values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
                new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                new_feed.db.commit()

                images = feed.getImages(item)
                for image in images:
                    new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
                    new_feed.db.commit()
            self.updateUnread(id)
        logger.error("importOldFormatFeeds: %s"
                     % (traceback.format_exc(),))
        # Conversion done (or failed): drop the old pickle file.
        remove(self.configdir+"feeds.pickle")
    def addArchivedArticle(self, key, index):
        """Copy article `index` of feed `key` into the ArchivedArticles
        pseudo-feed, creating that feed on first use."""
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
        # NOTE(review): the `if count == 0:` guard appears elided here.
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")
    # Fill in per-feed defaults from the config, then queue the actual
    # update on the Feed object.
    # NOTE(review): the remainder of the signature (the continuation line
    # with priority) and several connecting lines are elided from this
    # listing.
    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
        # Default to 24 hours
        (use_proxy, proxy) = self.config.getProxy()
        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        # Convert the stored string back into a time tuple.
        # NOTE(review): eval() on a db-stored string — tolerable only
        # because the value is written by _postFeedUpdate itself; confirm
        # in the full source.
        modified = time.struct_time(eval(modified))
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
1100 def _queuePostFeedUpdate(self, *args, **kwargs):
1101 mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        """Main-thread completion handler for a feed update: persist the
        new updateTime/etag/modified/title, refresh the unread count and
        emit D-Bus progress signals.

        NOTE(review): elided in this listing — the if/else deciding
        whether updateTime is stored, the modified-to-string guard,
        commits, the parameter tuple of the title UPDATE and the `try`
        around the percent computation.
        """
            modified=str(tuple(modified))
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )

        if title is not None:
            # Only adopt the feed's own title if the user left it blank.
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",

        self.cache_invalidate('feeds')
        self.updateUnread(key)

        update_server_object().ArticleCountUpdated()

        # Estimate overall progress from the job-queue statistics.
        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

            percent = (100 * ((completed + in_progress / 2.))
                       / (completed + in_progress + queued))
        except ZeroDivisionError:

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        # All work finished: reset the baseline for the next batch.
        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']
1140 def getFeed(self, key):
1141 if key == "ArchivedArticles":
1142 return ArchivedArticles(self.configdir, key)
1143 return Feed(self.configdir, key)
    def editFeed(self, key, title, url, category=None):
        """Update a feed's title/url (and optionally category) and push
        the new title to Woodchuck.

        NOTE(review): the if category/else keywords, the commit and the
        try/except around the Woodchuck rename are elided from this
        listing.
        """
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.cache_invalidate('feeds')

        if wc().available():
                wc()[key].human_readable_name = title
                logger.debug("Feed %s (%s) unknown." % (key, title))
    def getFeedUpdateTime(self, key):
        """Return a human-readable description of how long ago the feed
        was updated ("A few minutes ago", "2 days ago", ...).

        NOTE(review): the guard handling a feed that was never updated
        (falsy updateTime) and the `if delta_days < 8:` branch above the
        "%d days ago" return are elided from this listing.
        """
        update_time = self.lookup('feeds', 'updateTime', key)

        delta = time.time() - update_time

        delta_hours = delta / (60. * 60.)
        if delta_hours < .1:
            return "A few minutes ago"
        if delta_hours < .75:
            return "Less than an hour ago"
        if delta_hours < 1.5:
            return "About an hour ago"
        if delta_hours < 18:
            return "About %d hours ago" % (int(delta_hours + 0.5),)

        delta_days = delta_hours / 24.
        if delta_days < 1.5:
            return "About a day ago"
            return "%d days ago" % (int(delta_days + 0.5),)

        delta_weeks = delta_days / 7.
        if delta_weeks <= 8:
            return "%d weeks ago" % int(delta_weeks + 0.5)

        delta_months = delta_days / 30.
        if delta_months <= 30:
            return "%d months ago" % int(delta_months + 0.5)

        # Very old: fall back to an absolute date.
        return time.strftime("%x", time.gmtime(update_time))
1193 def getFeedNumberOfUnreadItems(self, key):
1194 return self.lookup('feeds', 'unread', key)
    def getFeedTitle(self, key):
        """Return the feed's display title, falling back to its URL.

        NOTE(review): the check deciding when to fall back (empty/None
        title) is elided between these lines.
        """
        title = self.lookup('feeds', 'title', key)
        return self.getFeedUrl(key)
1203 def getFeedUrl(self, key):
1204 return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1206 def getFeedCategory(self, key):
1207 return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    def getListOfFeeds(self, category=None):
        """Return feed ids, rank-ordered, optionally restricted to one
        category.

        NOTE(review): the if/else keywords around the two queries and the
        final list-building return are elided from this listing.
        """
        rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
        rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
1220 def getListOfCategories(self):
1221 return list(row[0] for row in self.db.execute(
1222 "SELECT id FROM categories ORDER BY rank;"))
1224 def getCategoryTitle(self, id):
1225 return self.lookup('categories', 'title', id)
    def getCategoryUnread(self, id):
        """Sum the unread counts of every feed in category `id`.

        NOTE(review): the `count` initialisation and the final return are
        elided from this listing.
        """
        for key in self.getListOfFeeds(category=id):
            count = count + self.getFeedNumberOfUnreadItems(key)
    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        """Return feed ids in `category` sorted per `order` ("Most
        unread", "Least unread", "Most recent", "Least recent", anything
        else meaning manual rank), optionally unread-only.

        NOTE(review): the `if onlyUnread:`/else keywords and the final
        list-building return are elided.  `category` is interpolated into
        the SQL with %s — fine for the integer ids used internally, but
        parameterising it would be safer; confirm all callers pass ints.
        """
        if order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])

            sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
            sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
        rows = self.db.execute(sql)
# Filesystem path of the feed's cached favicon; each feed stores its data
# under "<configdir><key>.d/".
1263 def getFavicon(self, key):
1264 filename = "%s%s.d/favicon.ico" % (self.configdir, key)
1265 if isfile(filename):
# Recompute a feed's unread counter from its Feed object and persist it,
# then drop the stale cached feeds rows.
1270 def updateUnread(self, key):
1271 feed = self.getFeed(key)
1272 self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
# Invalidate the in-memory cache so the next lookup re-reads the DB.
1274 self.cache_invalidate('feeds')
# Insert a new feed row (unread=0, updateTime=0, rank appended after the
# current maximum) and, when Woodchuck is available, register the feed as
# a Woodchuck stream so background updates can be scheduled.
1276 def addFeed(self, title, url, id=None, category=1):
# Duplicate check: how many feeds already use this id.
1279 count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
1281 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
# MAX(rank) is NULL (None) on an empty table.
1282 if max_rank == None:
# Column order matches the INSERT below: id, title, url, unread,
# updateTime, rank, etag, modified, widget, category.
1284 values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
1285 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
1287 # Ask for the feed object, it will create the necessary tables
1290 if wc().available():
1291 # Register the stream with Woodchuck. Update approximately
1293 wc().stream_register(stream_identifier=id,
1294 human_readable_name=title,
# Create a new category with the given title, placed after the current
# last rank and given the next free id.
1301 def addCategory(self, title):
# MAX(rank)+1 / MAX(id)+1 are NULL (None) on an empty table.
1302 rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
1305 id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
1308 self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
# Invalidate the cached categories rows so readers see the new entry.
1310 self.cache_invalidate('categories')
# Delete a feed: best-effort unregister from Woodchuck, remove its row,
# close the rank gap, wipe its on-disk directory, and invalidate the cache.
1312 def removeFeed(self, key):
1313 if wc().available ():
# Unregistering can fail for feeds that were never registered; this is
# logged and otherwise ignored.
1317 logger.debug("Removing unregistered feed %s failed" % (key,))
1319 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
1320 self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
# Shift every lower-ranked feed up so ranks stay contiguous.
1321 self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
# Remove the feed's per-feed data directory ("<configdir><key>.d/").
1324 if isdir(self.configdir+key+".d/"):
1325 rmtree(self.configdir+key+".d/")
1326 self.cache_invalidate('feeds')
# Delete a category, but only while more than one category exists; its
# feeds are reassigned to category 1 and the rank gap is closed.
1328 def removeCategory(self, key):
1329 if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
1330 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
1331 self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
# Keep ranks contiguous after the removal.
1332 self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
# Orphaned feeds fall back to the default category (id 1).
1333 self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
1335 self.cache_invalidate('categories')
1337 #def saveConfig(self):
1338 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
1339 # file = open(self.configdir+"feeds.pickle", "w")
1340 # pickle.dump(self.listOfFeeds, file)
# Move a feed one position up in the manual ordering by swapping its rank
# with the feed currently at rank-1.
1343 def moveUp(self, key):
1344 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
# Give the neighbour this feed's old rank, then take its rank.
1346 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
1347 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
# Move a category one position up by swapping ranks with the category
# currently at rank-1 (mirrors moveUp for feeds).
1350 def moveCategoryUp(self, key):
1351 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1353 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
1354 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
# Move a feed one position down in the manual ordering by swapping its
# rank with the feed currently at rank+1; max_rank bounds the move.
1357 def moveDown(self, key):
1358 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1359 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
# Give the neighbour this feed's old rank, then take its rank.
1361 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
1362 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
1365 def moveCategoryDown(self, key):
1366 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1367 max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
1369 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
1370 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )