1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # ============================================================================
22 # Author : Yves Marcoz
24 # Description : Simple RSS Reader
25 # ============================================================================
27 from __future__ import with_statement
30 from os.path import isfile, isdir
31 from shutil import rmtree
32 from os import mkdir, remove, utime
38 from BeautifulSoup import BeautifulSoup
39 from urlparse import urljoin
40 from calendar import timegm
43 from wc import wc, wc_init, woodchuck
46 from updatedbus import update_server_object
48 from jobmanager import JobManager
50 from httpprogresshandler import HTTPProgressHandler
54 logger = logging.getLogger(__name__)
57 return md5.new(string).hexdigest()
def download_callback(connection):
    """Progress hook polled during HTTP transfers.

    Aborts the in-flight download by raising KeyboardInterrupt when the
    job manager has been asked to shut down.  `connection` is supplied
    by the progress handler and is not used.
    """
    manager = JobManager()
    if manager.do_quit:
        raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    # Build a urllib2 opener chained with progress reporting (and,
    # presumably, the proxy handler).
    # NOTE(review): the initialisation of `openers` and the else/proxy
    # branches are not visible in this excerpt — confirm against the
    # full file.
    if progress_handler is not None:
        openers.append(progress_handler)
    openers.append(HTTPProgressHandler(download_callback))
    return urllib2.build_opener(*openers)

def transfer_stats(sent, received, **kwargs):
    """
    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received. The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments. It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.
    """
    start_time = time.time()
    start_received = received

    def e(sent, received, **kwargs):
        # Deltas are relative to the snapshot taken when transfer_stats
        # was called.
        # NOTE(review): `start_sent = sent` and the `return e` line are
        # not visible in this excerpt.
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)

# If not None, a subprocess.Popen object corresponding to a
# update_feeds.py process.
update_feed_process = None

# Lazily-created D-Bus proxy to a running update_feeds.py daemon.
update_feeds_iface = None
class BaseObject(object):
    # Columns to cache. Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column. Note that
    # both are case sensitive.

    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        # Nothing to do if the lazy cache was never created.
        if not hasattr(self, 'cache'):
        if table in self.cache:
            del self.cache[table]

    def lookup(self, table, column, id=None):
        """
        Look up a column or value. Uses a cache for columns in
        cached_columns. Note: the column is returned unsorted.
        """
        if not hasattr(self, 'cache'):
        # Cache data for at most 60 seconds.
        cache = self.cache[table]
        # cache[None] holds the time the table's cache was populated.
        if time.time() - cache[None] > 60:
            # logger.debug("%s: Cache too old: clearing" % (table,))
            del self.cache[table]
            or (table, column) not in self.cached_columns):
            # The cache is empty or the caller wants a column that we
            if (table, column) in self.cached_columns:
                # logger.debug("%s: Rebuilding cache" % (table,))
                self.cache[table] = cache = {}
                for t, c in self.cached_columns:
                # NOTE(review): id is interpolated directly into the SQL
                # string here instead of using a ? placeholder — unsafe if
                # an id can ever contain a quote character.
                where = "where id = '%s'" % id
                results = self.db.execute(
                    "SELECT %s FROM %s %s" % (','.join(columns), table, where))
                    for index, value in enumerate(values):
                        cache[columns[index]][i] = value
                        results.append(values[0])
        cache = self.cache[table]
        value = cache[column][id]
        # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
        return cache[column].values()
        # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
class Feed(BaseObject):
    # Columns served from BaseObject's 60-second lookup cache.
    # NOTE(review): the tuple is truncated in this excerpt — the full
    # file lists more (table, column) pairs.
    cached_columns = (('feed', 'read'),

    # Serialises the DB-heavy part of feed updates across worker threads.
    serial_execution_lock = threading.Lock()

        # Per-thread sqlite connection (sqlite objects cannot cross threads).
        # NOTE(review): the `def _getdb(self):`/try lines are not visible
        # in this excerpt.
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
    db = property(_getdb)
    def __init__(self, configdir, key):
        # configdir: application config directory; key: this feed's id.
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        self.tls = threading.local ()

        if not isdir(self.dir):
        if not isfile("%s/%s.db" %(self.dir, self.key)):
            # First run for this feed: create its article and image tables.
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")

    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        # Download one article image into the feed's .d directory and
        # return its local filename; on failure only logs.
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
                opener = downloader(proxy=proxy)
            abs_url = urljoin(baseurl,url)
            f = opener.open(abs_url)
            with open(filename, "w") as outf:
        except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
            logger.info("Could not download image %s: %s"
                        % (abs_url, str (exception)))
            exception = sys.exc_info()[0]
            logger.info("Downloading image %s: %s" %
                        (abs_url, traceback.format_exc()))
            #open(filename,"a").close() # "Touch" the file
            # NOTE(review): this handle is never closed in the visible
            # lines — confirm a close()/with exists in the full file.
            file = open(filename,"a")
            utime(filename, None)

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        # Either update inline (when running inside update_feeds.py) or
        # hand the work to the update daemon via D-Bus / spawn it.
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
                self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
            JobManager().execute(doit(), self.key, priority=priority)

            def send_update_request():
                # Returns truthy on success; resets the proxy on failure.
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus=dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')
                    update_feeds_iface.Update(self.key)
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                    update_feeds_iface = None

            if send_update_request():
                # Success! It seems we were able to start the update
                # daemon via dbus (or, it was already running).

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running. Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                update_feeds_iface = None

                if send_update_request():
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        # Core update worker: fetch the feed with feedparser (honouring
        # etag/modified), store new/changed entries, optionally cache
        # images, expire old articles, and report stats to Woodchuck.
        # Many try/except/else lines are not visible in this excerpt.
        logger.debug("Updating %s" % url)

        have_serial_execution_lock = False
            update_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
                openers.append (proxy)
            kwargs = {'handlers':openers}

            feed_transfer_stats = transfer_stats(0, 0)

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - update_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

                        wc().stream_register (self.key, "", 6 * 60 * 60)
                    except woodchuck.ObjectExistsError:
                    # Report transfer volume and object counts for this update.
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=update_start,
                        transfer_duration=download_duration,
                        new_objects=new_objects,
                        updated_objects=updated_objects,
                        objects_inline=new_objects + updated_objects)
                        "Failed to register update of %s with woodchuck!"

            http_status = tmp.get ('status', 200)

            # Check if the parse was succesful. If the http status code
            # is 304, then the download was successful, but there is
            # nothing new. Indeed, no content is returned. This make a
            # 304 look like an error because there are no entries and the
            # parse fails. But really, everything went great! Check for
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute(wc_success, async=True)
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occured fetching or parsing the feed. (Version
                # will be either None if e.g. the connection timed our or
                # '' if the data is not a proper feed)
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.version),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))

                    def register_stream_update_failed(http_status):
                        logger.debug("%s: stream update failed!" % self.key)

                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                        # Map HTTP status ranges to Woodchuck failure codes.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                        register_stream_update_failed(
                            http_status=http_status),
               currentTime = time.time()
               # The etag and modified value should only be updated if the content was not null
                   modified = tmp["modified"]
                   # Fetch the site's favicon next to the feed link.
                   abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                   f = opener.open(abs_url)
                   outf = open(self.dir+"/favicon.ico", "w")
               except (urllib2.HTTPError, urllib2.URLError), exception:
                   logger.debug("Could not download favicon %s: %s"
                                % (abs_url, str (exception)))

               self.serial_execution_lock.acquire ()
               have_serial_execution_lock = True

               #reversedEntries = self.getEntries()
               #reversedEntries.reverse()

               # Process oldest entries first.
               tmp["entries"].reverse()
               for entry in tmp["entries"]:
                   # Yield so as to make the main thread a bit more
                   entry_transfer_stats = transfer_stats(
                       *feed_transfer_stats(**progress_handler.stats)[0:2])

                   if JobManager().do_quit:
                       raise KeyboardInterrupt

                       date = self.extractDate(entry)
                       entry["title"] = "No Title"
                       entry["author"] = None
                   if(not(entry.has_key("id"))):
                   content = self.extractContent(entry)
                   object_size = len (content)
                   tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                   id = self.generateUniqueId(tmpEntry)

                       = self.db.execute('select date from feed where id=?',
                   # Skip entries already stored with the same date.
                   if (current_version is not None
                       and current_version[0] == date):
                       logger.debug("ALREADY DOWNLOADED %s (%s)"
                                    % (entry["title"], entry["link"]))

                   if current_version is not None:
                       # The version was updated. Mark it as unread.
                       logger.debug("UPDATED: %s (%s)"
                                    % (entry["title"], entry["link"]))
                       self.setEntryUnread(id)
                       logger.debug("NEW: %s (%s)"
                                    % (entry["title"], entry["link"]))

                   #articleTime = time.mktime(self.entries[id]["dateTuple"])
                   soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                   baseurl = tmpEntry["link"]
                   if imageCache and len(images) > 0:
                       # Image download happens outside the serial lock so
                       # other feeds can progress meanwhile.
                       self.serial_execution_lock.release ()
                       have_serial_execution_lock = False
                               filename = self.addImage(
                                   configdir, self.key, baseurl, img['src'],
                                   img['src']="file://%s" %filename
                                   count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                       self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                   object_size += os.path.getsize (filename)
                               except os.error, exception:
                                   logger.error ("Error getting size of %s: %s"
                                                 % (filename, exception))
                       self.serial_execution_lock.acquire ()
                       have_serial_execution_lock = True

                   tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                   file = open(tmpEntry["contentLink"], "w")
                   file.write(soup.prettify())
                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                       values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                       self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)

                   #     self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                   #         filename = configdir+self.key+".d/"+id+".html"
                   #         file = open(filename,"a")
                   #         utime(filename, None)
                   #         images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                   #         for image in images:
                   #             file = open(image[0],"a")
                   #             utime(image[0], None)

                       # Register the object with Woodchuck and mark it as
                       def register_object_transferred(
                               id, title, publication_time,
                               sent, received, object_size):
                           logger.debug("Registering transfer of object %s"
                               obj = wc()[self.key].object_register(
                                   object_identifier=id,
                                   human_readable_name=title)
                           except woodchuck.ObjectExistsError:
                               obj = wc()[self.key][id]
                               obj.publication_time = publication_time
                                   woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                               transferred_down=received,
                               object_size=object_size)

                       # If the entry does not contain a publication
                       # time, the attribute won't exist.
                       pubtime = entry.get('date_parsed', None)
                           publication_time = time.mktime (pubtime)
                           publication_time = None

                           = entry_transfer_stats(**progress_handler.stats)
                       # sent and received are for objects (in
                       # particular, images) associated with this
                       # item. We also want to attribute the data
                       # transferred for the item's content. This is
                       # a good first approximation.
                       received += len(content)

                           register_object_transferred(
                               title=tmpEntry["title"],
                               publication_time=publication_time,
                               sent=sent, received=received,
                               object_size=object_size),
                   = feed_transfer_stats(**progress_handler.stats)
                   "%s: Update successful: transferred: %d/%d; objects: %d)"
                   % (url, sent, received, len (tmp.entries)))
               mainthread.execute (wc_success, async=True)

               # Expire articles past their (read/unread) retention windows.
               rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
                   self.removeEntry(row[0])

               from glob import glob
               for file in glob(configdir+self.key+".d/*"):
                       # put the two dates into matching format
                       lastmodDate = stats[8]
                       expDate = time.time()-expiry*3
                       # check if image-last-modified-date is outdated
                       if expDate > lastmodDate:
                               #print 'Removing', file
                               # XXX: Tell woodchuck.
                               remove(file) # commented out for testing
                           except OSError, exception:
                               logger.error('Could not remove %s: %s'
                                            % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
                title = tmp.feed.title
            except (AttributeError, UnboundLocalError), exception:
            if postFeedUpdateFunc is not None:
                postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                    title, *postFeedUpdateFuncArgs)

        self.cache_invalidate()
    def setEntryRead(self, id):
        # Flag one article as read, then asynchronously report the use
        # to Woodchuck from the main thread.
        # NOTE(review): the nested `def doit():`/try lines around the
        # wc() call are not visible in this excerpt.
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
                wc()[self.key][id].used()
        mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')

    def setEntryUnread(self, id):
        # Flag one article as unread and drop the cached 'feed' columns.
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.cache_invalidate('feed')

    def markAllAsRead(self):
        # Flag every unread article in this feed as read.
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.cache_invalidate('feed')
717 def isEntryRead(self, id):
718 return self.lookup('feed', 'read', id) == 1
720 def getTitle(self, id):
721 return self.lookup('feed', 'title', id)
723 def getContentLink(self, id):
724 return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
726 def getExternalLink(self, id):
727 return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
729 def getDate(self, id):
730 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
731 return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
733 def getDateTuple(self, id):
734 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
735 return time.localtime(dateStamp)
737 def getDateStamp(self, id):
738 return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article. For the same
        entry, this should result in the same identifier. If
        possible, the identifier should remain the same even if the
        """
        # NOTE(review): the `if key is None:` fall-through lines between
        # the candidates, and the final return, are not visible here.
        # Prefer the entry's id, which is supposed to be globally
        key = entry.get('id', None)
        # Next, try the link to the content.
        key = entry.get('link', None)
        # Ok, the title and the date concatenated are likely to be
        key = entry.get('title', None) + entry.get('date', None)
        # Hmm, the article's content will at least guarantee no
        # false negatives (i.e., missing articles)
        key = entry.get('content', None)
        # If all else fails, just use a random number.
        key = str (random.random ())

    def getIds(self, onlyUnread=False):
        # Return article ids, newest first; optionally only unread ones.
        rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()

    def getNextId(self, id, forward=True):
        # Step to the adjacent article id in date order, wrapping around.
        # NOTE(review): the delta/ids setup lines are not visible here.
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]
786 def getPreviousId(self, id):
787 return self.getNextId(id, forward=False)
789 def getNumberOfUnreadItems(self):
790 return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
792 def getNumberOfEntries(self):
793 return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
    def getArticle(self, entry):
        # Render one entry dict into a standalone XHTML page (title,
        # author, date header followed by the content).
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += "</body></html>"
    def getContent(self, id):
        # Read the article's cached HTML from disk; fall back to a
        # placeholder string when the file cannot be read.
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
            file = open(self.entries[id]["contentLink"])
            content = file.read()
            content = "Content unavailable"

    def extractDate(self, entry):
        # Prefer the updated timestamp, then the published one (both are
        # UTC struct_times converted with calendar.timegm).
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])

    def extractContent(self, entry):
        # Pick the richest body available: summary, then the longer of
        # summary/content[0], falling back to description.
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
            content = entry.get('description', '')

    def removeEntry(self, id):
        # Delete the article's HTML file, its DB rows and (via the main
        # thread) its Woodchuck object.
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        except OSError, exception:
            logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
                    wc()[self.key][id].files_deleted (
                        woodchuck.DeletionResponse.Deleted)
                    del wc()[self.key][id]
        mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
    # Pseudo-feed backing the "Archived Articles" entry: articles are
    # added manually and their pages fetched on the next update.

    def addArchivedArticle(self, title, link, date, configdir):
        # Insert a placeholder row (updated=0) to be filled by updateFeed.
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        # Fetch the page of every not-yet-downloaded archived article,
        # localise its images, and store the rendered HTML.
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
            currentTime = time.time()
                f = urllib2.urlopen(link)
                #entry["content"] = f.read()
                soup = BeautifulSoup(html)
                        filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                            self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                contentLink = configdir+self.key+".d/"+id+".html"
                file = open(contentLink, "w")
                file.write(soup.prettify())
                self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
        return (currentTime, None, None)

    def purgeReadArticles(self):
        # Drop every archived article that has been read.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
            self.removeArticle(row[0])

    def removeArticle(self, id):
        # Remove an archived article; shared images are kept if another
        # article still references the same path.
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
            count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
class Listing(BaseObject):
    # Columns served from BaseObject's lookup cache.
    # NOTE(review): the tuple likely lists more (table, column) pairs in
    # the full file; only the first and last are visible here.
    cached_columns = (('feeds', 'updateTime'),
                      ('categories', 'title'))

        # Per-thread sqlite connection to the master feeds.db.
        # NOTE(review): the `def _getdb(self):`/try lines are not visible
        # in this excerpt.
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
    db = property(_getdb)
    # Lists all the feeds in a dictionary, and expose the data
    def __init__(self, config, configdir):
        self.configdir = configdir
        self.tls = threading.local ()

        table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
            # Fresh install: create the schema and seed default content.
            self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
            self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
            self.addCategory("Default Category")
            if isfile(self.configdir+"feeds.pickle"):
                self.importOldFormatFeeds()
                self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
            # Schema migrations for databases created by older versions.
            from string import find, upper
            if find(upper(table[0]), "WIDGET")<0:
                self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                self.db.execute("UPDATE feeds SET widget=1;")
            if find(upper(table[0]), "CATEGORY")<0:
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                self.db.execute("UPDATE feeds SET category=1;")

        # Check that Woodchuck's state is up to date with respect our
        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
        wc_init (self, True if updater else False)
        if wc().available() and updater:
            # The list of known streams.
            streams = wc().streams_list ()
            stream_ids = [s.identifier for s in streams]

            # Register any unknown streams. Remove known streams from
            for key in self.getListOfFeeds():
                title = self.getFeedTitle(key)
                # XXX: We should also check whether the list of
                # articles/objects in each feed/stream is up to date.
                if key not in stream_ids:
                        "Registering previously unknown channel: %s (%s)"
                    # Use a default refresh interval of 6 hours.
                    wc().stream_register (key, title, 6 * 60 * 60)
                    # Make sure the human readable name is up to date.
                    if wc()[key].human_readable_name != title:
                        wc()[key].human_readable_name = title
                    stream_ids.remove (key)

            # Unregister any streams that are no longer subscribed to.
            for id in stream_ids:
                logger.debug("Unregistering %s" % (id,))
                # NOTE(review): `w` appears to be undefined here — this
                # looks like it should be wc().stream_unregister(id).
                w.stream_unregister (id)
    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
        listing = rss.Listing(self.configdir)
        for id in listing.getListOfFeeds():
                values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
                self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)

                feed = listing.getFeed(id)
                new_feed = self.getFeed(id)

                items = feed.getIds()[:]
                    if feed.isEntryRead(item):
                    date = timegm(feed.getDateTuple(item))
                    title = feed.getTitle(item)
                    newId = new_feed.generateUniqueId({"date":date, "title":title})
                    # NOTE(review): tuple(time.time()) raises TypeError —
                    # a float is not iterable; this was probably meant to
                    # be time.time().  Confirm against the full file's
                    # surrounding try/except before changing.
                    values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
                    new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                    new_feed.db.commit()
                        images = feed.getImages(item)
                        for image in images:
                            new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
                            new_feed.db.commit()

                self.updateUnread(id)
            logger.error("importOldFormatFeeds: %s"
                         % (traceback.format_exc(),))
        remove(self.configdir+"feeds.pickle")
    def addArchivedArticle(self, key, index):
        # Copy one article from a regular feed into the ArchivedArticles
        # pseudo-feed, creating that feed on first use.
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")
    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
        # Resolve missing settings from the config, then delegate the
        # actual work to Feed.updateFeed.
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
            # Default to 24 hours
            (use_proxy, proxy) = self.config.getProxy()
        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
            # `modified` is persisted as the repr of a time tuple.
            modified = time.struct_time(eval(modified))
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
1082 def _queuePostFeedUpdate(self, *args, **kwargs):
1083 mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        # Main-thread bookkeeping after a feed update: persist etag /
        # modified / updateTime, fix an empty title, refresh the unread
        # count and broadcast progress over D-Bus.
            modified=str(tuple(modified))
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )

        if title is not None:
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
        self.cache_invalidate('feeds')
        self.updateUnread(key)

        update_server_object().ArticleCountUpdated()

        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

            # Count an in-progress job as half done.
            percent = (100 * ((completed + in_progress / 2.))
                       / (completed + in_progress + queued))
        except ZeroDivisionError:

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']
1122 def getFeed(self, key):
1123 if key == "ArchivedArticles":
1124 return ArchivedArticles(self.configdir, key)
1125 return Feed(self.configdir, key)
    def editFeed(self, key, title, url, category=None):
        # Update a subscription's title/url (and category when given),
        # then propagate the new title to Woodchuck if possible.
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.cache_invalidate('feeds')

        if wc().available():
                wc()[key].human_readable_name = title
                logger.debug("Feed %s (%s) unknown." % (key, title))
    def getFeedUpdateTime(self, key):
        # Render the feed's last update time as a friendly relative
        # string ("About an hour ago", "3 days ago", ...).
        update_time = self.lookup('feeds', 'updateTime', key)

        delta = time.time() - update_time

        delta_hours = delta / (60. * 60.)
        if delta_hours < .1:
            return "A few minutes ago"
        if delta_hours < .75:
            return "Less than an hour ago"
        if delta_hours < 1.5:
            return "About an hour ago"
        if delta_hours < 18:
            return "About %d hours ago" % (int(delta_hours + 0.5),)

        delta_days = delta_hours / 24.
        if delta_days < 1.5:
            return "About a day ago"
            return "%d days ago" % (int(delta_days + 0.5),)

        delta_weeks = delta_days / 7.
        if delta_weeks <= 8:
            return "%d weeks ago" % int(delta_weeks + 0.5)

        delta_months = delta_days / 30.
        if delta_months <= 30:
            return "%d months ago" % int(delta_months + 0.5)

        return time.strftime("%x", time.gmtime(update_time))
1175 def getFeedNumberOfUnreadItems(self, key):
1176 return self.lookup('feeds', 'unread', key)
    def getFeedTitle(self, key):
        # Return the feed's title, falling back to its URL when the
        # title is empty/unset.
        title = self.lookup('feeds', 'title', key)
        return self.getFeedUrl(key)
1185 def getFeedUrl(self, key):
1186 return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1188 def getFeedCategory(self, key):
1189 return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    def getListOfFeeds(self, category=None):
        # Return feed ids in rank order, optionally limited to one category.
            rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
            rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
1202 def getListOfCategories(self):
1203 return list(row[0] for row in self.db.execute(
1204 "SELECT id FROM categories ORDER BY rank;"))
1206 def getCategoryTitle(self, id):
1207 return self.lookup('categories', 'title', id)
    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        # Return feed ids in a category sorted per the user-selected
        # ordering; unknown order strings fall back to manual rank.
        if order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
            # NOTE(review): category is %-interpolated into the SQL rather
            # than passed as a ? parameter; safe only while category is an int.
            sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
            sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
        rows = self.db.execute(sql)
    def getFavicon(self, key):
        # Return the feed's cached favicon path, if it was downloaded.
        # NOTE(review): the return statements are not visible in this excerpt.
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):

    def updateUnread(self, key):
        # Recompute a feed's unread count and store it on the feeds row.
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        self.cache_invalidate('feeds')
    def addFeed(self, title, url, id=None, category=1):
        # Insert a new subscription at the bottom of the rank order and
        # register it with Woodchuck; no-ops if the id already exists.
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
            max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            # NOTE(review): `== None` — PEP 8 prefers `is None`.
            if max_rank == None:
            values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)

            # Ask for the feed object, it will create the necessary tables

            if wc().available():
                # Register the stream with Woodchuck. Update approximately
                wc().stream_register(stream_identifier=id,
                                     human_readable_name=title,

    def addCategory(self, title):
        # Append a new category after the current highest rank, assigning
        # it the next free integer id.
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
    def removeFeed(self, key):
        # Unregister from Woodchuck (best effort), delete the feeds row,
        # close the rank gap and wipe the feed's on-disk directory.
        if wc().available ():
                logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )

        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")

    def removeCategory(self, key):
        # Delete a category (never the last one), close the rank gap and
        # reassign its feeds to the default category (id 1).
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
1307 #def saveConfig(self):
1308 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
1309 # file = open(self.configdir+"feeds.pickle", "w")
1310 # pickle.dump(self.listOfFeeds, file)
    def moveUp(self, key):
        # Swap this feed's rank with the entry directly above it.
        # NOTE(review): the `if rank>0:` guards are not visible in this
        # excerpt for these four move methods.
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )

    def moveCategoryUp(self, key):
        # Swap this category's rank with the entry directly above it.
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )

    def moveDown(self, key):
        # Swap this feed's rank with the entry directly below it.
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )

    def moveCategoryDown(self, key):
        # Swap this category's rank with the entry directly below it.
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )