#!/usr/bin/env python2.5

#
# Copyright (c) 2007-2008 INdT.
# Copyright (c) 2011 Neal H. Walfield
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# ============================================================================
# Author      : Yves Marcoz
# Description : Simple RSS Reader
# ============================================================================
from __future__ import with_statement

import sqlite3
from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, utime
import os
import md5
import feedparser
import time
import urllib2
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
from calendar import timegm
import threading
import traceback
import subprocess
import dbus
import random
import sys
import logging

from wc import wc, wc_init, woodchuck
from updatedbus import update_server_object
from jobmanager import JobManager
import mainthread
from httpprogresshandler import HTTPProgressHandler

logger = logging.getLogger(__name__)
def getId(string):
    return md5.new(string).hexdigest()
def download_callback(connection):
    # Abort any transfer in progress when the job manager is quitting.
    if JobManager().do_quit:
        raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    openers = []

    if progress_handler is not None:
        openers.append(progress_handler)
    else:
        openers.append(HTTPProgressHandler(download_callback))

    if proxy:
        openers.append(proxy)

    return urllib2.build_opener(*openers)
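
# Illustrative use of downloader() (nothing in this module calls it
# exactly this way; the URL is a placeholder):
#
#   opener = downloader()
#   f = opener.open("http://example.com/feed.xml")
#   try:
#       data = f.read()
#   finally:
#       f.close()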
def transfer_stats(sent, received, **kwargs):
    """
    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.
    """
    start_time = time.time()
    start_sent = sent
    start_received = received

    def e(sent, received, **kwargs):
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)

    return e
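
# Example of the continuation pattern above (illustrative only):
#
#   snapshot = transfer_stats(**progress_handler.stats)
#   ... more data is transferred ...
#   sent, received, duration = snapshot(**progress_handler.stats)
#
# i.e. the continuation reports deltas relative to the moment the
# snapshot was taken, which is exactly how feed_transfer_stats and
# entry_transfer_stats are used below.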
# If not None, a subprocess.Popen object corresponding to an
# update_feeds.py process.
update_feed_process = None

update_feeds_iface = None

jobs_at_start = 0
class BaseObject(object):
    # Columns to cache.  Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column.  Note that
    # both are case sensitive.
    cached_columns = ()

    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        if not hasattr(self, 'cache'):
            return

        if table is None:
            del self.cache
        elif table in self.cache:
            del self.cache[table]

    def lookup(self, table, column, id=None):
        """
        Look up a column or value.  Uses a cache for columns in
        cached_columns.  Note: the column is returned unsorted.
        """
        if not hasattr(self, 'cache'):
            self.cache = {}

        # Cache data for at most 60 seconds.
        now = time.time()
        cache = self.cache.get(table)
        if cache is not None and now - cache[None] > 60:
            self.cache[table].clear()
            cache = None

        if (cache is None
            or (id is not None and id not in cache.get(column, {}))
            or (table, column) not in self.cached_columns):
            # The cache is empty or the caller wants a column that we
            # don't cache.
            if (table, column) in self.cached_columns:
                # Rebuild the cache: load all cached columns of this
                # table in a single query.  The timestamp is stored
                # under the None key.
                self.cache[table] = cache = {None: now}
                columns = []
                for t, c in self.cached_columns:
                    if t == table:
                        cache[c] = {}
                        columns.append(c)
                columns.append('id')
                where = ""
            else:
                columns = (column,)
                where = ""
                if id is not None:
                    where = "where id = '%s'" % id

            results = self.db.execute(
                "SELECT %s FROM %s %s" % (','.join(columns), table, where))

            if (table, column) in self.cached_columns:
                # The row's id is the last selected column.
                for row in results:
                    values = row[:-1]
                    i = row[-1]
                    for index, value in enumerate(values):
                        cache[columns[index]][i] = value
            else:
                # Uncached column: return the value (or values) directly.
                rows = []
                for values in results:
                    if id is not None:
                        return values[0]
                    rows.append(values[0])
                return rows
        else:
            cache = self.cache[table]

        if id is not None:
            return cache[column][id]
        return cache[column].values()
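
# A minimal sketch of how subclasses opt into the cache (hypothetical
# class; the real users are Feed and Listing below):
#
#   class Example(BaseObject):
#       cached_columns = (('feed', 'read'),)
#
#       def isRead(self, id):
#           return self.lookup('feed', 'read', id) == 1
#
# Any (table, column) pair not listed in cached_columns bypasses the
# cache and is answered with a direct SELECT.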
class Feed(BaseObject):
    # Columns of the feed table to cache (see BaseObject).
    cached_columns = (('feed', 'read'),
                      ('feed', 'title'))

    serial_execution_lock = threading.Lock()

    def _getdb(self):
        # One sqlite connection per thread, stored in thread-local
        # storage.
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)

    def __init__(self, configdir, key):
        self.key = key
        self.configdir = configdir
        self.dir = "%s/%s.d" % (self.configdir, self.key)
        self.tls = threading.local ()

        if not isdir(self.dir):
            mkdir(self.dir)
        if not isfile("%s/%s.db" % (self.dir, self.key)):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
            self.db.commit()
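
    # On-disk layout (as created above and used below): each feed
    # lives in <configdir>/<key>.d/ and contains <key>.db, downloaded
    # images named getId(image url), per-article <id>.html files and
    # favicon.ico.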
    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                if not opener:
                    opener = downloader(proxy=proxy)

                abs_url = urljoin(baseurl, url)
                f = opener.open(abs_url)
                try:
                    with open(filename, "w") as outf:
                        for data in f:
                            outf.write(data)
                finally:
                    f.close()
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                return None
            except:
                exception = sys.exc_info()[0]

                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
                # Don't leave a partially written file behind.
                try:
                    remove(filename)
                except OSError:
                    pass

                return None
        else:
            #open(filename,"a").close() # "Touch" the file
            file = open(filename, "a")
            utime(filename, None)
            file.close()

        return filename
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
            # Run the update in this process via the job manager.
            def doit():
                def it():
                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
                return it
            JobManager().execute(doit(), self.key, priority=priority)
        else:
            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus = dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                        )
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')

                try:
                    update_feeds_iface.Update(self.key)
                except Exception, e:
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                                 % str(e))
                    update_feeds_iface = None
                    return False
                else:
                    return True

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).
                return

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                                            'update_feeds.py')
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                             % (str(argv),))
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                # drop the stale interface so it is rebound on the
                # next request.
                update_feeds_iface = None

            # The daemon should now be starting up; retry the request.
            if send_update_request():
                return
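
    # Dispatch summary for updateFeed above: inside update_feeds.py the
    # update runs in-process through JobManager; otherwise the request
    # is first handed to an already-running daemon over D-Bus, and a
    # fresh "update_feeds.py --daemon" process is spawned (and the
    # request retried) only if that fails.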
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        logger.debug("Updating %s" % url)

        success = False
        have_serial_execution_lock = False
        try:
            update_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            feed_transfer_stats = transfer_stats(0, 0)

            tmp = feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - update_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            new_objects = 0
            updated_objects = 0
            def wc_success():
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=update_start,
                        transfer_duration=download_duration,
                        new_objects=new_objects,
                        updated_objects=updated_objects,
                        objects_inline=new_objects + updated_objects)
                except KeyError:
                    logger.warn(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key,))
            http_status = tmp.get ('status', 200)

            # Check if the parse was successful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This makes a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute(wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occurred fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed out, or
                # '' if the data is not a proper feed.)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.version),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))
                def register_stream_update_failed(http_status):
                    def doit():
                        logger.debug("%s: stream update failed!" % self.key)

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass

                        # Map the HTTP status to a Woodchuck error code.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    return doit

                if wc().available():
                    mainthread.execute(
                        register_stream_update_failed(
                            http_status=http_status),
                        async=True)
            else:
                currentTime = time.time()
                # The etag and modified values should only be updated
                # if the content was not null.
                try:
                    etag = tmp["etag"]
                except KeyError:
                    etag = None
                try:
                    modified = tmp["modified"]
                except KeyError:
                    modified = None

                try:
                    abs_url = urljoin(tmp["feed"]["link"], "/favicon.ico")
                    f = opener.open(abs_url)
                    data = f.read()
                    f.close()
                    outf = open(self.dir+"/favicon.ico", "w")
                    outf.write(data)
                    outf.close()
                    del data
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    logger.debug("Could not download favicon %s: %s"
                                 % (abs_url, str (exception)))
                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                # Process the oldest entries first so the feed's date
                # ordering is preserved.
                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    entry_transfer_stats = transfer_stats(
                        *feed_transfer_stats(**progress_handler.stats)[0:2])

                    if JobManager().do_quit:
                        raise KeyboardInterrupt
                    date = self.extractDate(entry)
                    # Fill in any missing fields with sane defaults.
                    if not entry.has_key("title"):
                        entry["title"] = "No Title"
                    if not entry.has_key("link"):
                        entry["link"] = ""
                    if not entry.has_key("author"):
                        entry["author"] = None
                    if(not(entry.has_key("id"))):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    object_size = len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)

                    current_version \
                        = self.db.execute('select date from feed where id=?',
                                          (id,)).fetchone()
                    if (current_version is not None
                        and current_version[0] == date):
                        logger.debug("ALREADY DOWNLOADED %s (%s)"
                                     % (entry["title"], entry["link"]))
                        continue
                    if current_version is not None:
                        # The version was updated.  Mark it as unread.
                        logger.debug("UPDATED: %s (%s)"
                                     % (entry["title"], entry["link"]))
                        self.setEntryUnread(id)
                        updated_objects += 1
                    else:
                        logger.debug("NEW: %s (%s)"
                                     % (entry["title"], entry["link"]))
                        new_objects += 1
                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    if imageCache and len(images) > 0:
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
                            filename = self.addImage(
                                configdir, self.key, baseurl, img['src'],
                                opener=opener)
                            if filename:
                                img['src'] = "file://%s" % filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True
                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()

                    if current_version is not None:
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                    else:
                        values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                    self.db.commit()
                    # self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                    #
                    # filename = configdir+self.key+".d/"+id+".html"
                    # file = open(filename,"a")
                    # utime(filename, None)
                    #
                    # images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                    # for image in images:
                    #     file = open(image[0],"a")
                    #     utime(image[0], None)
                    # Register the object with Woodchuck and mark it as
                    # downloaded.
                    def register_object_transferred(
                            id, title, publication_time,
                            sent, received, object_size):
                        def doit():
                            logger.debug("Registering transfer of object %s"
                                         % title)
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=title)
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]
                            else:
                                obj.publication_time = publication_time
                                obj.transferred(
                                    indicator=(
                                        woodchuck.Indicator.ApplicationVisual
                                        |woodchuck.Indicator.StreamWide),
                                    transferred_down=received,
                                    transferred_up=sent,
                                    object_size=object_size)
                        return doit
                    if wc().available():
                        # If the entry does not contain a publication
                        # time, the attribute won't exist.
                        pubtime = entry.get('date_parsed', None)
                        if pubtime:
                            publication_time = time.mktime (pubtime)
                        else:
                            publication_time = None

                        sent, received, _ \
                            = entry_transfer_stats(**progress_handler.stats)
                        # sent and received are for objects (in
                        # particular, images) associated with this
                        # item.  We also want to attribute the data
                        # transferred for the item's content.  This is
                        # a good first approximation.
                        received += len(content)

                        mainthread.execute(
                            register_object_transferred(
                                id=id,
                                title=tmpEntry["title"],
                                publication_time=publication_time,
                                sent=sent, received=received,
                                object_size=object_size),
                            async=True)
                self.db.commit()
                sent, received, _ \
                    = feed_transfer_stats(**progress_handler.stats)
                logger.debug(
                    "%s: Update successful: transferred: %d/%d; objects: %d"
                    % (url, sent, received, len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True

                # Expire old entries; unread articles are kept twice
                # as long as read ones.
                rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
                for row in rows:
                    self.removeEntry(row[0])
                # Remove cached files that have not been touched for
                # more than three expiry periods.
                from glob import glob
                for file in glob(configdir+self.key+".d/*"):
                    stats = os.stat(file)
                    # stats[8] is the file's last-modification time.
                    lastmodDate = stats[8]

                    expDate = time.time()-expiry*3
                    # Check whether the file's last-modified date is
                    # outdated.
                    if expDate > lastmodDate:
                        try:
                            #print 'Removing', file
                            # XXX: Tell woodchuck.
                            remove(file)
                        except OSError, exception:
                            logger.error('Could not remove %s: %s'
                                         % (file, str (exception)))

            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime = row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
            finally:
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    pass
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)

        self.cache_invalidate()
    def setEntryRead(self, id):
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].used()
            except KeyError:
                pass
        if wc().available():
            mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')

    def setEntryUnread(self, id):
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.db.commit()
        self.cache_invalidate('feed')
    def markAllAsRead(self):
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.db.commit()
        self.cache_invalidate('feed')
    def isEntryRead(self, id):
        return self.lookup('feed', 'read', id) == 1

    def getTitle(self, id):
        return self.lookup('feed', 'title', id)

    def getContentLink(self, id):
        return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getExternalLink(self, id):
        return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getDate(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))

    def getDateTuple(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.localtime(dateStamp)

    def getDateStamp(self, id):
        return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        article is updated.
        """
        # Prefer the entry's id, which is supposed to be globally
        # unique.
        key = entry.get('id', None)
        if not key:
            # Next, try the link to the content.
            key = entry.get('link', None)
        if not key:
            # Ok, the title and the date concatenated are likely to be
            # relatively stable.
            key = str(entry.get('title', '')) + str(entry.get('date', ''))
        if not key:
            # Hmm, the article's content will at least guarantee no
            # false negatives (i.e., missing articles).
            key = entry.get('content', None)
        if not key:
            # If all else fails, just use a random number.
            key = str (random.random ())
        return getId (str (key))
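
    # Fallback chain example (illustrative): an Atom entry normally
    # carries an 'id', so that wins; an RSS item without a guid falls
    # back to its link, then to title+date, then to the content, and
    # only as a last resort to a random key (which sacrifices
    # stability but avoids dropping the article).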
    def getIds(self, onlyUnread=False):
        if onlyUnread:
            rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        else:
            rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
        ids = []
        for row in rows:
            ids.append(row[0])
        return ids

    def getNextId(self, id, forward=True):
        if forward:
            delta = 1
        else:
            delta = -1
        ids = self.getIds()
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]
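
    # E.g. with ids == ["a", "b", "c"], getNextId("c") returns "a"
    # because (2 + 1) % 3 == 0: navigation wraps around the list.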
    def getPreviousId(self, id):
        return self.getNextId(id, forward=False)

    def getNumberOfUnreadItems(self):
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]

    def getNumberOfEntries(self):
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
    def getArticle(self, entry):
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        link = entry['link']
        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        if author is not None:
            text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text
    def getContent(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        try:
            file = open(contentLink)
            content = file.read()
            file.close()
        except:
            content = "Content unavailable"
        return content
    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])
        else:
            return 0
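
    # Note: feedparser's *_parsed fields are UTC struct_times, so
    # timegm() (rather than time.mktime(), which assumes local time)
    # is the correct conversion to a Unix timestamp.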
    def extractContent(self, entry):
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content
    def removeEntry(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].files_deleted (
                    woodchuck.DeletionResponse.Deleted)
                del wc()[self.key][id]
            except KeyError:
                pass
        if wc().available():
            mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, date, configdir):
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;").fetchall()
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                if filename:
                    img['src'] = filename
                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )

            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)
    def purgeReadArticles(self):
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;").fetchall()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) ).fetchall()
        for row in rows:
            try:
                # Only delete the image if no other article still
                # references it.
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    remove(row[0])
            except (OSError, TypeError):
                pass
        self.removeEntry(id)
class Listing(BaseObject):
    # Columns of the feeds/categories tables to cache (see BaseObject).
    cached_columns = (('feeds', 'updateTime'),
                      ('feeds', 'unread'),
                      ('feeds', 'title'),
                      ('categories', 'title'))

    def _getdb(self):
        # One sqlite connection per thread, stored in thread-local
        # storage.
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)
    # Lists all the feeds in a dictionary, and exposes the data.
    def __init__(self, config, configdir):
        self.config = config
        self.configdir = configdir

        self.tls = threading.local ()

        try:
            table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
            if table is None:
                self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                if isfile(self.configdir+"feeds.pickle"):
                    self.importOldFormatFeeds()
                else:
                    self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
            else:
                # Migrate older databases that lack the widget and
                # category columns.
                if "WIDGET" not in table[0].upper():
                    self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                    self.db.execute("UPDATE feeds SET widget=1;")
                    self.db.commit()
                if "CATEGORY" not in table[0].upper():
                    self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                    self.addCategory("Default Category")
                    self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                    self.db.execute("UPDATE feeds SET category=1;")
                    self.db.commit()
        except:
            logger.error("Listing setup: %s" % (traceback.format_exc(),))
        # Check that Woodchuck's state is up to date with respect to
        # our state.
        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
        wc_init (self, True if updater else False)
        if wc().available() and updater:
            # The list of known streams.
            streams = wc().streams_list ()
            stream_ids = [s.identifier for s in streams]

            # Register any unknown streams.  Remove known streams from
            # stream_ids.
            for key in self.getListOfFeeds():
                title = self.getFeedTitle(key)
                # XXX: We should also check whether the list of
                # articles/objects in each feed/stream is up to date.
                if key not in stream_ids:
                    logger.debug(
                        "Registering previously unknown channel: %s (%s)"
                        % (key, title))
                    # Use a default refresh interval of 6 hours.
                    wc().stream_register (key, title, 6 * 60 * 60)
                else:
                    # Make sure the human readable name is up to date.
                    if wc()[key].human_readable_name != title:
                        wc()[key].human_readable_name = title
                    stream_ids.remove (key)

            # Unregister any streams that are no longer subscribed to.
            for id in stream_ids:
                logger.debug("Unregistering %s" % (id,))
                wc().stream_unregister (id)
    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite."""
        import rss
        listing = rss.Listing(self.configdir)
        rank = 0
        for id in listing.getListOfFeeds():
            try:
                rank += 1
                values = (id, listing.getFeedTitle(id), listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
                self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
                self.db.commit()

                feed = listing.getFeed(id)
                new_feed = self.getFeed(id)

                items = feed.getIds()[:]
                items.reverse()
                for item in items:
                    if feed.isEntryRead(item):
                        read_status = 1
                    else:
                        read_status = 0
                    date = timegm(feed.getDateTuple(item))
                    title = feed.getTitle(item)
                    newId = new_feed.generateUniqueId({"date":date, "title":title})
                    values = (newId, title, feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
                    new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                    new_feed.db.commit()

                    try:
                        images = feed.getImages(item)
                        for image in images:
                            new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
                        new_feed.db.commit()
                    except:
                        pass
                self.updateUnread(id)
            except:
                logger.error("importOldFormatFeeds: %s"
                             % (traceback.format_exc(),))
        remove(self.configdir+"feeds.pickle")
    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
        if count == 0:
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")
    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
                   priority=0):
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
        if expiryTime is None:
            # Default to 24 hours
            expiryTime = 24
        if proxy is None:
            (use_proxy, proxy) = self.config.getProxy()
            if not use_proxy:
                proxy = None
        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        try:
            # modified is stored as the str() of a time tuple.
            modified = time.struct_time(eval(modified))
        except:
            modified = None
        feed.updateFeed(
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
    def _queuePostFeedUpdate(self, *args, **kwargs):
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)

    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        if modified is None:
            modified = "None"
        else:
            modified = str(tuple(modified))
        if updateTime > 0:
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
        else:
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )

        if title is not None:
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
                            (title, key))
        self.db.commit()
        self.cache_invalidate('feeds')
        self.updateUnread(key)

        update_server_object().ArticleCountUpdated()

        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

        try:
            percent = (100 * ((completed + in_progress / 2.))
                       / (completed + in_progress + queued))
        except ZeroDivisionError:
            percent = 100

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']
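
    # Worked example for the progress formula above: with 3 jobs
    # completed, 1 in progress and 4 queued, percent is
    # 100 * (3 + 1/2.) / (3 + 1 + 4) = 43.75; an in-progress job
    # counts as half done.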
    def getFeed(self, key):
        if key == "ArchivedArticles":
            return ArchivedArticles(self.configdir, key)
        return Feed(self.configdir, key)

    def editFeed(self, key, title, url, category=None):
        if category:
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
        else:
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.db.commit()
        self.cache_invalidate('feeds')

        if wc().available():
            try:
                wc()[key].human_readable_name = title
            except KeyError:
                logger.debug("Feed %s (%s) unknown." % (key, title))
    def getFeedUpdateTime(self, key):
        update_time = self.lookup('feeds', 'updateTime', key)

        if not update_time:
            return "Never"

        delta = time.time() - update_time

        delta_hours = delta / (60. * 60.)
        if delta_hours < .1:
            return "A few minutes ago"
        if delta_hours < .75:
            return "Less than an hour ago"
        if delta_hours < 1.5:
            return "About an hour ago"
        if delta_hours < 18:
            return "About %d hours ago" % (int(delta_hours + 0.5),)

        delta_days = delta_hours / 24.
        if delta_days < 1.5:
            return "About a day ago"
        if delta_days < 18:
            return "%d days ago" % (int(delta_days + 0.5),)

        delta_weeks = delta_days / 7.
        if delta_weeks <= 8:
            return "%d weeks ago" % int(delta_weeks + 0.5)

        delta_months = delta_days / 30.
        if delta_months <= 30:
            return "%d months ago" % int(delta_months + 0.5)

        return time.strftime("%x", time.gmtime(update_time))
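
    # Examples (illustrative): 5 minutes -> "A few minutes ago";
    # 70 minutes -> "About an hour ago"; 6 hours -> "About 6 hours
    # ago"; 3 days -> "3 days ago"; only beyond about 30 months is
    # the absolute date returned instead.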
    def getFeedNumberOfUnreadItems(self, key):
        return self.lookup('feeds', 'unread', key)

    def getFeedTitle(self, key):
        title = self.lookup('feeds', 'title', key)
        if title:
            return title
        # Fall back on the URL if the feed has no title.
        return self.getFeedUrl(key)

    def getFeedUrl(self, key):
        return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getFeedCategory(self, key):
        return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    def getListOfFeeds(self, category=None):
        if category:
            rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
        else:
            rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getListOfCategories(self):
        return list(row[0] for row in self.db.execute(
            "SELECT id FROM categories ORDER BY rank;"))

    def getCategoryTitle(self, id):
        return self.lookup('categories', 'title', id)
    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        if order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
        if onlyUnread:
            sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " % category + tmp
        else:
            sql = "SELECT id FROM feeds WHERE category=%s " % category + tmp
        rows = self.db.execute(sql)
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys
    def getFavicon(self, key):
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):
            return filename
        else:
            return False

    def updateUnread(self, key):
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        self.db.commit()
        self.cache_invalidate('feeds')
    def addFeed(self, title, url, id=None, category=1):
        if not id:
            id = getId(url)
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
        if count == 0:
            max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            if max_rank is None:
                max_rank = 0
            values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);", values)
            self.db.commit()
            # Ask for the feed object; it will create the necessary
            # tables.
            self.getFeed(id)

            if wc().available():
                # Register the stream with Woodchuck.  Update approximately
                # every 6 hours.
                wc().stream_register(stream_identifier=id,
                                     human_readable_name=title,
                                     freshness=6*60*60)
            return True
        return False
    def addCategory(self, title):
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        if rank is None:
            rank = 1
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        if id is None:
            id = 1
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
        self.db.commit()
    def removeFeed(self, key):
        if wc().available ():
            try:
                del wc()[key]
            except (KeyError, woodchuck.Error):
                logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
        self.db.commit()

        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
    def removeCategory(self, key):
        # Never delete the last remaining category.
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
            self.db.commit()
    #def saveConfig(self):
    #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
    #    file = open(self.configdir+"feeds.pickle", "w")
    #    pickle.dump(self.listOfFeeds, file)
    def moveUp(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        if rank > 0:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveCategoryUp(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        if rank > 0:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveDown(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
        if rank < max_rank:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()

    def moveCategoryDown(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
        if rank < max_rank:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()
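
# Minimal usage sketch (hypothetical; `config` must provide the
# getExpiry, getProxy and getImageCache methods used by updateFeed):
#
#   listing = Listing(config, "/home/user/.feedingit/")
#   listing.addFeed("Maemo News", "http://maemo.org/news/items.xml")
#   for key in listing.getListOfFeeds():
#       listing.updateFeed(key)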