1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # ============================================================================
22 # Author : Yves Marcoz
24 # Description : Simple RSS Reader
25 # ============================================================================
27 from __future__ import with_statement
30 from os.path import isfile, isdir
31 from shutil import rmtree
32 from os import mkdir, remove, utime
38 from BeautifulSoup import BeautifulSoup
39 from urlparse import urljoin
40 from calendar import timegm
43 from wc import wc, wc_init, woodchuck
46 from updatedbus import update_server_object
48 from jobmanager import JobManager
50 from httpprogresshandler import HTTPProgressHandler
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# NOTE(review): the enclosing `def` for the next line is missing from this
# chunk — presumably a helper (getId?) that hashes a string; it is called
# elsewhere in this file.  Confirm against upstream.
    return md5.new(string).hexdigest()
def download_callback(connection):
    """Per-transfer hook: abort the transfer (by raising
    KeyboardInterrupt) once the job manager has been told to quit."""
    manager = JobManager()
    if manager.do_quit:
        raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    # Build a urllib2 opener from the given handlers.
    # NOTE(review): several lines are missing from this chunk — at least
    # the initialization of `openers`, an else-branch before the default
    # HTTPProgressHandler, and the proxy handling.  Confirm upstream.
    if progress_handler is not None:
        openers.append(progress_handler)
    # Default progress handler polls for shutdown via download_callback.
        openers.append(HTTPProgressHandler(download_callback))

    return urllib2.build_opener(*openers)
def transfer_stats(sent, received, **kwargs):
    """
    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.
    """
    start_time = time.time()
    # NOTE(review): `start_sent = sent` and the final `return e` appear
    # to be missing from this chunk — confirm against upstream.
    start_received = received

    def e(sent, received, **kwargs):
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)
# If not None, a subprocess.Popen object corresponding to a
# update_feeds.py process.
update_feed_process = None

# Cached D-Bus proxy to a running update_feeds daemon (reset to None
# when the call fails or the daemon is restarted).
update_feeds_iface = None
class BaseObject(object):
    # Simple per-instance, time-limited cache of database columns.
    #
    # Columns to cache.  Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column.  Note that
    # both are case sensitive.
    #
    # NOTE(review): many lines of this class are missing from this
    # chunk; bodies below are reproduced as-is and are not complete.

    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        if not hasattr(self, 'cache'):
            # (missing: presumably `return`, plus the `table is None`
            # whole-cache branch)
        if table in self.cache:
            del self.cache[table]

    def lookup(self, table, column, id=None):
        """
        Look up a column or value.  Uses a cache for columns in
        cached_columns.  Note: the column is returned unsorted.
        """
        if not hasattr(self, 'cache'):
        # Cache data for at most 60 seconds.
        cache = self.cache[table]
        if time.time() - cache[None] > 60:
            self.cache[table].clear()
        # (missing: the first half of this condition)
            or (table, column) not in self.cached_columns):
        # The cache is empty or the caller wants a column that we
        if (table, column) in self.cached_columns:
            self.cache[table] = cache = {}
        for t, c in self.cached_columns:
        # NOTE(review): `id` is interpolated into the SQL string rather
        # than bound as a ? parameter — injection-prone; confirm ids are
        # always internally generated.
        where = "where id = '%s'" % id
        results = self.db.execute(
            "SELECT %s FROM %s %s" % (','.join(columns), table, where))
        for index, value in enumerate(values):
            cache[columns[index]][i] = value
        results.append(values[0])
        cache = self.cache[table]
        # Single-value lookup when an id was given; otherwise the whole
        # (unsorted) column.
        return cache[column][id]
        return cache[column].values()
class Feed(BaseObject):
    # Columns cached by BaseObject.lookup().  NOTE(review): the rest of
    # this tuple is missing from this chunk.
    cached_columns = (('feed', 'read'),

    # Serializes the parts of a feed update that must not run
    # concurrently across worker threads.
    serial_execution_lock = threading.Lock()

    # NOTE(review): the `def _getdb(self):` line and its try-block are
    # missing from this chunk; the lines below are its tail.  One sqlite
    # connection per thread, kept in thread-local storage.
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)

    db = property(_getdb)
    def __init__(self, configdir, key):
        # NOTE(review): lines are missing here (presumably
        # `self.key = key`, mkdir of self.dir, and a commit).
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        # Thread-local storage; each thread gets its own sqlite
        # connection (see the db property in this class).
        self.tls = threading.local ()

        if not isdir(self.dir):
        # First use: create the feed's article and image tables.
        if not isfile("%s/%s.db" %(self.dir, self.key)):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        # Download an image referenced by an article into the feed's
        # cache directory; failures are only logged.
        # NOTE(review): many lines are missing from this chunk (the
        # try:, the `if opener is None:` guard, the copy loop into
        # outf, and the `return filename`) — confirm upstream.
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
                opener = downloader(proxy=proxy)

            abs_url = urljoin(baseurl,url)
            f = opener.open(abs_url)
            with open(filename, "w") as outf:
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                # Catch-all fallback for unexpected errors.
                exception = sys.exc_info()[0]
                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
        else:
            # Already cached: refresh the mtime so expiry keeps it.
            #open(filename,"a").close() # "Touch" the file
            # NOTE(review): `file` is opened but never closed here.
            file = open(filename,"a")
            utime(filename, None)
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        # Schedule a feed update.  If we already are the update daemon,
        # run _updateFeed via the job manager; otherwise hand the work
        # to the daemon over D-Bus, starting it if necessary.
        # NOTE(review): several lines are missing from this chunk
        # (e.g. the nested `def doit():` wrapper, `return` statements,
        # try/except around the D-Bus call).
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
                self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
            JobManager().execute(doit(), self.key, priority=priority)

            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus=dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')

                    update_feeds_iface.Update(self.key)
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                    # Drop the stale proxy so the next attempt rebinds.
                    update_feeds_iface = None

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                update_feeds_iface = None

            # Retry now that the daemon has (hopefully) been started.
            if send_update_request():
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        # Worker-side implementation of updateFeed: fetch the feed with
        # feedparser, store new articles (and optionally their images)
        # in the sqlite db, expire old content, and report the transfer
        # to Woodchuck.
        #
        # NOTE(review): many lines of this method are missing from this
        # chunk (the outer try/except/finally, several guards, a
        # `def wc_success():`, `sent, received, ...` unpackings, etc.);
        # the body below is reproduced as-is and is not self-contained.
        logger.debug("Updating %s" % url)

        have_serial_execution_lock = False
        update_start = time.time ()

        progress_handler = HTTPProgressHandler(download_callback)

        openers = [progress_handler]
        # (missing: `if proxy:` guard)
            openers.append (proxy)
        kwargs = {'handlers':openers}

        # Continuation measuring bytes moved for the whole feed.
        feed_transfer_stats = transfer_stats(0, 0)

        tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
        download_duration = time.time () - update_start

        opener = downloader(progress_handler, proxy)

        if JobManager().do_quit:
            raise KeyboardInterrupt

        process_start = time.time()

        # Expiry time is in hours
        expiry = float(expiryTime) * 3600.

        # (missing: `def wc_success():` and its try:, which the lines
        # below belong to — report a successful stream update.)
                wc().stream_register (self.key, "", 6 * 60 * 60)
            except woodchuck.ObjectExistsError:
                wc()[self.key].updated (
                    indicator=(woodchuck.Indicator.ApplicationVisual
                               |woodchuck.Indicator.StreamWide),
                    transferred_down=progress_handler.stats['received'],
                    transferred_up=progress_handler.stats['sent'],
                    transfer_time=update_start,
                    transfer_duration=download_duration,
                    new_objects=len (tmp.entries),
                    objects_inline=len (tmp.entries))
                    "Failed to register update of %s with woodchuck!"

        http_status = tmp.get ('status', 200)

        # Check if the parse was successful.  If the http status code
        # is 304, then the download was successful, but there is
        # nothing new.  Indeed, no content is returned.  This makes a
        # 304 look like an error because there are no entries and the
        # parse fails.  But really, everything went great!  Check for
        # this case first.
        if http_status == 304:
            logger.debug("%s: No changes to feed." % (self.key,))
            mainthread.execute(wc_success, async=True)
        elif len(tmp["entries"])==0 and not tmp.version:
            # An error occurred fetching or parsing the feed.  (Version
            # will be either None if e.g. the connection timed out or
            # '' if the data is not a proper feed.)
                "Error fetching %s: version is: %s: error: %s"
                % (url, str (tmp.version),
                   str (tmp.get ('bozo_exception', 'Unknown error'))))

            def register_stream_update_failed(http_status):
                logger.debug("%s: stream update failed!" % self.key)

                    # It's not easy to get the feed's title from here.
                    # At the latest, the next time the application is
                    # started, we'll fix up the human readable name.
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                # Map the HTTP status onto a Woodchuck error class.
                ec = woodchuck.TransferStatus.TransientOther
                if 300 <= http_status and http_status < 400:
                    ec = woodchuck.TransferStatus.TransientNetwork
                if 400 <= http_status and http_status < 500:
                    ec = woodchuck.TransferStatus.FailureGone
                if 500 <= http_status and http_status < 600:
                    ec = woodchuck.TransferStatus.TransientNetwork
                wc()[self.key].update_failed(ec)
                register_stream_update_failed(
                    http_status=http_status),
        else:
            currentTime = time.time()
            # The etag and modified value should only be updated if the content was not null
                modified = tmp["modified"]
            # Fetch the site's favicon into the feed directory.
                abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                f = opener.open(abs_url)
                outf = open(self.dir+"/favicon.ico", "w")
            except (urllib2.HTTPError, urllib2.URLError), exception:
                logger.debug("Could not download favicon %s: %s"
                             % (abs_url, str (exception)))

            self.serial_execution_lock.acquire ()
            have_serial_execution_lock = True

            #reversedEntries = self.getEntries()
            #reversedEntries.reverse()

            # Oldest first so ids/ordering come out right.
            tmp["entries"].reverse()
            for entry in tmp["entries"]:
                # Yield so as to make the main thread a bit more
                # responsive.
                entry_transfer_stats = transfer_stats(
                    *feed_transfer_stats(**progress_handler.stats)[0:2])

                if JobManager().do_quit:
                    raise KeyboardInterrupt

                date = self.extractDate(entry)
                # (missing: has_key guards around these defaults)
                    entry["title"] = "No Title"
                    entry["author"] = None
                if(not(entry.has_key("id"))):
                content = self.extractContent(entry)
                object_size = len (content)
                tmpEntry = {"title":entry["title"], "content":content,
                            "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                id = self.generateUniqueId(tmpEntry)

                #articleTime = time.mktime(self.entries[id]["dateTuple"])
                soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                baseurl = tmpEntry["link"]
                # Download referenced images outside the serial lock.
                if imageCache and len(images) > 0:
                    self.serial_execution_lock.release ()
                    have_serial_execution_lock = False
                        filename = self.addImage(
                            configdir, self.key, baseurl, img['src'],
                        # Rewrite the img tag to point at the local copy.
                        img['src']="file://%s" %filename
                        count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                            self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                            object_size += os.path.getsize (filename)
                        except os.error, exception:
                            logger.error ("Error getting size of %s: %s"
                                          % (filename, exception))
                    self.serial_execution_lock.acquire ()
                    have_serial_execution_lock = True

                # Write the rendered article to its own html file.
                tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                file = open(tmpEntry["contentLink"], "w")
                file.write(soup.prettify())

                # (missing: "already present?" branch — refresh vs insert)
                    self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                    values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                    self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                #                    self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                #                    filename = configdir+self.key+".d/"+id+".html"
                #                    file = open(filename,"a")
                #                    utime(filename, None)
                #                    images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                #                    for image in images:
                #                        file = open(image[0],"a")
                #                        utime(image[0], None)

                # Register the object with Woodchuck and mark it as
                # transferred.
                def register_object_transferred(
                    id, title, publication_time,
                    sent, received, object_size):
                    # (missing: nested `def doit():` and try:)
                        logger.debug("Registering transfer of object %s"
                            obj = wc()[self.key].object_register(
                                object_identifier=id,
                                human_readable_name=title)
                        except woodchuck.ObjectExistsError:
                            obj = wc()[self.key][id]
                        obj.publication_time = publication_time
                            woodchuck.Indicator.ApplicationVisual
                            |woodchuck.Indicator.StreamWide),
                        transferred_down=received,
                        object_size=object_size)
                # If the entry does not contain a publication
                # time, the attribute won't exist.
                pubtime = entry.get('date_parsed', None)
                    publication_time = time.mktime (pubtime)
                    publication_time = None
                    = entry_transfer_stats(**progress_handler.stats)
                # sent and received are for objects (in
                # particular, images) associated with this
                # item.  We also want to attribute the data
                # transferred for the item's content.  This is
                # a good first approximation.
                received += len(content)
                    register_object_transferred(
                        title=tmpEntry["title"],
                        publication_time=publication_time,
                        sent=sent, received=received,
                        object_size=object_size),
            # Whole-feed transfer totals for the success log/report.
                = feed_transfer_stats(**progress_handler.stats)
                "%s: Update successful: transferred: %d/%d; objects: %d)"
                % (url, sent, received, len (tmp.entries)))
            mainthread.execute (wc_success, async=True)

            # Expire: read articles after `expiry`, unread after twice
            # that.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
                self.removeEntry(row[0])

            from glob import glob
            for file in glob(configdir+self.key+".d/*"):
                # put the two dates into matching format
                lastmodDate = stats[8]

                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                if expDate > lastmodDate:
                        #print 'Removing', file
                        # XXX: Tell woodchuck.
                        remove(file) # commented out for testing
                    except OSError, exception:
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        # (missing: `except:` of the outer try)
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        # (missing: `finally:` of the outer try)
            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            # (missing: try/except computing updateTime)
            rows = self.db.execute("SELECT MAX(date) FROM feed;")
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
                title = tmp.feed.title
            except (AttributeError, UnboundLocalError), exception:
            if postFeedUpdateFunc is not None:
                postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                    title, *postFeedUpdateFuncArgs)

        self.cache_invalidate()
    def setEntryRead(self, id):
        # Mark an article read, mirror the event to Woodchuck (on the
        # main thread) and drop the cached 'feed' columns.
        # NOTE(review): lines missing here — a commit, and the nested
        # callback definition plus try/except wrapping the wc() call.
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
                wc()[self.key][id].used()
        mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')
    def setEntryUnread(self, id):
        # Mark an article unread and drop the cached 'feed' columns.
        # NOTE(review): a line is missing here (presumably a commit).
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.cache_invalidate('feed')
    def markAllAsRead(self):
        # Flip every unread article to read in one statement.
        # NOTE(review): a line is missing here (presumably a commit).
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.cache_invalidate('feed')
686 def isEntryRead(self, id):
687 return self.lookup('feed', 'read', id) == 1
689 def getTitle(self, id):
690 return self.lookup('feed', 'title', id)
692 def getContentLink(self, id):
693 return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
695 def getExternalLink(self, id):
696 return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
698 def getDate(self, id):
699 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
700 return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
702 def getDateTuple(self, id):
703 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
704 return time.localtime(dateStamp)
706 def getDateStamp(self, id):
707 return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        (NOTE(review): the tail of this docstring is missing from
        this chunk.)
        """
        # Prefer the entry's id, which is supposed to be globally
        # unique.
        key = entry.get('id', None)
        # NOTE(review): the `if key is None:` guards between the
        # fallbacks below, and the final return (presumably hashing
        # `key`), are missing from this chunk.
            # Next, try the link to the content.
            key = entry.get('link', None)
            # Ok, the title and the date concatenated are likely to be
            # unique.
            key = entry.get('title', None) + entry.get('date', None)
            # Hmm, the article's content will at least guarantee no
            # false negatives (i.e., missing articles)
            key = entry.get('content', None)
            # If all else fails, just use a random number.
            key = str (random.random ())
    def getIds(self, onlyUnread=False):
        # Article ids, newest first; optionally only unread ones.
        # NOTE(review): the `if onlyUnread:`/else structure and the
        # return of the id list are missing from this chunk.
            rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
            rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
    def getNextId(self, id, forward=True):
        # Step to the neighbouring article id, wrapping at the ends
        # (modulo).  NOTE(review): the construction of `ids` and the
        # `delta` assignment derived from `forward` are missing from
        # this chunk.
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]
755 def getPreviousId(self, id):
756 return self.getNextId(id, forward=False)
758 def getNumberOfUnreadItems(self):
759 return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
761 def getNumberOfEntries(self):
762 return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
    def getArticle(self, entry):
        # Render an article entry dict as a standalone XHTML page
        # (string).  NOTE(review): missing lines include
        # `link = entry["link"]` and the final `return text`.  Values
        # are interpolated without HTML-escaping — confirm the feed
        # content is trusted at this point.
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += "</body></html>"
    def getContent(self, id):
        # Read an article's cached HTML from disk; fall back to a
        # placeholder string when that fails.
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        # NOTE(review): the line below reads self.entries[...] instead
        # of the `contentLink` just fetched; `self.entries` is not
        # defined anywhere in this chunk — looks like a leftover from a
        # pre-sqlite version.  Also missing: try/except and the return.
            file = open(self.entries[id]["contentLink"])
            content = file.read()
            content = "Content unavailable"
    def extractDate(self, entry):
        # Prefer the updated timestamp, fall back to published; timegm
        # converts the feedparser struct_time to a unix timestamp.
        # NOTE(review): the fallback branch for entries with neither
        # field is missing from this chunk.
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])
    def extractContent(self, entry):
        # Pick the best body text: summary, then content[0] if longer,
        # otherwise description.  NOTE(review): the initialization of
        # `content`, the else: before the last line, and the return are
        # missing from this chunk.
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
            content = entry.get('description', '')
    def removeEntry(self, id):
        # Delete an article: its cached HTML file, its db rows, and
        # (asynchronously, on the main thread) its Woodchuck object.
        # NOTE(review): several lines are missing from this chunk (the
        # try: before the except OSError, image cleanup, commit, and
        # the nested `def doit():` with its try/except).
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        except OSError, exception:
            logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
                wc()[self.key][id].files_deleted (
                    woodchuck.DeletionResponse.Deleted)
                del wc()[self.key][id]
        mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
    # Feed subclass backing the special locally-archived-articles
    # pseudo-feed.  NOTE(review): several lines of each method are
    # missing from this chunk; bodies are reproduced as-is.

    def addArchivedArticle(self, title, link, date, configdir):
        # Insert a pending archived article; updated=0 marks it as not
        # yet downloaded.  (A commit/updateUnread likely follows in the
        # missing lines.)
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)

    # Downloads the pending articles themselves rather than a remote
    # feed.  NOTE(review): this override's signature is narrower than
    # Feed.updateFeed's (no priority/postFeedUpdateFunc) — confirm
    # callers.  The per-row loop/try structure is missing below.
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        currentTime = time.time()
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            soup = BeautifulSoup(html)
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
        return (currentTime, None, None)

    def purgeReadArticles(self):
        # Drop every archived article already marked read.
        # (missing: the `for row in rows:` line)
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
            self.removeArticle(row[0])

    def removeArticle(self, id):
        # Remove an archived article; images shared with other articles
        # are counted before deletion.  NOTE(review): the remainder of
        # this method is missing from this chunk.
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
            count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
class Listing(BaseObject):
    # Columns cached by BaseObject.lookup().  NOTE(review): the middle
    # entries of this tuple are missing from this chunk.
    cached_columns = (('feeds', 'updateTime'),
                      ('categories', 'title'))

    # NOTE(review): the `def _getdb(self):` line and its try-block are
    # missing from this chunk; the lines below are its tail.  One
    # sqlite connection per thread, kept in thread-local storage.
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)

    db = property(_getdb)
    # Lists all the feeds in a dictionary, and expose the data
    def __init__(self, config, configdir):
        # NOTE(review): many lines are missing from this chunk (e.g.
        # `self.config = config`, the if/else branches around schema
        # creation vs. migration, commits).
        self.configdir = configdir

        self.tls = threading.local ()

        # Inspect the existing schema (None on a fresh database).
        table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
        # Fresh database: create the schema and seed it.
            self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
            self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
            self.addCategory("Default Category")
            if isfile(self.configdir+"feeds.pickle"):
                self.importOldFormatFeeds()
                self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
        # Existing database: migrate missing columns in place.
            from string import find, upper
            if find(upper(table[0]), "WIDGET")<0:
                self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                self.db.execute("UPDATE feeds SET widget=1;")
            if find(upper(table[0]), "CATEGORY")<0:
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                self.db.execute("UPDATE feeds SET category=1;")

        # Check that Woodchuck's state is up to date with respect to
        # our state.
        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
        wc_init (self, True if updater else False)
        if wc().available() and updater:
            # The list of known streams.
            streams = wc().streams_list ()
            stream_ids = [s.identifier for s in streams]

            # Register any unknown streams.  Remove known streams from
            # stream_ids.
            for key in self.getListOfFeeds():
                title = self.getFeedTitle(key)
                # XXX: We should also check whether the list of
                # articles/objects in each feed/stream is up to date.
                if key not in stream_ids:
                        "Registering previously unknown channel: %s (%s)"
                    # Use a default refresh interval of 6 hours.
                    wc().stream_register (key, title, 6 * 60 * 60)
                    # Make sure the human readable name is up to date.
                    if wc()[key].human_readable_name != title:
                        wc()[key].human_readable_name = title
                    stream_ids.remove (key)

            # Unregister any streams that are no longer subscribed to.
            for id in stream_ids:
                logger.debug("Unregistering %s" % (id,))
                # NOTE(review): `w` is not defined in this chunk —
                # probably should be wc(); confirm upstream.
                w.stream_unregister (id)
    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
        # NOTE(review): several lines are missing from this chunk
        # (e.g. `import rss`, the rank counter, per-item loop header,
        # read_status computation, try:, commits).
        listing = rss.Listing(self.configdir)
        for id in listing.getListOfFeeds():
            values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
            feed = listing.getFeed(id)
            new_feed = self.getFeed(id)

            items = feed.getIds()[:]
                if feed.isEntryRead(item):
                date = timegm(feed.getDateTuple(item))
                title = feed.getTitle(item)
                newId = new_feed.generateUniqueId({"date":date, "title":title})
                # NOTE(review): `tuple(time.time())` below would raise
                # TypeError (time.time() is a float) — confirm upstream.
                values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
                new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                images = feed.getImages(item)
                for image in images:
                    new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
                new_feed.db.commit()
            self.updateUnread(id)
            logger.error("importOldFormatFeeds: %s"
                         % (traceback.format_exc(),))
        # Conversion done; drop the old pickle file.
        remove(self.configdir+"feeds.pickle")
    def addArchivedArticle(self, key, index):
        # Copy an article from feed `key` into the "ArchivedArticles"
        # pseudo-feed, creating that feed on first use.
        # NOTE(review): the `if count == 0:` guard around addFeed is
        # missing from this chunk.  Also note getDate returns a
        # formatted string while the feed table's date column is a
        # float — confirm upstream.
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")
    # Resolve defaulted settings from config and hand the update to the
    # Feed object.  NOTE(review): the signature's continuation line,
    # several `if ... is None:` guards, the proxy use_proxy check, the
    # try/except around `modified`, and the feed.updateFeed( call are
    # missing from this chunk.
    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
            # Default to 24 hours
            (use_proxy, proxy) = self.config.getProxy()

        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        # NOTE(review): eval() on a database value — safe only because
        # this column is written by _postFeedUpdate below; flagging
        # anyway, a literal_eval would be safer.
        modified = time.struct_time(eval(modified))
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
    def _queuePostFeedUpdate(self, *args, **kwargs):
        # Forward post-update bookkeeping to the main thread without
        # blocking the worker (async dispatch).
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        # Persist a finished feed update and refresh derived state/UI.
        # NOTE(review): lines are missing here (the conditional
        # choosing between the two UPDATE statements, struct_time
        # handling, the title parameter tuple, commits).
        modified=str(tuple(modified))
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )

        # Only fill in the title if none was set by the user.
        if title is not None:
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
        self.cache_invalidate('feeds')
        self.updateUnread(key)

        update_server_object().ArticleCountUpdated()

        # Report overall update progress over D-Bus.
        # NOTE(review): `jobs_at_start` is not initialised anywhere in
        # this chunk — confirm it is defined at module level upstream.
        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

        percent = (100 * ((completed + in_progress / 2.))
                   / (completed + in_progress + queued))

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        # Batch finished: rebase the completed-jobs counter.
        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']
1088 def getFeed(self, key):
1089 if key == "ArchivedArticles":
1090 return ArchivedArticles(self.configdir, key)
1091 return Feed(self.configdir, key)
    def editFeed(self, key, title, url, category=None):
        # Update a subscription's metadata and push the new title to
        # Woodchuck.  NOTE(review): the `if category is not None:`/else
        # around the two UPDATEs, a commit, and the try/except around
        # the wc() access are missing from this chunk.
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.cache_invalidate('feeds')

        if wc().available():
                wc()[key].human_readable_name = title
                logger.debug("Feed %s (%s) unknown." % (key, title))
    def getFeedUpdateTime(self, key):
        # Return a human-readable "how long ago" string for the feed's
        # last update.  NOTE(review): lines are missing from this chunk
        # (the never-updated case and the days-range guard before
        # "%d days ago").
        update_time = self.lookup('feeds', 'updateTime', key)

        delta = time.time() - update_time

        delta_hours = delta / (60. * 60.)
        if delta_hours < .1:
            return "A few minutes ago"
        if delta_hours < .75:
            return "Less than an hour ago"
        if delta_hours < 1.5:
            return "About an hour ago"
        if delta_hours < 18:
            return "About %d hours ago" % (int(delta_hours + 0.5),)

        delta_days = delta_hours / 24.
        if delta_days < 1.5:
            return "About a day ago"
            return "%d days ago" % (int(delta_days + 0.5),)

        delta_weeks = delta_days / 7.
        if delta_weeks <= 8:
            return "%d weeks ago" % int(delta_weeks + 0.5)

        delta_months = delta_days / 30.
        if delta_months <= 30:
            return "%d months ago" % int(delta_months + 0.5)

        # Very old: just show the date.
        return time.strftime("%x", time.gmtime(update_time))
1141 def getFeedNumberOfUnreadItems(self, key):
1142 return self.lookup('feeds', 'unread', key)
    def getFeedTitle(self, key):
        title = self.lookup('feeds', 'title', key)
        # NOTE(review): the guard between these lines (returning the
        # stored title when it is non-empty) is missing from this
        # chunk; the URL is the fallback display name.
        return self.getFeedUrl(key)
1151 def getFeedUrl(self, key):
1152 return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1154 def getFeedCategory(self, key):
1155 return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    def getListOfFeeds(self, category=None):
        # Feed ids in rank order, optionally restricted to one
        # category.  NOTE(review): the `if category is not None:`/else
        # structure and the list-building return are missing from this
        # chunk.
            rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
            rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
1168 def getListOfCategories(self):
1169 return list(row[0] for row in self.db.execute(
1170 "SELECT id FROM categories ORDER BY rank;"))
1172 def getCategoryTitle(self, id):
1173 return self.lookup('categories', 'title', id)
    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        # Map the UI sort-order name onto a SQL ORDER BY clause.
        if order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
        # NOTE(review): `category` is interpolated into the SQL string
        # rather than bound as a ? parameter — injection-prone unless
        # category is always an int; confirm upstream.  The onlyUnread
        # if/else and the final return are missing from this chunk.
            sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
            sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
        rows = self.db.execute(sql)
    def getFavicon(self, key):
        # Path to the feed's cached favicon, if it was downloaded.
        # NOTE(review): the return statements (filename / None) are
        # missing from this chunk.
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):
    def updateUnread(self, key):
        # Recompute the feed's unread counter from its own database and
        # persist it.  NOTE(review): a commit appears to be missing.
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        self.cache_invalidate('feeds')
    def addFeed(self, title, url, id=None, category=1):
        # Subscribe to a new feed; no-op if the id already exists.
        # NOTE(review): missing lines include deriving a default id
        # from the url, the `if count == 0:` branch, `max_rank = 0`
        # fallback, commit, feed creation, the stream_register kwargs
        # tail, and the return value.
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
            # New feeds go to the bottom of the manual ranking.
            max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            if max_rank == None:
            values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)

            # Ask for the feed object, it will create the necessary tables
            if wc().available():
                # Register the stream with Woodchuck.  Update approximately
                # every 6 hours.
                wc().stream_register(stream_identifier=id,
                                     human_readable_name=title,
    def addCategory(self, title):
        # Append a new category at the end of the ranking.
        # NOTE(review): the None-fallbacks for rank/id on an empty
        # table and a commit are missing from this chunk.
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
    def removeFeed(self, key):
        # Unsubscribe: unregister from Woodchuck, delete the db row,
        # close the rank gap, and remove the on-disk cache directory.
        # NOTE(review): the try: and stream_unregister call inside the
        # wc() branch and a commit are missing from this chunk.
        if wc().available ():
                logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )

        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
    def removeCategory(self, key):
        # Delete a category (refusing to delete the last one); feeds in
        # it fall back to category 1 and the rank gap is closed.
        # NOTE(review): a commit appears to be missing from this chunk.
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
1273 #def saveConfig(self):
1274 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
1275 # file = open(self.configdir+"feeds.pickle", "w")
1276 # pickle.dump(self.listOfFeeds, file)
    def moveUp(self, key):
        # Swap this feed's rank with the entry above it.
        # NOTE(review): the `if rank > 0:` guard and a commit are
        # missing from this chunk.
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
    def moveCategoryUp(self, key):
        # Swap this category's rank with the entry above it.
        # NOTE(review): the rank guard and a commit are missing from
        # this chunk.
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
    def moveDown(self, key):
        # Swap this feed's rank with the entry below it.
        # NOTE(review): the `if rank < max_rank:` guard and a commit
        # are missing from this chunk.
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
    def moveCategoryDown(self, key):
        # Swap this category's rank with the entry below it.
        # NOTE(review): the `if rank < max_rank:` guard and a commit
        # are missing from this chunk.
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )