1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # ============================================================================
22 # Author : Yves Marcoz
24 # Description : Simple RSS Reader
25 # ============================================================================
27 from __future__ import with_statement
30 from os.path import isfile, isdir
31 from shutil import rmtree
32 from os import mkdir, remove, utime
38 from BeautifulSoup import BeautifulSoup
39 from urlparse import urljoin
40 from calendar import timegm
43 from wc import wc, wc_init, woodchuck
46 from updatedbus import update_server_object
48 from jobmanager import JobManager
50 from httpprogresshandler import HTTPProgressHandler
# Module-level logger named after this module (standard logging idiom).
54 logger = logging.getLogger(__name__)
# NOTE(review): orphaned body of a helper (getId(string) per its call sites,
# e.g. getId(url) below) -- the `def` line is elided from this excerpt.
# Returns the hex MD5 digest of the argument; used to derive stable cache
# filenames from URLs.  md5.new is the deprecated Python 2 md5 module API.
57 return md5.new(string).hexdigest()
def download_callback(connection):
    """Abort hook polled during HTTP transfers.

    Raises KeyboardInterrupt to unwind the current download when the
    job manager has been told to shut down; otherwise returns quietly.
    The `connection` argument is accepted but not consulted.
    """
    manager = JobManager()
    if not manager.do_quit:
        return
    raise KeyboardInterrupt
# Build a urllib2 opener for feed/image downloads.
# NOTE(review): lines are elided here -- the initialisation of `openers`
# and the else-branch pairing of line 69 are not visible; presumably
# `openers = []` precedes line 66.  Verify against the full source.
63 def downloader(progress_handler=None, proxy=None):
66 if progress_handler is not None:
67 openers.append(progress_handler)
# Default handler: wraps the module-level download_callback so transfers
# abort when the job manager quits.
69 openers.append(HTTPProgressHandler(download_callback))
74 return urllib2.build_opener(*openers)
76 # If not None, a subprocess.Popen object corresponding to a
77 # update_feeds.py process.
78 update_feed_process = None
# Cached dbus.Interface proxy for org.marcoz.feedingit; reset to None when
# a call fails or the daemon process is (re)started (see updateFeed below).
80 update_feeds_iface = None
# Mixin providing a small time-bounded (60 s) read cache over sqlite
# columns.  Subclasses (Feed, Listing) declare `cached_columns` and expose
# a `db` connection; many lines of this class are elided in this excerpt.
84 class BaseObject(object):
85 # Columns to cache. Classes that inherit from this and use the
86 # cache mechanism should set this to a list of tuples, each of
87 # which contains two entries: the table and the column. Note that
88 # both are case sensitive.
# Drop cached rows so the next lookup() re-reads from sqlite.
91 def cache_invalidate(self, table=None):
95 If table is not None, invalidate only the specified table.
96 Otherwise, drop the whole cache.
98 if not hasattr(self, 'cache'):
104 if table in self.cache:
105 del self.cache[table]
107 def lookup(self, table, column, id=None):
109 Look up a column or value. Uses a cache for columns in
110 cached_columns. Note: the column is returned unsorted.
112 if not hasattr(self, 'cache'):
115 # Cache data for at most 60 seconds.
118 cache = self.cache[table]
# cache[None] holds the fill timestamp; expire the whole table after 60 s.
120 if time.time() - cache[None] > 60:
121 self.cache[table].clear()
126 or (table, column) not in self.cached_columns):
127 # The cache is empty or the caller wants a column that we
129 if (table, column) in self.cached_columns:
132 self.cache[table] = cache = {}
134 for t, c in self.cached_columns:
# NOTE(review): `id` is interpolated directly into the SQL string rather
# than bound as a parameter -- SQL-injection-prone if ids are attacker
# controlled; elsewhere this file uses `?` placeholders.  Worth fixing.
146 where = "where id = '%s'" % id
150 results = self.db.execute(
151 "SELECT %s FROM %s %s" % (','.join(columns), table, where))
157 for index, value in enumerate(values):
158 cache[columns[index]][i] = value
167 results.append(values[0])
171 cache = self.cache[table]
# Cached path: keyed lookup when id given, otherwise all values.
175 return cache[column][id]
177 return cache[column].values()
# One subscribed feed, backed by <configdir>/<key>.db (tables: feed,
# images) with cached article HTML under <configdir>/<key>.d/.
181 class Feed(BaseObject):
183 cached_columns = (('feed', 'read'),
# Class-wide lock serialising the sqlite-heavy phase of feed updates.
186 serial_execution_lock = threading.Lock()
# _getdb: per-thread sqlite connection memoised on self.tls (body elided).
191 except AttributeError:
192 db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
195 db = property(_getdb)
197 def __init__(self, configdir, key):
# NOTE(review): self.key is read at line 200 but its assignment (likely
# `self.key = key` on an elided line) is not visible here.
199 self.configdir = configdir
200 self.dir = "%s/%s.d" %(self.configdir, self.key)
201 self.tls = threading.local ()
# First run for this feed: create the directory and the two tables.
203 if not isdir(self.dir):
205 if not isfile("%s/%s.db" %(self.dir, self.key)):
206 self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
207 self.db.execute("CREATE TABLE images (id text, imagePath text);")
# Download an image referenced by an article and cache it on disk; the
# cache filename is the MD5 of the image URL.  Returns the filename
# (return statement elided from this excerpt -- verify).
210 def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
211 filename = configdir+key+".d/"+getId(url)
212 if not isfile(filename):
215 opener = downloader(proxy=proxy)
217 abs_url = urljoin(baseurl,url)
218 f = opener.open(abs_url)
220 with open(filename, "w") as outf:
# Python 2 `except ..., exc` syntax; best-effort: log and continue.
225 except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
226 logger.info("Could not download image %s: %s"
227 % (abs_url, str (exception)))
230 exception = sys.exc_info()[0]
232 logger.info("Downloading image %s: %s" %
233 (abs_url, traceback.format_exc()))
241 #open(filename,"a").close() # "Touch" the file
# Touch the cached file so expiry (mtime-based, see _updateFeed) resets.
# NOTE(review): the handle opened here is never closed in the visible
# lines -- possible fd leak; confirm against the full source.
242 file = open(filename,"a")
243 utime(filename, None)
# Schedule an update of this feed.  Inside the update_feeds.py daemon the
# work is queued on the JobManager directly; otherwise the daemon is asked
# (via dbus) to do it, spawning the daemon first if needed.  Several lines
# are elided in this excerpt (e.g. the `def doit` wrapper around line 251).
247 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
248 if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
251 self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
253 JobManager().execute(doit(), self.key, priority=priority)
# Ask the (possibly already running) daemon over dbus to update this feed.
255 def send_update_request():
256 global update_feeds_iface
257 if update_feeds_iface is None:
258 bus=dbus.SessionBus()
259 remote_object = bus.get_object(
260 "org.marcoz.feedingit", # Connection name
261 "/org/marcoz/feedingit/update" # Object's path
263 update_feeds_iface = dbus.Interface(
264 remote_object, 'org.marcoz.feedingit')
267 update_feeds_iface.Update(self.key)
269 logger.error("Invoking org.marcoz.feedingit.Update: %s"
# Drop the cached proxy on failure so the next attempt reconnects.
271 update_feeds_iface = None
275 if send_update_request():
276 # Success! It seems we were able to start the update
277 # daemon via dbus (or, it was already running).
280 global update_feed_process
281 if (update_feed_process is None
282 or update_feed_process.poll() is not None):
283 # The update_feeds process is not running. Start it.
284 update_feeds = os.path.join(os.path.dirname(__file__),
286 argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
287 logger.debug("Starting update_feeds: running %s"
289 update_feed_process = subprocess.Popen(argv)
290 # Make sure the dbus calls go to the right process:
292 update_feeds_iface = None
# Second attempt after spawning the daemon (retry loop partly elided).
295 if send_update_request():
# The actual (blocking) feed update: fetch with feedparser using the
# etag/modified conditional-GET pair, report transfer stats to woodchuck,
# store new articles and their images in sqlite, then expire old entries
# and files.  Runs on a worker thread; raises KeyboardInterrupt to abort
# when the job manager shuts down.  Many lines (try/except scaffolding,
# helper definitions such as wc_success) are elided from this excerpt.
299 def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
301 have_serial_execution_lock = False
303 download_start = time.time ()
305 progress_handler = HTTPProgressHandler(download_callback)
307 openers = [progress_handler]
309 openers.append (proxy)
310 kwargs = {'handlers':openers}
# Conditional GET: feedparser sends If-None-Match / If-Modified-Since.
312 tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
313 download_duration = time.time () - download_start
315 opener = downloader(progress_handler, proxy)
317 if JobManager().do_quit:
318 raise KeyboardInterrupt
320 process_start = time.time()
322 # Expiry time is in hours
323 expiry = float(expiryTime) * 3600.
# wc_success (definition partly elided): register the stream if needed and
# report the transfer's byte counts and timing to woodchuck.
329 wc().stream_register (self.key, "", 6 * 60 * 60)
330 except woodchuck.ObjectExistsError:
333 wc()[self.key].updated (
334 indicator=(woodchuck.Indicator.ApplicationVisual
335 |woodchuck.Indicator.StreamWide),
336 transferred_down=progress_handler.stats['received'],
337 transferred_up=progress_handler.stats['sent'],
338 transfer_time=download_start,
339 transfer_duration=download_duration,
340 new_objects=len (tmp.entries),
341 objects_inline=len (tmp.entries))
344 "Failed to register update of %s with woodchuck!"
347 http_status = tmp.get ('status', 200)
349 # Check if the parse was successful. If the http status code
350 # is 304, then the download was successful, but there is
351 # nothing new. Indeed, no content is returned. This makes a
352 # 304 look like an error because there are no entries and the
353 # parse fails. But really, everything went great! Check for
355 if http_status == 304:
356 logger.debug("%s: No changes to feed." % (self.key,))
357 mainthread.execute(wc_success, async=True)
359 elif len(tmp["entries"])==0 and not tmp.version:
360 # An error occurred fetching or parsing the feed. (Version
361 # will be either None if e.g. the connection timed out or
362 # '' if the data is not a proper feed)
364 "Error fetching %s: version is: %s: error: %s"
365 % (url, str (tmp.version),
366 str (tmp.get ('bozo_exception', 'Unknown error'))))
# Map the HTTP status class to a woodchuck failure code and report it.
368 def register_stream_update_failed(http_status):
370 logger.debug("%s: stream update failed!" % self.key)
373 # It's not easy to get the feed's title from here.
374 # At the latest, the next time the application is
375 # started, we'll fix up the human readable name.
376 wc().stream_register (self.key, "", 6 * 60 * 60)
377 except woodchuck.ObjectExistsError:
379 ec = woodchuck.TransferStatus.TransientOther
380 if 300 <= http_status and http_status < 400:
381 ec = woodchuck.TransferStatus.TransientNetwork
382 if 400 <= http_status and http_status < 500:
383 ec = woodchuck.TransferStatus.FailureGone
384 if 500 <= http_status and http_status < 600:
385 ec = woodchuck.TransferStatus.TransientNetwork
386 wc()[self.key].update_failed(ec)
390 register_stream_update_failed(
391 http_status=http_status),
# Success path: record new articles.
394 currentTime = time.time()
395 # The etag and modified value should only be updated if the content was not null
401 modified = tmp["modified"]
# Best-effort favicon fetch relative to the feed's site link.
405 abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
406 f = opener.open(abs_url)
409 outf = open(self.dir+"/favicon.ico", "w")
413 except (urllib2.HTTPError, urllib2.URLError), exception:
414 logger.debug("Could not download favicon %s: %s"
415 % (abs_url, str (exception)))
# Serialise the database/processing phase across update threads.
417 self.serial_execution_lock.acquire ()
418 have_serial_execution_lock = True
420 #reversedEntries = self.getEntries()
421 #reversedEntries.reverse()
# Process oldest-first so insertion order matches publication order.
425 tmp["entries"].reverse()
426 for entry in tmp["entries"]:
427 # Yield so as to make the main thread a bit more
431 if JobManager().do_quit:
432 raise KeyboardInterrupt
# Snapshot transfer counters so per-article deltas can be computed.
434 received_base = progress_handler.stats['received']
435 sent_base = progress_handler.stats['sent']
438 date = self.extractDate(entry)
# Fill in defaults for missing title/author/id fields.
442 entry["title"] = "No Title"
450 entry["author"] = None
451 if(not(entry.has_key("id"))):
453 content = self.extractContent(entry)
454 object_size = len (content)
455 received_base -= len (content)
456 tmpEntry = {"title":entry["title"], "content":content,
457 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
458 id = self.generateUniqueId(tmpEntry)
460 #articleTime = time.mktime(self.entries[id]["dateTuple"])
461 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
463 baseurl = tmpEntry["link"]
# Image caching downloads happen outside the serial lock (network-bound).
465 if imageCache and len(images) > 0:
466 self.serial_execution_lock.release ()
467 have_serial_execution_lock = False
469 filename = self.addImage(
470 configdir, self.key, baseurl, img['src'],
# Rewrite the img src to point at the local cached copy.
473 img['src']="file://%s" %filename
474 count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
476 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
480 object_size += os.path.getsize (filename)
481 except os.error, exception:
482 logger.error ("Error getting size of %s: %s"
483 % (filename, exception))
484 self.serial_execution_lock.acquire ()
485 have_serial_execution_lock = True
# Persist the rendered article HTML and the feed-table row.
487 tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
488 file = open(tmpEntry["contentLink"], "w")
489 file.write(soup.prettify())
492 self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
495 values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
496 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
500 # self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
502 # filename = configdir+self.key+".d/"+id+".html"
503 # file = open(filename,"a")
504 # utime(filename, None)
506 # images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
507 # for image in images:
508 # file = open(image[0],"a")
509 # utime(image[0], None)
514 # Register the object with Woodchuck and mark it as
516 def register_object_transferred(
517 id, title, publication_time,
518 sent, received, object_size):
520 logger.debug("Registering transfer of object %s"
523 obj = wc()[self.key].object_register(
524 object_identifier=id,
525 human_readable_name=title)
526 except woodchuck.ObjectExistsError:
527 obj = wc()[self.key][id]
529 obj.publication_time = publication_time
532 woodchuck.Indicator.ApplicationVisual
533 |woodchuck.Indicator.StreamWide),
534 transferred_down=received,
536 object_size=object_size)
539 # If the entry does not contain a publication
540 # time, the attribute won't exist.
541 pubtime = entry.get('date_parsed', None)
543 publication_time = time.mktime (pubtime)
545 publication_time = None
# Per-article byte deltas relative to the snapshots above.
547 sent = progress_handler.stats['sent'] - sent_base
548 received = (progress_handler.stats['received']
552 register_object_transferred(
554 title=tmpEntry["title"],
555 publication_time=publication_time,
556 sent=sent, received=received,
557 object_size=object_size),
562 "%s: Update successful: transferred: %d/%d; objects: %d)"
564 progress_handler.stats['sent'],
565 progress_handler.stats['received'],
567 mainthread.execute (wc_success, async=True)
# Expiry: unread articles live 2*expiry, read articles live expiry.
570 rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
572 self.removeEntry(row[0])
574 from glob import glob
# Sweep orphaned cached files whose mtime is older than 3*expiry.
576 for file in glob(configdir+self.key+".d/*"):
580 # put the two dates into matching format
582 lastmodDate = stats[8]
584 expDate = time.time()-expiry*3
585 # check if image-last-modified-date is outdated
587 if expDate > lastmodDate:
591 #print 'Removing', file
593 # XXX: Tell woodchuck.
594 remove(file) # commented out for testing
596 except OSError, exception:
598 logger.error('Could not remove %s: %s'
599 % (file, str (exception)))
600 logger.debug("updated %s: %fs in download, %fs in processing"
601 % (self.key, download_duration,
602 time.time () - process_start))
604 logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
# Cleanup path (finally-block; scaffolding elided): always release the lock.
608 if have_serial_execution_lock:
609 self.serial_execution_lock.release ()
613 rows = self.db.execute("SELECT MAX(date) FROM feed;")
617 logger.error("Fetching update time: %s: %s"
618 % (str(e), traceback.format_exc()))
625 title = tmp.feed.title
626 except (AttributeError, UnboundLocalError), exception:
# Notify the caller (e.g. Listing._queuePostFeedUpdate) of the outcome.
628 if postFeedUpdateFunc is not None:
629 postFeedUpdateFunc (self.key, updateTime, etag, modified,
630 title, *postFeedUpdateFuncArgs)
632 self.cache_invalidate()
# Mark one article read, tell woodchuck it was "used" (on the main thread,
# asynchronously), and drop the 'feed' column cache.  NOTE(review): the
# commit and the `def doit` wrapper around line 640 are on elided lines.
634 def setEntryRead(self, id):
635 self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
640 wc()[self.key][id].used()
644 mainthread.execute(doit, async=True)
645 self.cache_invalidate('feed')
# Mark one article unread and invalidate the cache (commit line elided).
647 def setEntryUnread(self, id):
648 self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
650 self.cache_invalidate('feed')
# Mark every unread article in this feed as read (commit line elided).
652 def markAllAsRead(self):
653 self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
655 self.cache_invalidate('feed')
def isEntryRead(self, id):
    """Return True when article `id` is marked read (read column == 1)."""
    read_flag = self.lookup('feed', 'read', id)
    return read_flag == 1
def getTitle(self, id):
    """Return the stored title of article `id` via the column cache."""
    title = self.lookup('feed', 'title', id)
    return title
def getContentLink(self, id):
    """Return the path of the cached HTML rendering of article `id`."""
    row = self.db.execute(
        "SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()
    return row[0]
def getExternalLink(self, id):
    """Return the article's original (external) URL."""
    row = self.db.execute(
        "SELECT link FROM feed WHERE id=?;", (id,)).fetchone()
    return row[0]
def getDate(self, id):
    """Return the article's timestamp formatted in local time,
    e.g. 'Mon, 01 Jan 2001 00:00:00'."""
    row = self.db.execute(
        "SELECT date FROM feed WHERE id=?;", (id,)).fetchone()
    local = time.localtime(row[0])
    return time.strftime("%a, %d %b %Y %H:%M:%S", local)
def getDateTuple(self, id):
    """Return the article's timestamp as a local-time struct_time."""
    row = self.db.execute(
        "SELECT date FROM feed WHERE id=?;", (id,)).fetchone()
    return time.localtime(row[0])
def getDateStamp(self, id):
    """Return the raw float timestamp stored for article `id`."""
    row = self.db.execute(
        "SELECT date FROM feed WHERE id=?;", (id,)).fetchone()
    return row[0]
# Derive a stable article id from the entry's fields, trying id, link,
# title+date, content, then a random fallback.  The surrounding if/elif
# scaffolding and the final hashing/return lines are elided here
# (presumably the key is hashed via getId -- verify).
680 def generateUniqueId(self, entry):
682 Generate a stable identifier for the article. For the same
683 entry, this should result in the same identifier. If
684 possible, the identifier should remain the same even if the
687 # Prefer the entry's id, which is supposed to be globally
689 key = entry.get('id', None)
691 # Next, try the link to the content.
692 key = entry.get('link', None)
694 # Ok, the title and the date concatenated are likely to be
696 key = entry.get('title', None) + entry.get('date', None)
698 # Hmm, the article's content will at least guarantee no
699 # false negatives (i.e., missing articles)
700 key = entry.get('content', None)
702 # If all else fails, just use a random number.
703 key = str (random.random ())
# Return article ids newest-first; optionally only unread ones.  The
# if/else around the two queries and the return are on elided lines.
706 def getIds(self, onlyUnread=False):
708 rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
710 rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
# Return the id adjacent to `id` in date order, wrapping around; `delta`
# (+1/-1 from `forward`) is computed on elided lines.
717 def getNextId(self, id, forward=True):
723 index = ids.index(id)
724 return ids[(index + delta) % len(ids)]
def getPreviousId(self, id):
    """Return the id before `id` in date order (wraps around);
    convenience wrapper over getNextId."""
    return self.getNextId(id, forward=False)
def getNumberOfUnreadItems(self):
    """Count the articles in this feed still marked unread."""
    row = self.db.execute(
        "SELECT count(*) FROM feed WHERE read=0;").fetchone()
    return row[0]
def getNumberOfEntries(self):
    """Count all articles stored for this feed."""
    row = self.db.execute("SELECT count(*) FROM feed;").fetchone()
    return row[0]
# Render an entry dict (as built in _updateFeed) into a standalone XHTML
# page with title/author/date header.  Several lines (link extraction,
# body insertion, return) are elided in this excerpt.
735 def getArticle(self, entry):
736 #self.setEntryRead(id)
737 #entry = self.entries[id]
738 title = entry['title']
739 #content = entry.get('content', entry.get('summary_detail', {}))
740 content = entry["content"]
743 author = entry['author']
744 date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
746 #text = '''<div style="color: black; background-color: white;">'''
# NOTE(review): title/author are concatenated into HTML without escaping --
# potential markup breakage/injection from feed content.
747 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
748 text += "<html><head><title>" + title + "</title>"
749 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
750 #text += '<style> body {-webkit-user-select: none;} </style>'
751 text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
753 text += "<BR /><small><i>Author: " + author + "</i></small>"
754 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
755 text += "<BR /><BR />"
757 text += "</body></html>"
# Read the cached article HTML from disk; fall back to a placeholder on
# error (try/except scaffolding and return elided).  NOTE(review): reads
# self.entries at line 763 although the row was fetched at 761 -- looks
# inconsistent; confirm against the full source.
760 def getContent(self, id):
761 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
763 file = open(self.entries[id]["contentLink"])
764 content = file.read()
767 content = "Content unavailable"
# Extract a UTC timestamp from the entry, preferring updated_parsed over
# published_parsed; the fallback branch (likely time.time()) is elided.
770 def extractDate(self, entry):
771 if entry.has_key("updated_parsed"):
772 return timegm(entry["updated_parsed"])
773 elif entry.has_key("published_parsed"):
774 return timegm(entry["published_parsed"])
# Pick the longest available body: summary, full content, or description.
778 def extractContent(self, entry):
780 if entry.has_key('summary'):
781 content = entry.get('summary', '')
782 if entry.has_key('content'):
783 if len(entry.content[0].value) > len(content):
784 content = entry.content[0].value
786 content = entry.get('description', '')
# Delete one article: its cached HTML, its DB rows, and (asynchronously)
# its woodchuck object.  The image-file cleanup and `def doit` wrapper
# around lines 802-804 are elided.
789 def removeEntry(self, id):
790 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
794 except OSError, exception:
795 logger.error("Deleting %s: %s" % (contentLink, str (exception)))
796 self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
797 self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
802 wc()[self.key][id].files_deleted (
803 woodchuck.DeletionResponse.Deleted)
804 del wc()[self.key][id]
808 mainthread.execute (doit, async=True)
# Pseudo-feed holding articles the user explicitly archived.  Reuses the
# Feed schema; updated=0 marks an archived row whose page has not yet been
# downloaded.
810 class ArchivedArticles(Feed):
811 def addArchivedArticle(self, title, link, date, configdir):
812 id = self.generateUniqueId({"date":date, "title":title})
# Row shape matches Feed: contentLink temporarily holds the external link.
813 values = (id, title, link, date, 0, link, 0)
814 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
# Download the page for each not-yet-fetched archived row.  NOTE(review):
# this override's signature drops Feed.updateFeed's priority/
# postFeedUpdateFunc parameters; loop/soup scaffolding is elided.
817 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
819 rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
821 currentTime = time.time()
824 f = urllib2.urlopen(link)
825 #entry["content"] = f.read()
828 soup = BeautifulSoup(html)
832 filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
834 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
836 contentLink = configdir+self.key+".d/"+id+".html"
837 file = open(contentLink, "w")
838 file.write(soup.prettify())
841 self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
# Same (currentTime, etag, modified) triple shape as Feed's update result.
843 return (currentTime, None, None)
# Remove every archived article already marked read (loop body elided).
845 def purgeReadArticles(self):
846 rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
849 self.removeArticle(row[0])
# Delete an archived article; the count query keeps image files that are
# still referenced by another article (remaining lines elided).
851 def removeArticle(self, id):
852 rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
855 count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
# Registry of all subscribed feeds and their categories, backed by
# <configdir>/feeds.db (tables: feeds, categories).
862 class Listing(BaseObject):
864 cached_columns = (('feeds', 'updateTime'),
867 ('categories', 'title'))
# _getdb: per-thread sqlite connection memoised on self.tls (body elided).
872 except AttributeError:
873 db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
876 db = property(_getdb)
878 # Lists all the feeds in a dictionary, and exposes the data
879 def __init__(self, config, configdir):
881 self.configdir = configdir
883 self.tls = threading.local ()
# Bootstrap/migration: inspect the schema of the first sqlite_master row.
886 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
888 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
889 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
890 self.addCategory("Default Category")
# Migrate pickled pre-sqlite data if present, else seed a default feed.
891 if isfile(self.configdir+"feeds.pickle"):
892 self.importOldFormatFeeds()
894 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
# NOTE(review): the `string` module's find/upper are long deprecated
# (str methods exist); kept here -- this is Python 2 era code.
896 from string import find, upper
897 if find(upper(table[0]), "WIDGET")<0:
898 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
899 self.db.execute("UPDATE feeds SET widget=1;")
901 if find(upper(table[0]), "CATEGORY")<0:
902 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
903 self.addCategory("Default Category")
904 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
905 self.db.execute("UPDATE feeds SET category=1;")
910 # Check that Woodchuck's state is up to date with respect to our
# Reconcile woodchuck's stream registry with the current subscriptions:
# register unknown feeds, refresh titles, unregister stale streams.  Only
# the update_feeds.py daemon registers upcalls (True to wc_init).
912 updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
913 wc_init (self, True if updater else False)
914 if wc().available() and updater:
915 # The list of known streams.
916 streams = wc().streams_list ()
917 stream_ids = [s.identifier for s in streams]
919 # Register any unknown streams. Remove known streams from
921 for key in self.getListOfFeeds():
922 title = self.getFeedTitle(key)
923 # XXX: We should also check whether the list of
924 # articles/objects in each feed/stream is up to date.
925 if key not in stream_ids:
927 "Registering previously unknown channel: %s (%s)"
929 # Use a default refresh interval of 6 hours.
930 wc().stream_register (key, title, 6 * 60 * 60)
932 # Make sure the human readable name is up to date.
933 if wc()[key].human_readable_name != title:
934 wc()[key].human_readable_name = title
935 stream_ids.remove (key)
938 # Unregister any streams that are no longer subscribed to.
939 for id in stream_ids:
940 logger.debug("Unregistering %s" % (id,))
# NOTE(review): `w` is not defined in any visible line -- either it is
# bound on an elided line (936-937) or this should be wc(); if undefined
# this raises NameError when a stale stream exists.  Verify.
941 w.stream_unregister (id)
943 def importOldFormatFeeds(self):
944 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
# Reads the legacy pickle-based rss.Listing, copies feeds, articles and
# image references into the sqlite schema, then deletes feeds.pickle.
# Loop scaffolding and the read_status assignment are on elided lines.
946 listing = rss.Listing(self.configdir)
948 for id in listing.getListOfFeeds():
951 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
952 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
955 feed = listing.getFeed(id)
956 new_feed = self.getFeed(id)
# Copy of the id list: removal during iteration elsewhere, presumably.
958 items = feed.getIds()[:]
961 if feed.isEntryRead(item):
965 date = timegm(feed.getDateTuple(item))
966 title = feed.getTitle(item)
967 newId = new_feed.generateUniqueId({"date":date, "title":title})
# NOTE(review): tuple(time.time()) applies tuple() to a float, which
# raises TypeError in any Python version -- likely a latent bug (probably
# meant time.time() alone); this whole block appears to run inside an
# exception handler that logs at line 980.  Verify before fixing.
968 values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
969 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
972 images = feed.getImages(item)
974 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
978 self.updateUnread(id)
980 logger.error("importOldFormatFeeds: %s"
981 % (traceback.format_exc(),))
# Migration done (or failed and logged): remove the legacy pickle.
982 remove(self.configdir+"feeds.pickle")
# Copy one article from feed `key` into the special "ArchivedArticles"
# pseudo-feed, creating that feed on first use.
985 def addArchivedArticle(self, key, index):
986 feed = self.getFeed(key)
987 title = feed.getTitle(index)
988 link = feed.getExternalLink(index)
# NOTE(review): getDate returns a formatted string, yet Feed.date is a
# float column and ArchivedArticles.addArchivedArticle stores this value
# directly -- possible type inconsistency; confirm intended behavior.
989 date = feed.getDate(index)
990 count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
992 self.addFeed("Archived Articles", "", id="ArchivedArticles")
994 archFeed = self.getFeed("ArchivedArticles")
995 archFeed.addArchivedArticle(title, link, date, self.configdir)
996 self.updateUnread("ArchivedArticles")
# Resolve per-feed settings (expiry, proxy, image cache) from config,
# then delegate to Feed.updateFeed with _queuePostFeedUpdate as callback.
# The parameter list continues on an elided line (priority, etc.).
998 def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
1000 if expiryTime is None:
1001 expiryTime = self.config.getExpiry()
1003 # Default to 24 hours
1006 (use_proxy, proxy) = self.config.getProxy()
1009 if imageCache is None:
1010 imageCache = self.config.getImageCache()
1012 feed = self.getFeed(key)
1013 (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
# NOTE(review): eval() of a database value -- the 'modified' column is
# written by _postFeedUpdate as str(tuple(...)), but eval of stored data
# is risky if the DB is ever tampered with; ast.literal_eval would be the
# safe equivalent.  Flagged, not changed (doc-only edit).
1015 modified = time.struct_time(eval(modified))
1019 self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
1020 priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
# Trampoline: marshal the post-update callback onto the main thread.
# (`async` is a valid keyword argument name only in Python 2.)
1022 def _queuePostFeedUpdate(self, *args, **kwargs):
1023 mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
# Runs on the main thread after a feed update: persist etag/modified and
# update time, backfill an empty title, refresh unread counts, and push
# progress to the dbus update server.  Branch scaffolding is elided.
1025 def _postFeedUpdate(self, key, updateTime, etag, modified, title):
# Serialise the struct_time as a literal tuple string (read back via
# eval in updateFeed above).
1029 modified=str(tuple(modified))
1031 self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
1033 self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
1035 if title is not None:
# Only fill the title in when the stored one is the empty string.
1036 self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
1039 self.cache_invalidate('feeds')
1040 self.updateUnread(key)
1042 update_server_object().ArticleCountUpdated()
# Progress reporting: percentage of jobs done since the batch started.
1044 stats = JobManager().stats()
1045 global jobs_at_start
1046 completed = stats['jobs-completed'] - jobs_at_start
1047 in_progress = stats['jobs-in-progress']
1048 queued = stats['jobs-queued']
# NOTE(review): divides by completed+in_progress+queued; a zero-total
# guard is not visible here (line 1049 elided) -- confirm it exists.
1050 percent = (100 * ((completed + in_progress / 2.))
1051 / (completed + in_progress + queued))
1053 update_server_object().UpdateProgress(
1054 percent, completed, in_progress, queued, 0, 0, 0, key)
# Batch finished: rebaseline the completed-jobs counter.
1056 if in_progress == 0 and queued == 0:
1057 jobs_at_start = stats['jobs-completed']
def getFeed(self, key):
    """Instantiate the Feed object for `key`.

    The reserved key "ArchivedArticles" yields the ArchivedArticles
    subclass; any other key yields a plain Feed.
    """
    feed_class = ArchivedArticles if key == "ArchivedArticles" else Feed
    return feed_class(self.configdir, key)
# Change a feed's title/url (and optionally category), then mirror the new
# title into woodchuck.  Commit/branch lines are elided.
1064 def editFeed(self, key, title, url, category=None):
1066 self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
1068 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
1070 self.cache_invalidate('feeds')
1072 if wc().available():
1074 wc()[key].human_readable_name = title
1076 logger.debug("Feed %s (%s) unknown." % (key, title))
# Humanise the feed's last update time ("A few minutes ago" ... absolute
# date).  NOTE(review): several guard lines are elided (e.g. the never-
# updated case around 1080-1083 and the days/weeks cutoffs at 1099/1101),
# so the visible cascade is incomplete -- do not reason about branch
# reachability from this excerpt alone.
1078 def getFeedUpdateTime(self, key):
1079 update_time = self.lookup('feeds', 'updateTime', key)
1084 delta = time.time() - update_time
1086 delta_hours = delta / (60. * 60.)
1087 if delta_hours < .1:
1088 return "A few minutes ago"
1089 if delta_hours < .75:
1090 return "Less than an hour ago"
1091 if delta_hours < 1.5:
1092 return "About an hour ago"
1093 if delta_hours < 18:
1094 return "About %d hours ago" % (int(delta_hours + 0.5),)
1096 delta_days = delta_hours / 24.
1097 if delta_days < 1.5:
1098 return "About a day ago"
1100 return "%d days ago" % (int(delta_days + 0.5),)
1102 delta_weeks = delta_days / 7.
1103 if delta_weeks <= 8:
1104 return "%d weeks ago" % int(delta_weeks + 0.5)
1106 delta_months = delta_days / 30.
1107 if delta_months <= 30:
1108 return "%d months ago" % int(delta_months + 0.5)
# Fallback: locale-formatted absolute date (UTC).
1110 return time.strftime("%x", time.gmtime(update_time))
def getFeedNumberOfUnreadItems(self, key):
    """Return the cached unread-article count for feed `key`."""
    unread = self.lookup('feeds', 'unread', key)
    return unread
# Return the feed's title, falling back to its URL.  The branch between
# lines 1116 and 1120 (presumably `if title: return title`) is elided.
1115 def getFeedTitle(self, key):
1116 title = self.lookup('feeds', 'title', key)
1120 return self.getFeedUrl(key)
def getFeedUrl(self, key):
    """Return the subscription URL stored for feed `key`."""
    row = self.db.execute(
        "SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()
    return row[0]
def getFeedCategory(self, key):
    """Return the category id assigned to feed `key`."""
    row = self.db.execute(
        "SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()
    return row[0]
# Return feed ids ordered by rank, optionally restricted to a category.
# The if/else around the two queries and the return (building a list from
# rows) are on elided lines.
1128 def getListOfFeeds(self, category=None):
1130 rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
1132 rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
def getListOfCategories(self):
    """Return all category ids ordered by their rank column."""
    rows = self.db.execute("SELECT id FROM categories ORDER BY rank;")
    return [row[0] for row in rows]
def getCategoryTitle(self, id):
    """Return the human-readable title of category `id` (cached)."""
    title = self.lookup('categories', 'title', id)
    return title
# Return feed ids for one category in the user-chosen sort order; the
# onlyUnread branch and the final list-building/return are elided.
1146 def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
1147 if order == "Most unread":
1148 tmp = "ORDER BY unread DESC"
1149 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
1151 elif order == "Least unread":
1151 tmp = "ORDER BY unread"
1152 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
1153 elif order == "Most recent":
1154 tmp = "ORDER BY updateTime DESC"
1155 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
1156 elif order == "Least recent":
1157 tmp = "ORDER BY updateTime"
1158 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
1159 else: # order == "Manual" or invalid value...
1160 tmp = "ORDER BY rank"
1161 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
# NOTE(review): `category` is %-interpolated into the SQL rather than
# bound with `?` -- inconsistent with the rest of the file and unsafe if
# category were ever non-numeric.
1163 sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
1165 sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
1166 rows = self.db.execute(sql)
# Return the path to the feed's cached favicon, or (per the elided else
# branch) presumably None when it does not exist.
1173 def getFavicon(self, key):
1174 filename = "%s%s.d/favicon.ico" % (self.configdir, key)
1175 if isfile(filename):
# Recompute the cached unread counter for feed `key` from its own DB
# (commit line elided).
1180 def updateUnread(self, key):
1181 feed = self.getFeed(key)
1182 self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
1184 self.cache_invalidate('feeds')
# Subscribe a new feed: insert the row at the bottom of the ranking,
# create its per-feed database, and register a woodchuck stream.  The
# id-derivation (when id is None) and duplicate-count branch are elided.
1186 def addFeed(self, title, url, id=None, category=1):
1189 count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
1191 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
# MAX(rank) is NULL (None) on an empty table; handled on the elided line.
1192 if max_rank == None:
1194 values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
1195 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
1197 # Ask for the feed object, it will create the necessary tables
1200 if wc().available():
1201 # Register the stream with Woodchuck. Update approximately
1203 wc().stream_register(stream_identifier=id,
1204 human_readable_name=title,
# Create a new category appended at the end of the rank order; NULL
# handling for the MAX() aggregates is on elided lines.
1211 def addCategory(self, title):
1212 rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
1215 id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
1218 self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
# Unsubscribe: best-effort woodchuck unregister, delete the row, close the
# rank gap, and remove the feed's on-disk directory.
1221 def removeFeed(self, key):
1222 if wc().available ():
1226 logger.debug("Removing unregistered feed %s failed" % (key,))
1228 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
1229 self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
1230 self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
1233 if isdir(self.configdir+key+".d/"):
1234 rmtree(self.configdir+key+".d/")
# Delete a category (never the last one); its feeds are reassigned to the
# default category (id 1) and the rank gap is closed.
1236 def removeCategory(self, key):
1237 if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
1238 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
1239 self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
1240 self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
1241 self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
# Dead code from the pre-sqlite pickle era; retained commented-out.
1244 #def saveConfig(self):
1245 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
1246 # file = open(self.configdir+"feeds.pickle", "w")
1247 # pickle.dump(self.listOfFeeds, file)
# Swap feed `key` with its predecessor in the rank order.  The boundary
# guard (rank > 0, on the elided line 1252) prevents moving past the top.
1250 def moveUp(self, key):
1251 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1253 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
1254 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
# Same swap for categories (guard line elided).
1257 def moveCategoryUp(self, key):
1258 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1260 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
1261 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
# Swap feed `key` with its successor; max_rank bounds the move (guard on
# the elided line 1267).
1264 def moveDown(self, key):
1265 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1266 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
1268 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
1269 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
# Same downward swap for categories (guard line elided).
1272 def moveCategoryDown(self, key):
1273 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1274 max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
1276 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
1277 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )