1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # ============================================================================
22 # Author : Yves Marcoz
24 # Description : Simple RSS Reader
25 # ============================================================================
27 from __future__ import with_statement
30 from os.path import isfile, isdir
31 from shutil import rmtree
32 from os import mkdir, remove, utime
38 from BeautifulSoup import BeautifulSoup
39 from urlparse import urljoin
40 from calendar import timegm
43 from wc import wc, wc_init, woodchuck
46 from updatedbus import update_server_object
48 from jobmanager import JobManager
50 from httpprogresshandler import HTTPProgressHandler
logger = logging.getLogger(__name__)

# NOTE(review): the enclosing `def getId(string):` line is not visible in
# this chunk; the lines below are the body of a helper that turns an
# arbitrary string into a stable hex identifier via md5.
if issubclass(string.__class__, unicode):
    # md5 operates on bytes; encode unicode input first, replacing
    # undecodable characters rather than raising.
    string = string.encode('utf8', 'replace')
return md5.new(string).hexdigest()
def download_callback(connection):
    """Abort the in-progress transfer when the job manager is quitting.

    Passed as a progress callback to HTTPProgressHandler; raising
    KeyboardInterrupt is how the download loop is cancelled.
    """
    manager = JobManager()
    if not manager.do_quit:
        return
    raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    """Build a urllib2 opener.

    Uses progress_handler if supplied, otherwise a default
    HTTPProgressHandler wired to download_callback.

    NOTE(review): the initialisation of `openers` (and the `else:` that
    likely guards the default handler, plus any proxy-handler append) is
    not visible in this chunk — indentation below is approximate.
    """
    if progress_handler is not None:
        openers.append(progress_handler)
    openers.append(HTTPProgressHandler(download_callback))
    return urllib2.build_opener(*openers)
def transfer_stats(sent, received, **kwargs):
    """
    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.
    """
    start_time = time.time()
    # NOTE(review): `start_sent = sent` is not visible in this chunk but
    # is clearly referenced by the closure below; likewise the trailing
    # `return e` is elided.
    start_received = received

    def e(sent, received, **kwargs):
        # Deltas relative to the values captured at creation time.
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)
# If not None, a subprocess.Popen object corresponding to a
# update_feeds.py process.
update_feed_process = None

# D-Bus proxy interface for the org.marcoz.feedingit update daemon;
# lazily created in send_update_request and reset to None on error or
# after (re)spawning the daemon so calls reach the right process.
update_feeds_iface = None
class BaseObject(object):
    # Mixin providing a small time-bounded read cache over sqlite columns.
    # Columns to cache.  Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column.  Note that
    # both are case sensitive.
    # NOTE(review): the default `cached_columns` assignment itself is not
    # visible in this chunk.

    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        if not hasattr(self, 'cache'):
        # NOTE(review): the early return (no cache yet) and the
        # table-is-None branch are elided in this chunk.
        if table in self.cache:
            del self.cache[table]

    def lookup(self, table, column, id=None):
        """
        Look up a column or value.  Uses a cache for columns in
        cached_columns.  Note: the column is returned unsorted.

        NOTE(review): many lines of this method are elided in this chunk
        (cache initialisation, try/except guards, loop headers, the
        KeyError fallback); indentation below is approximate.
        """
        if not hasattr(self, 'cache'):
        # Cache data for at most 60 seconds.
        cache = self.cache[table]
        if time.time() - cache[None] > 60:
            # logger.debug("%s: Cache too old: clearing" % (table,))
            del self.cache[table]
            or (table, column) not in self.cached_columns):
            # The cache is empty or the caller wants a column that we
            if (table, column) in self.cached_columns:
                # logger.debug("%s: Rebuilding cache" % (table,))
                self.cache[table] = cache = {}
                for t, c in self.cached_columns:
                where = "where id = '%s'" % id
                results = self.db.execute(
                    "SELECT %s FROM %s %s" % (','.join(columns), table, where))
                for index, value in enumerate(values):
                    cache[columns[index]][i] = value
                results.append(values[0])
        cache = self.cache[table]
        value = cache[column][id]
        # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
        return cache[column].values()
        # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
class Feed(BaseObject):
    # A single RSS/Atom feed backed by its own per-feed sqlite database
    # (<configdir>/<key>.db) and article directory (<configdir>/<key>.d).
    # NOTE(review): this chunk elides many lines throughout the class
    # (try/else headers, returns, loop headers); indentation below is
    # approximate and gaps are flagged where they matter.
    cached_columns = (('feed', 'read'),
    serial_execution_lock = threading.Lock()

        # NOTE(review): the `def _getdb(self):` header and the `try:`
        # returning a per-thread cached connection are elided here.
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
    db = property(_getdb)

    def __init__(self, configdir, key):
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        self.tls = threading.local ()

        if not isdir(self.dir):
        if not isfile("%s/%s.db" %(self.dir, self.key)):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")

    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        # Download an image referenced by an article into the feed's
        # directory, named by getId(url); "touch" it on success so the
        # expiry sweep sees it as fresh.
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
                opener = downloader(proxy=proxy)
                abs_url = urljoin(baseurl,url)
                f = opener.open(abs_url)
                with open(filename, "w") as outf:
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                exception = sys.exc_info()[0]
                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
            #open(filename,"a").close() # "Touch" the file
            file = open(filename,"a")
            utime(filename, None)

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        # If we ARE the update daemon, do the work in-process via the job
        # manager; otherwise ask the daemon over D-Bus (spawning it first
        # if it is not running).
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
                self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
            JobManager().execute(doit(), self.key, priority=priority)

            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus=dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')
                    update_feeds_iface.Update(self.key)
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                    # Drop the stale proxy so the next attempt reconnects.
                    update_feeds_iface = None

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).
            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                update_feeds_iface = None

            if send_update_request():

    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        # The actual update: fetch with feedparser, store new/updated
        # articles (and optionally their images), report stats to
        # Woodchuck, then expire old articles and files.
        logger.debug("Updating %s" % url)

        have_serial_execution_lock = False
            update_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
                openers.append (proxy)
            kwargs = {'handlers':openers}

            feed_transfer_stats = transfer_stats(0, 0)
            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - update_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.
                    # NOTE(review): the `def wc_success():` header and its
                    # try/except scaffolding are elided in this chunk.
                        wc().stream_register (self.key, "", 6 * 60 * 60)
                    except woodchuck.ObjectExistsError:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=update_start,
                        transfer_duration=download_duration,
                        new_objects=new_objects,
                        updated_objects=updated_objects,
                        objects_inline=new_objects + updated_objects)
                        "Failed to register update of %s with woodchuck!"

            http_status = tmp.get ('status', 200)

            # Check if the parse was succesful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This make a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute(wc_success, async=True)
            elif len(tmp["entries"])==0 and not tmp.get('version', None):
                # An error occured fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed our or
                # '' if the data is not a proper feed)
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.get('version', 'unset')),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))

                def register_stream_update_failed(http_status):
                        logger.debug("%s: stream update failed!" % self.key)
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                        # Map the HTTP status to a Woodchuck error class.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                        register_stream_update_failed(
                            http_status=http_status),
                # Success path: store entries.
                currentTime = time.time()
                # The etag and modified value should only be updated if the content was not null
                    modified = tmp["modified"]
                    abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                    f = opener.open(abs_url)
                    outf = open(self.dir+"/favicon.ico", "w")
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    logger.debug("Could not download favicon %s: %s"
                                 % (abs_url, str (exception)))

                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()
                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    entry_transfer_stats = transfer_stats(
                        *feed_transfer_stats(**progress_handler.stats)[0:2])

                    if JobManager().do_quit:
                        raise KeyboardInterrupt
                        date = self.extractDate(entry)
                        entry["title"] = "No Title"
                        entry["author"] = None
                    if(not(entry.has_key("id"))):
                    content = self.extractContent(entry)
                    object_size = len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)

                    current_version = self.db.execute(
                        'select date, ROWID from feed where id=?',
                    if (current_version is not None
                        and current_version[0] == date):
                        logger.debug("ALREADY DOWNLOADED %s (%s)"
                                     % (entry["title"], entry["link"]))
                        ## This article is already present in the feed listing. Update the "updated" time, so it doesn't expire
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;",(currentTime,id))
                            logger.debug("Updating already downloaded files for %s" %(id))
                            filename = configdir+self.key+".d/"+id+".html"
                            file = open(filename,"a")
                            utime(filename, None)
                            images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                                file = open(image[0],"a")
                                utime(image[0], None)
                            logger.debug("Error in refreshing images for %s" % (id))

                    if current_version is not None:
                        # The version was updated.  Mark it as unread.
                        logger.debug("UPDATED: %s (%s)"
                                     % (entry["title"], entry["link"]))
                        logger.debug("NEW: %s (%s)"
                                     % (entry["title"], entry["link"]))

                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    baseurl = tmpEntry["link"]
                    if imageCache and len(images) > 0:
                        # Release the lock while downloading images so other
                        # feeds can make progress.
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                            if not img.has_key('src'):
                                filename = self.addImage(
                                    configdir, self.key, baseurl, img['src'],
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True

                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                        'title': tmpEntry["title"],
                        'contentLink': tmpEntry["contentLink"],
                        'date': tmpEntry["date"],
                        'updated': currentTime,
                        'link': tmpEntry["link"],
                    if current_version is not None:
                        # This is an update.  Ensure that the existing
                        values['ROWID'] = current_version[1]
                    cols, values = zip(*values.items())
                        "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
                        % (','.join(cols), ','.join(('?',) * len(values))),

                    # Register the object with Woodchuck and mark it as
                    def register_object_transferred(
                        id, title, publication_time,
                        sent, received, object_size):
                        logger.debug("Registering transfer of object %s"
                            obj = wc()[self.key].object_register(
                                object_identifier=id,
                                human_readable_name=title)
                        except woodchuck.ObjectExistsError:
                            obj = wc()[self.key][id]
                        obj.publication_time = publication_time
                                woodchuck.Indicator.ApplicationVisual
                                |woodchuck.Indicator.StreamWide),
                            transferred_down=received,
                            object_size=object_size)

                        # If the entry does not contain a publication
                        # time, the attribute won't exist.
                        pubtime = entry.get('date_parsed', None)
                            publication_time = time.mktime (pubtime)
                            publication_time = None

                            = entry_transfer_stats(**progress_handler.stats)
                        # sent and received are for objects (in
                        # particular, images) associated with this
                        # item.  We also want to attribute the data
                        # transferred for the item's content.  This is
                        # a good first approximation.
                        received += len(content)

                        register_object_transferred(
                            title=tmpEntry["title"],
                            publication_time=publication_time,
                            sent=sent, received=received,
                            object_size=object_size),
                    = feed_transfer_stats(**progress_handler.stats)
                    "%s: Update successful: transferred: %d/%d; objects: %d)"
                    % (url, sent, received, len (tmp.entries)))
                mainthread.execute (wc_success, async=True)

                # Expire articles: unread articles get twice the grace
                # period of read ones.
                rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
                    self.removeEntry(row[0])

                from glob import glob
                for file in glob(configdir+self.key+".d/*"):
                        # put the two dates into matching format
                        lastmodDate = stats[8]
                        expDate = time.time()-expiry*3
                        # check if image-last-modified-date is outdated
                        if expDate > lastmodDate:
                                #print 'Removing', file
                                # XXX: Tell woodchuck.
                                remove(file) # commented out for testing
                            except OSError, exception:
                                logger.error('Could not remove %s: %s'
                                             % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
                title = tmp.feed.title
            except (AttributeError, UnboundLocalError), exception:
            if postFeedUpdateFunc is not None:
                postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                    title, *postFeedUpdateFuncArgs)

        self.cache_invalidate()

    def setEntryRead(self, id):
        # Mark one article read and (best effort) tell Woodchuck it was used.
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
                wc()[self.key][id].used()
        mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')

    def setEntryUnread(self, id):
        """Mark the article with the given id unread."""
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.cache_invalidate('feed')

    def markAllAsRead(self):
        """Mark every unread article in this feed as read."""
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.cache_invalidate('feed')

    def isEntryRead(self, id):
        return self.lookup('feed', 'read', id) == 1

    def getTitle(self, id):
        return self.lookup('feed', 'title', id)

    def getContentLink(self, id):
        return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getExternalLink(self, id):
        return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getDate(self, id):
        # Human-readable local time string for the article's date stamp.
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))

    def getDateTuple(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.localtime(dateStamp)

    def getDateStamp(self, id):
        return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the

        NOTE(review): the elif/else scaffolding between the fallbacks and
        the final `return getId(...)` are elided in this chunk.
        """
        # Prefer the entry's id, which is supposed to be globally
        key = entry.get('id', None)
        # Next, try the link to the content.
        key = entry.get('link', None)
        # Ok, the title and the date concatenated are likely to be
        key = entry.get('title', None) + entry.get('date', None)
        # Hmm, the article's content will at least guarantee no
        # false negatives (i.e., missing articles)
        key = entry.get('content', None)
        # If all else fails, just use a random number.
        key = str (random.random ())

    def getIds(self, onlyUnread=False):
        rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()

    def getNextId(self, id, forward=True):
        # NOTE(review): the lines computing `ids` and `delta` (+1/-1 based
        # on `forward`) are elided in this chunk.
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]

    def getPreviousId(self, id):
        return self.getNextId(id, forward=False)

    def getNumberOfUnreadItems(self):
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]

    def getNumberOfEntries(self):
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]

    def getArticle(self, entry):
        # Render an entry dict into a standalone XHTML page string.
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += "</body></html>"

    def getContent(self, id):
        """
        Return the content of the article with the specified ID.  If
        the content is not available, returns None.
        """
        contentLink = self.getContentLink(id)
            with open(contentLink, 'r') as file:
                content = file.read()
            logger.exception("Failed get content for %s: reading %s failed",

    def extractDate(self, entry):
        # Prefer the updated timestamp, then published; timegm treats the
        # parsed struct as UTC.
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])

    def extractContent(self, entry):
        # Pick the longest of summary/content, falling back to description.
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
            content = entry.get('description', '')

    def removeEntry(self, id):
        # Delete the article's HTML, its database rows, and (best effort)
        # tell Woodchuck the files are gone.
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
                wc()[self.key][id].files_deleted (
                    woodchuck.DeletionResponse.Deleted)
                del wc()[self.key][id]
        mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
    # Pseudo-feed that stores articles the user explicitly archived.
    # NOTE(review): several try/for headers in this class are elided in
    # this chunk; indentation below is approximate.

    def addArchivedArticle(self, title, link, date, configdir):
        # Insert with updated=0 so _updateFeed knows the content still
        # needs to be downloaded.
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)

    # Feed.UpdateFeed calls this function.
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        # Download the page content (and images) for each archived article
        # that has not been fetched yet (updated=0).
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        currentTime = time.time()
                f = urllib2.urlopen(link)
                #entry["content"] = f.read()
                soup = BeautifulSoup(html)
                    filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                contentLink = configdir+self.key+".d/"+id+".html"
                file = open(contentLink, "w")
                file.write(soup.prettify())
                self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
                logger.error("Error updating Archived Article: %s %s"
                             % (link,traceback.format_exc(),))
        if postFeedUpdateFunc is not None:
            postFeedUpdateFunc (self.key, currentTime, None, None, None,
                                *postFeedUpdateFuncArgs)

    def purgeReadArticles(self):
        # Remove every archived article the user has already read.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
            self.removeArticle(row[0])

    def removeArticle(self, id):
        # Remove an article, deleting its images only when no other
        # article still references the same image file.
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
            count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
951 class Listing(BaseObject):
953 cached_columns = (('feeds', 'updateTime'),
956 ('categories', 'title'))
961 except AttributeError:
962 db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
965 db = property(_getdb)
967 # Lists all the feeds in a dictionary, and expose the data
968 def __init__(self, config, configdir):
970 self.configdir = configdir
972 self.tls = threading.local ()
975 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
977 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
978 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
979 self.addCategory("Default Category")
980 if isfile(self.configdir+"feeds.pickle"):
981 self.importOldFormatFeeds()
983 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
985 from string import find, upper
986 if find(upper(table[0]), "WIDGET")<0:
987 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
988 self.db.execute("UPDATE feeds SET widget=1;")
990 if find(upper(table[0]), "CATEGORY")<0:
991 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
992 self.addCategory("Default Category")
993 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
994 self.db.execute("UPDATE feeds SET category=1;")
999 # Check that Woodchuck's state is up to date with respect our
1002 updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
1003 wc_init(config, self, True if updater else False)
1004 if wc().available() and updater:
1005 # The list of known streams.
1006 streams = wc().streams_list ()
1007 stream_ids = [s.identifier for s in streams]
1009 # Register any unknown streams. Remove known streams from
1011 for key in self.getListOfFeeds():
1012 title = self.getFeedTitle(key)
1013 # XXX: We should also check whether the list of
1014 # articles/objects in each feed/stream is up to date.
1015 if key not in stream_ids:
1017 "Registering previously unknown channel: %s (%s)"
1019 wc().stream_register(
1021 self.config.getUpdateInterval() * 60 * 60)
1023 # Make sure the human readable name is up to date.
1024 if wc()[key].human_readable_name != title:
1025 wc()[key].human_readable_name = title
1026 stream_ids.remove (key)
1027 wc()[key].freshness \
1028 = self.config.getUpdateInterval() * 60 * 60
1031 # Unregister any streams that are no longer subscribed to.
1032 for id in stream_ids:
1033 logger.debug("Unregistering %s" % (id,))
1034 w.stream_unregister (id)
1036 logger.exception("Registering streams with Woodchuck")
1038 def importOldFormatFeeds(self):
1039 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
1041 listing = rss.Listing(self.configdir)
1043 for id in listing.getListOfFeeds():
1046 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
1047 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
1050 feed = listing.getFeed(id)
1051 new_feed = self.getFeed(id)
1053 items = feed.getIds()[:]
1056 if feed.isEntryRead(item):
1060 date = timegm(feed.getDateTuple(item))
1061 title = feed.getTitle(item)
1062 newId = new_feed.generateUniqueId({"date":date, "title":title})
1063 values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
1064 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
1065 new_feed.db.commit()
1067 images = feed.getImages(item)
1068 for image in images:
1069 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
1070 new_feed.db.commit()
1073 self.updateUnread(id)
1075 logger.error("importOldFormatFeeds: %s"
1076 % (traceback.format_exc(),))
1077 remove(self.configdir+"feeds.pickle")
1080 def addArchivedArticle(self, key, index):
1081 feed = self.getFeed(key)
1082 title = feed.getTitle(index)
1083 link = feed.getExternalLink(index)
1084 date = feed.getDate(index)
1085 count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
1087 self.addFeed("Archived Articles", "", id="ArchivedArticles")
1089 archFeed = self.getFeed("ArchivedArticles")
1090 archFeed.addArchivedArticle(title, link, date, self.configdir)
1091 self.updateUnread("ArchivedArticles")
1093 def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
1095 if expiryTime is None:
1096 expiryTime = self.config.getExpiry()
1098 # Default to 24 hours
1101 (use_proxy, proxy) = self.config.getProxy()
1104 if imageCache is None:
1105 imageCache = self.config.getImageCache()
1107 feed = self.getFeed(key)
1108 (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
1110 modified = time.struct_time(eval(modified))
1114 self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
1115 priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
1117 def _queuePostFeedUpdate(self, *args, **kwargs):
1118 mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
1120 def _postFeedUpdate(self, key, updateTime, etag, modified, title):
1124 modified=str(tuple(modified))
1126 self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
1128 self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
1130 if title is not None:
1131 self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
1134 self.cache_invalidate('feeds')
1135 self.updateUnread(key)
1137 update_server_object().ArticleCountUpdated()
1139 stats = JobManager().stats()
1140 global jobs_at_start
1141 completed = stats['jobs-completed'] - jobs_at_start
1142 in_progress = stats['jobs-in-progress']
1143 queued = stats['jobs-queued']
1146 percent = (100 * ((completed + in_progress / 2.))
1147 / (completed + in_progress + queued))
1148 except ZeroDivisionError:
1151 update_server_object().UpdateProgress(
1152 percent, completed, in_progress, queued, 0, 0, 0, key)
1154 if in_progress == 0 and queued == 0:
1155 jobs_at_start = stats['jobs-completed']
1157 def getFeed(self, key):
1158 if key == "ArchivedArticles":
1159 return ArchivedArticles(self.configdir, key)
1160 return Feed(self.configdir, key)
1162 def editFeed(self, key, title, url, category=None):
1164 self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
1166 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
1168 self.cache_invalidate('feeds')
1170 if wc().available():
1172 wc()[key].human_readable_name = title
1174 logger.debug("Feed %s (%s) unknown." % (key, title))
1176 def getFeedUpdateTime(self, key):
1177 update_time = self.lookup('feeds', 'updateTime', key)
1182 delta = time.time() - update_time
1184 delta_hours = delta / (60. * 60.)
1185 if delta_hours < .1:
1186 return "A few minutes ago"
1187 if delta_hours < .75:
1188 return "Less than an hour ago"
1189 if delta_hours < 1.5:
1190 return "About an hour ago"
1191 if delta_hours < 18:
1192 return "About %d hours ago" % (int(delta_hours + 0.5),)
1194 delta_days = delta_hours / 24.
1195 if delta_days < 1.5:
1196 return "About a day ago"
1198 return "%d days ago" % (int(delta_days + 0.5),)
1200 delta_weeks = delta_days / 7.
1201 if delta_weeks <= 8:
1202 return "%d weeks ago" % int(delta_weeks + 0.5)
1204 delta_months = delta_days / 30.
1205 if delta_months <= 30:
1206 return "%d months ago" % int(delta_months + 0.5)
1208 return time.strftime("%x", time.gmtime(update_time))
1210 def getFeedNumberOfUnreadItems(self, key):
1211 return self.lookup('feeds', 'unread', key)
1213 def getFeedTitle(self, key):
1214 title = self.lookup('feeds', 'title', key)
1218 return self.getFeedUrl(key)
1220 def getFeedUrl(self, key):
1221 return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1223 def getFeedCategory(self, key):
1224 return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
def getListOfFeeds(self, category=None):
    """Return the ids of all feeds in rank order.

    When *category* is given, only feeds in that category are listed.
    NOTE(review): the branch structure and the result-building return
    were reconstructed; the two SELECTs are alternatives.
    """
    if category is not None:
        rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
    else:
        rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
    # Skip NULL/empty ids defensively.
    return [row[0] for row in rows if row[0]]
def getListOfCategories(self):
    """Return every category id, ordered by rank."""
    ids = []
    for row in self.db.execute("SELECT id FROM categories ORDER BY rank;"):
        ids.append(row[0])
    return ids
def getCategoryTitle(self, id):
    """Return the stored title of category *id*."""
    title = self.lookup('categories', 'title', id)
    return title
def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
    """Return feed ids in *category* sorted according to *order*.

    order       -- one of "Most unread", "Least unread", "Most recent",
                   "Least recent"; anything else sorts by manual rank.
    onlyUnread  -- when True, only feeds with unread items are listed.
    category    -- category id to list (defaults to 1).

    Fix: the original interpolated *category* into the SQL string with
    %s; this uses a bound parameter instead (no injection, proper
    quoting).  The onlyUnread branch and result return were
    reconstructed.
    """
    if order == "Most unread":
        tmp = "ORDER BY unread DESC"
    elif order == "Least unread":
        tmp = "ORDER BY unread"
    elif order == "Most recent":
        tmp = "ORDER BY updateTime DESC"
    elif order == "Least recent":
        tmp = "ORDER BY updateTime"
    else: # order == "Manual" or invalid value...
        tmp = "ORDER BY rank"
    if onlyUnread:
        sql = "SELECT id FROM feeds WHERE unread>0 AND category=? " + tmp
    else:
        sql = "SELECT id FROM feeds WHERE category=? " + tmp
    rows = self.db.execute(sql, (category,))
    # Skip NULL/empty ids defensively.
    return [row[0] for row in rows if row[0]]
def getFavicon(self, key):
    """Return the path of feed *key*'s cached favicon, or False if absent.

    NOTE(review): the return paths were reconstructed; False (rather
    than None) is assumed to be the historical "missing" value — confirm
    against callers.
    """
    filename = "%s%s.d/favicon.ico" % (self.configdir, key)
    if isfile(filename):
        return filename
    return False
def updateUnread(self, key):
    """Recount feed *key*'s unread items and persist the count in the feeds table."""
    feed = self.getFeed(key)
    unread = feed.getNumberOfUnreadItems()
    self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (unread, key))
    # The cached 'feeds' rows are now stale.
    self.cache_invalidate('feeds')
def addFeed(self, title, url, id=None, category=1):
    """Insert a new feed row and register it with Woodchuck.

    Returns True when the feed was added, False when a feed with the
    same id already exists.  When *id* is not supplied it is derived
    from *url* via getId() (the md5 helper defined earlier in this
    file).  NOTE(review): the guards, the getFeed() call and the return
    values were reconstructed from the surviving statements.
    """
    if not id:
        id = getId(url)
    count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
    if count:
        # Already present: nothing to do.
        return False
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    if max_rank is None:
        # Empty table: MAX() yields NULL.
        max_rank = 0
    values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
    self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
    self.db.commit()
    # Ask for the feed object, it will create the necessary tables
    self.getFeed(id)

    if wc().available():
        # Register the stream with Woodchuck. Update approximately
        # every six hours.
        wc().stream_register(stream_identifier=id,
                             human_readable_name=title,
                             freshness=6*60*60)
    return True
def addCategory(self, title):
    """Append a new category named *title* at the bottom of the rank order."""
    rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
    if rank is None:
        # Empty table: MAX()+1 yields NULL; start ranking at 1.
        rank = 1
    id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
    if id is None:
        id = 1
    self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
    self.db.commit()
def removeFeed(self, key):
    """Delete feed *key*: Woodchuck stream, database row, and on-disk cache.

    Remaining feeds below it are shifted up one rank to keep the rank
    sequence dense.
    """
    if wc().available ():
        try:
            del wc()[key]
        # Bug fix: `except KeyError, woodchuck.Error:` is Python 2
        # "except X as name" syntax — it caught only KeyError and
        # *assigned* the exception to woodchuck.Error.  A tuple catches
        # both types as intended.
        except (KeyError, woodchuck.Error):
            logger.debug("Removing unregistered feed %s failed" % (key,))

    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
    self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
    # Close the gap left in the rank sequence.
    self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
    self.db.commit()

    if isdir(self.configdir+key+".d/"):
        rmtree(self.configdir+key+".d/")
def removeCategory(self, key):
    """Delete category *key*, compacting ranks and reassigning its feeds.

    Feeds in the removed category are moved to category 1.  The last
    remaining category is never removed.
    """
    total = self.db.execute("SELECT count(*) FROM categories;").fetchone()[0]
    if total > 1:
        row = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()
        rank = row[0]
        self.db.execute("DELETE FROM categories WHERE id=?;", (key,))
        # Close the gap in the rank sequence, then rehome the feeds.
        self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,))
        self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,))
1342 #def saveConfig(self):
1343 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
1344 # file = open(self.configdir+"feeds.pickle", "w")
1345 # pickle.dump(self.listOfFeeds, file)
def moveUp(self, key):
    """Swap feed *key* with the feed ranked directly above it.

    No-op when the feed is already at the top (rank 0).  Fix: restored
    the elided rank>0 guard; without it the top feed would corrupt the
    rank sequence.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    if rank > 0:
        # Give the neighbour our rank first, then take its slot.
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
        self.db.commit()
def moveCategoryUp(self, key):
    """Swap category *key* with the category ranked directly above it.

    No-op when the category is already at the top (rank 0).  Fix:
    restored the elided rank>0 guard.
    """
    rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    if rank > 0:
        # Give the neighbour our rank first, then take its slot.
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
        self.db.commit()
def moveDown(self, key):
    """Swap feed *key* with the feed ranked directly below it.

    No-op when the feed is already at the bottom.  Fix: restored the
    elided rank<max_rank guard; without it the bottom feed would
    corrupt the rank sequence.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    if rank < max_rank:
        # Give the neighbour our rank first, then take its slot.
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
        self.db.commit()
def moveCategoryDown(self, key):
    """Swap category *key* with the category ranked directly below it.

    No-op when the category is already at the bottom.  Fix: restored
    the elided rank<max_rank guard.
    """
    rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
    if rank < max_rank:
        # Give the neighbour our rank first, then take its slot.
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
        self.db.commit()