1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # ============================================================================
22 # Author : Yves Marcoz
24 # Description : Simple RSS Reader
25 # ============================================================================
27 from __future__ import with_statement
30 from os.path import isfile, isdir
31 from shutil import rmtree
32 from os import mkdir, remove, utime
38 from BeautifulSoup import BeautifulSoup
39 from urlparse import urljoin
40 from calendar import timegm
43 from wc import wc, wc_init, woodchuck
46 from updatedbus import update_server_object
48 from jobmanager import JobManager
50 from httpprogresshandler import HTTPProgressHandler
logger = logging.getLogger(__name__)

# NOTE(review): the lines below are the body of the module's id-hashing
# helper (its `def` line is not visible in this excerpt).  It UTF-8-encodes
# unicode input, then returns the hex MD5 digest (Python 2 `md5` module).
if issubclass(string.__class__, unicode):
    string = string.encode('utf8', 'replace')
return md5.new(string).hexdigest()
def download_callback(connection):
    """Progress callback invoked during transfers.

    Aborts the current download by raising KeyboardInterrupt when the
    job manager is shutting down.
    """
    manager = JobManager()
    if manager.do_quit:
        raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    """Build and return a urllib2 opener.

    NOTE(review): some lines of this function are not visible in this
    excerpt (the initial `openers` list, an apparent `else:` branch before
    the default-handler append, and the proxy handling).
    """
    if progress_handler is not None:
        openers.append(progress_handler)
    openers.append(HTTPProgressHandler(download_callback))
    return urllib2.build_opener(*openers)
def transfer_stats(sent, received, **kwargs):
    """Record a starting point for transfer statistics.

    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.
    """
    # Capture the baseline; the visible excerpt was missing the
    # `start_sent` binding and the final `return e`, which would have made
    # the continuation raise NameError and this function return None.
    start_time = time.time()
    start_sent = sent
    start_received = received

    def e(sent, received, **kwargs):
        # Deltas relative to the values captured at creation time.
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)

    return e
# If not None, a subprocess.Popen object corresponding to a
# update_feeds.py process.
update_feed_process = None

# D-Bus proxy to the update daemon; reset to None after a failed call so
# the next use re-resolves it (see send_update_request).
update_feeds_iface = None
class BaseObject(object):
    # Columns to cache. Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column. Note that
    # both are case sensitive.
    # NOTE(review): the `cached_columns` assignment itself is not visible
    # in this excerpt; subclasses (Feed, Listing) override it.
    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        # NOTE(review): several lines of this method are not visible in this
        # excerpt (the early-return body of the guard below and the
        # whole-cache branch); indentation is reconstructed.
        if not hasattr(self, 'cache'):
        if table in self.cache:
            del self.cache[table]
    def lookup(self, table, column, id=None):
        """
        Look up a column or value.  Uses a cache for columns in
        cached_columns.  Note: the column is returned unsorted.
        """
        # NOTE(review): many lines of this method are not visible in this
        # excerpt (cache initialisation, branch headers, loop headers and
        # several returns); indentation below is reconstructed.
        if not hasattr(self, 'cache'):

        # Cache data for at most 60 seconds.
        cache = self.cache[table]
        if time.time() - cache[None] > 60:
            # logger.debug("%s: Cache too old: clearing" % (table,))
            del self.cache[table]
            or (table, column) not in self.cached_columns):
            # The cache is empty or the caller wants a column that we
            if (table, column) in self.cached_columns:
                # logger.debug("%s: Rebuilding cache" % (table,))
                self.cache[table] = cache = {}
                for t, c in self.cached_columns:
                # NOTE(review): `id` is interpolated directly into the SQL
                # string below — prefer a parameterized query.
                where = "where id = '%s'" % id
                results = self.db.execute(
                    "SELECT %s FROM %s %s" % (','.join(columns), table, where))
                for index, value in enumerate(values):
                    cache[columns[index]][i] = value
                results.append(values[0])
        cache = self.cache[table]
        value = cache[column][id]
        # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
        return cache[column].values()
        # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
class Feed(BaseObject):
    # Columns served from BaseObject.lookup's cache for this table.
    # NOTE(review): the continuation lines of this tuple are not visible.
    cached_columns = (('feed', 'read'),

    # Class-wide lock serializing feed updates (acquired in _updateFeed).
    serial_execution_lock = threading.Lock()

    # NOTE(review): _getdb's `def` line and the initial per-thread lookup
    # are not visible; on a fresh thread the AttributeError path below
    # opens a per-thread sqlite connection.
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
    db = property(_getdb)
    def __init__(self, configdir, key):
        # NOTE(review): several lines of this constructor are not visible
        # in this excerpt (e.g. the `self.key = key` assignment that
        # `self.dir` relies on, the mkdir body, commits, and the try header
        # of the schema migration); indentation is reconstructed.
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        # Per-thread storage used by _getdb to cache sqlite connections.
        self.tls = threading.local()
        if not isdir(self.dir):
        filename = "%s/%s.db" % (self.dir, self.key)
        if not isfile(filename):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, contentHash text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
        # Migration: older databases lack the contentHash column; the
        # duplicate-column error is expected and ignored.
        self.db.execute("ALTER TABLE feed ADD COLUMN contentHash text")
        except sqlite3.OperationalError, e:
            if 'duplicate column name' in str(e):
            logger.exception("Add column contentHash to %s", filename)
    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        """Download the image at `url` (resolved against `baseurl`) into the
        feed's cache directory, named by its id hash.

        NOTE(review): several lines are not visible in this excerpt (the
        guard reusing a caller-supplied opener, the try headers, the write
        of the downloaded bytes, and the return of `filename`).
        """
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            opener = downloader(proxy=proxy)
            abs_url = urljoin(baseurl,url)
            f = opener.open(abs_url)
            with open(filename, "w") as outf:
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                exception = sys.exc_info()[0]
                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
            # Refresh the file's mtime so the expiry pass keeps it.
            #open(filename,"a").close() # "Touch" the file
            file = open(filename,"a")
            utime(filename, None)
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Update this feed: directly when running as the update_feeds.py
        daemon, otherwise via the JobManager queue.

        NOTE(review): the branch bodies and the `doit` closure definition
        are not visible in this excerpt.
        """
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
        self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
        JobManager().execute(doit(), self.key, priority=priority)
        # NOTE(review): the lines below are nested inside updateFeed in the
        # original file; several connecting lines (try/except headers,
        # return statements, else branches) are not visible in this excerpt.
        def send_update_request():
            """Ask the already-running update daemon, over D-Bus, to update
            this feed.  NOTE(review): the return lines are not visible."""
            global update_feeds_iface
            if update_feeds_iface is None:
                bus=dbus.SessionBus()
                remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                update_feeds_iface = dbus.Interface(
                    remote_object, 'org.marcoz.feedingit')
            update_feeds_iface.Update(self.key)
            logger.error("Invoking org.marcoz.feedingit.Update: %s"
            # Force a fresh D-Bus proxy on the next attempt.
            update_feeds_iface = None

        if send_update_request():
            # Success! It seems we were able to start the update
            # daemon via dbus (or, it was already running).

        global update_feed_process
        if (update_feed_process is None
            or update_feed_process.poll() is not None):
            # The update_feeds process is not running. Start it.
            update_feeds = os.path.join(os.path.dirname(__file__),
            argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
            logger.debug("Starting update_feeds: running %s"
            update_feed_process = subprocess.Popen(argv)
            # Make sure the dbus calls go to the right process:
            update_feeds_iface = None

        if send_update_request():
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Fetch, parse and store the feed at `url`, expire old articles,
        and report the transfer to Woodchuck.

        NOTE(review): this excerpt omits many connecting lines (try/except
        and loop headers, nested `def`/`values = {` lines, commits); the
        indentation below is reconstructed and must be checked against the
        full file before any behavioural change.
        """
        logger.debug("Updating %s" % url)
        have_serial_execution_lock = False
        update_start = time.time ()
        progress_handler = HTTPProgressHandler(download_callback)
        openers = [progress_handler]
        # NOTE(review): presumably guarded by `if proxy is not None:` in
        # the full file — confirm.
        openers.append (proxy)
        kwargs = {'handlers':openers}
        feed_transfer_stats = transfer_stats(0, 0)
        # Conditional fetch: etag/modified allow the server to answer 304.
        tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
        download_duration = time.time () - update_start
        opener = downloader(progress_handler, proxy)
        if JobManager().do_quit:
            raise KeyboardInterrupt
        process_start = time.time()
        # Expiry time is in hours
        expiry = float(expiryTime) * 3600.
        # --- Woodchuck "update succeeded" bookkeeping; its enclosing def
        # (wc_success, referenced below) and try lines are not visible. ---
        wc().stream_register (self.key, "", 6 * 60 * 60)
        except woodchuck.ObjectExistsError:
        wc()[self.key].updated (
            indicator=(woodchuck.Indicator.ApplicationVisual
                       |woodchuck.Indicator.StreamWide),
            transferred_down=progress_handler.stats['received'],
            transferred_up=progress_handler.stats['sent'],
            transfer_time=update_start,
            transfer_duration=download_duration,
            new_objects=new_objects,
            updated_objects=updated_objects,
            objects_inline=new_objects + updated_objects)
        "Failed to register update of %s with woodchuck!"
        http_status = tmp.get ('status', 200)

        # Check if the parse was succesful.  If the http status code
        # is 304, then the download was successful, but there is
        # nothing new.  Indeed, no content is returned.  This make a
        # 304 look like an error because there are no entries and the
        # parse fails.  But really, everything went great!  Check for
        if http_status == 304:
            logger.debug("%s: No changes to feed." % (self.key,))
            mainthread.execute(wc_success, async=True)
        elif len(tmp["entries"])==0 and not tmp.get('version', None):
            # An error occured fetching or parsing the feed.  (Version
            # will be either None if e.g. the connection timed our or
            # '' if the data is not a proper feed)
            "Error fetching %s: version is: %s: error: %s"
            % (url, str (tmp.get('version', 'unset')),
               str (tmp.get ('bozo_exception', 'Unknown error'))))

            def register_stream_update_failed(http_status):
                logger.debug("%s: stream update failed!" % self.key)
                # It's not easy to get the feed's title from here.
                # At the latest, the next time the application is
                # started, we'll fix up the human readable name.
                wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                # Map the HTTP status class onto a Woodchuck failure code.
                ec = woodchuck.TransferStatus.TransientOther
                if 300 <= http_status and http_status < 400:
                    ec = woodchuck.TransferStatus.TransientNetwork
                if 400 <= http_status and http_status < 500:
                    ec = woodchuck.TransferStatus.FailureGone
                if 500 <= http_status and http_status < 600:
                    ec = woodchuck.TransferStatus.TransientNetwork
                wc()[self.key].update_failed(ec)
            register_stream_update_failed(
                http_status=http_status),
            currentTime = time.time()
            # The etag and modified value should only be updated if the content was not null
            modified = tmp["modified"]
            # Fetch the feed's favicon (enclosing try not visible).
            abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
            f = opener.open(abs_url)
            outf = open(self.dir+"/favicon.ico", "w")
            except (urllib2.HTTPError, urllib2.URLError), exception:
                logger.debug("Could not download favicon %s: %s"
                             % (abs_url, str (exception)))

            # Serialize database work across update threads.
            self.serial_execution_lock.acquire ()
            have_serial_execution_lock = True

            #reversedEntries = self.getEntries()
            #reversedEntries.reverse()
            # Process oldest entries first.
            tmp["entries"].reverse()
            for entry in tmp["entries"]:
                # Yield so as to make the main thread a bit more
                entry_transfer_stats = transfer_stats(
                    *feed_transfer_stats(**progress_handler.stats)[0:2])
                if JobManager().do_quit:
                    raise KeyboardInterrupt
                date = self.extractDate(entry)
                # Fallbacks for missing fields (their guard lines are not
                # visible in this excerpt).
                entry["title"] = "No Title"
                entry["author"] = None
                if(not(entry.has_key("id"))):
                content = self.extractContent(entry)
                # The content hash detects genuine updates (see below).
                contentHash = getId(content)
                object_size = len (content)
                tmpEntry = {"title":entry["title"], "content":content,
                            "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                id = self.generateUniqueId(tmpEntry)

                current_version = self.db.execute(
                    'select date, ROWID, contentHash from feed where id=?',
                if (current_version is not None
                    # To detect updates, don't compare by date:
                    # compare by content.
                    #
                    # - If an article update is just a date change
                    #   and the content remains the same, we don't
                    #   want to register an update.
                    #
                    # - If an article's content changes but not the
                    #   date, we want to recognize an update.
                    and current_version[2] == contentHash):
                    logger.debug("ALREADY DOWNLOADED %s (%s)"
                                 % (entry["title"], entry["link"]))
                    ## This article is already present in the feed listing. Update the "updated" time, so it doesn't expire
                    self.db.execute("UPDATE feed SET updated=? WHERE id=?;",(currentTime,id))
                    logger.debug("Updating already downloaded files for %s" %(id))
                    filename = configdir+self.key+".d/"+id+".html"
                    # "Touch" the cached article and its images so the
                    # mtime-based expiry pass below keeps them.
                    file = open(filename,"a")
                    utime(filename, None)
                    images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                    file = open(image[0],"a")
                    utime(image[0], None)
                    logger.debug("Error in refreshing images for %s" % (id))

                if current_version is not None:
                    # The version was updated.  Mark it as unread.
                    logger.debug("UPDATED: %s (%s)"
                                 % (entry["title"], entry["link"]))
                    logger.debug("NEW: %s (%s)"
                                 % (entry["title"], entry["link"]))

                #articleTime = time.mktime(self.entries[id]["dateTuple"])
                soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                baseurl = tmpEntry["link"]
                if imageCache and len(images) > 0:
                    # Download images while NOT holding the serial lock.
                    self.serial_execution_lock.release ()
                    have_serial_execution_lock = False
                    if not img.has_key('src'):
                    filename = self.addImage(
                        configdir, self.key, baseurl, img['src'],
                    img['src']="file://%s" %filename
                    count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                    object_size += os.path.getsize (filename)
                    except os.error, exception:
                        logger.error ("Error getting size of %s: %s"
                                      % (filename, exception))
                    self.serial_execution_lock.acquire ()
                    have_serial_execution_lock = True

                # Write the rendered article to disk.
                tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                file = open(tmpEntry["contentLink"], "w")
                file.write(soup.prettify())
                # NOTE(review): the `values = {` opener of this dict literal
                # is not visible in this excerpt.
                'title': tmpEntry["title"],
                'contentLink': tmpEntry["contentLink"],
                'contentHash': contentHash,
                'date': tmpEntry["date"],
                'updated': currentTime,
                'link': tmpEntry["link"],
                if current_version is not None:
                    # This is an update.  Ensure that the existing
                    values['ROWID'] = current_version[1]
                cols, values = zip(*values.items())
                "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
                % (','.join(cols), ','.join(('?',) * len(values))),

                # Register the object with Woodchuck and mark it as
                def register_object_transferred(
                    id, title, publication_time,
                    sent, received, object_size):
                    logger.debug("Registering transfer of object %s"
                    obj = wc()[self.key].object_register(
                        object_identifier=id,
                        human_readable_name=title)
                    except woodchuck.ObjectExistsError:
                        obj = wc()[self.key][id]
                    obj.publication_time = publication_time
                        woodchuck.Indicator.ApplicationVisual
                        |woodchuck.Indicator.StreamWide),
                        transferred_down=received,
                        object_size=object_size)
                # If the entry does not contain a publication
                # time, the attribute won't exist.
                pubtime = entry.get('date_parsed', None)
                publication_time = time.mktime (pubtime)
                publication_time = None
                = entry_transfer_stats(**progress_handler.stats)
                # sent and received are for objects (in
                # particular, images) associated with this
                # item.  We also want to attribute the data
                # transferred for the item's content.  This is
                # a good first approximation.
                received += len(content)
                register_object_transferred(
                    title=tmpEntry["title"],
                    publication_time=publication_time,
                    sent=sent, received=received,
                    object_size=object_size),

            = feed_transfer_stats(**progress_handler.stats)
            "%s: Update successful: transferred: %d/%d; objects: %d)"
            % (url, sent, received, len (tmp.entries)))
            mainthread.execute (wc_success, async=True)

            # Expire articles: unread ones are kept twice as long.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            self.removeEntry(row[0])

            # Remove cached files whose mtime is older than 3x expiry.
            from glob import glob
            for file in glob(configdir+self.key+".d/*"):
                # put the two dates into matching format
                lastmodDate = stats[8]
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                if expDate > lastmodDate:
                    #print 'Removing', file
                    # XXX: Tell woodchuck.
                    remove(file) # commented out for testing
                    except OSError, exception:
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
        logger.debug("updated %s: %fs in download, %fs in processing"
                     % (self.key, download_duration,
                        time.time () - process_start))
        logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        if have_serial_execution_lock:
            self.serial_execution_lock.release ()
        rows = self.db.execute("SELECT MAX(date) FROM feed;")
        logger.error("Fetching update time: %s: %s"
                     % (str(e), traceback.format_exc()))
        title = tmp.feed.title
        except (AttributeError, UnboundLocalError), exception:
        if postFeedUpdateFunc is not None:
            postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                title, *postFeedUpdateFuncArgs)
        self.cache_invalidate()
    def setEntryRead(self, id):
        """Mark article `id` as read and (best-effort, on the main thread)
        tell Woodchuck the object was used.

        NOTE(review): the commit, the `def doit():` header and its
        try/except lines are not visible in this excerpt.
        """
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
        wc()[self.key][id].used()
        mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')
    def setEntryUnread(self, id):
        """Mark article `id` as unread and drop the cached 'feed' columns.
        NOTE(review): the commit line is not visible in this excerpt."""
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.cache_invalidate('feed')
    def markAllAsRead(self):
        """Mark every unread article in this feed as read.
        NOTE(review): the commit line is not visible in this excerpt."""
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.cache_invalidate('feed')
754 def isEntryRead(self, id):
755 return self.lookup('feed', 'read', id) == 1
757 def getTitle(self, id):
758 return self.lookup('feed', 'title', id)
760 def getContentLink(self, id):
761 return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
763 def getContentHash(self, id):
764 return self.db.execute("SELECT contentHash FROM feed WHERE id=?;", (id,) ).fetchone()[0]
766 def getExternalLink(self, id):
767 return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
769 def getDate(self, id):
770 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
771 return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
773 def getDateTuple(self, id):
774 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
775 return time.localtime(dateStamp)
777 def getDateStamp(self, id):
778 return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        """
        # NOTE(review): the fallback chain's guard lines (`if key is
        # None:`-style) and the final hashing return are not visible in
        # this excerpt; as shown, later assignments would clobber earlier
        # ones.
        # Prefer the entry's id, which is supposed to be globally
        key = entry.get('id', None)
        # Next, try the link to the content.
        key = entry.get('link', None)
        # Ok, the title and the date concatenated are likely to be
        key = entry.get('title', None) + entry.get('date', None)
        # Hmm, the article's content will at least guarantee no
        # false negatives (i.e., missing articles)
        key = entry.get('content', None)
        # If all else fails, just use a random number.
        key = str (random.random ())
    def getIds(self, onlyUnread=False):
        """Return article ids, newest first; optionally only unread ones.
        NOTE(review): the branch headers and the final return are not
        visible in this excerpt."""
        rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
    def getNextId(self, id, forward=True):
        """Return the id adjacent to `id` in the date-ordered list,
        wrapping around; direction is controlled by `forward`.
        NOTE(review): the lines computing `ids` and `delta` are not
        visible in this excerpt."""
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]
826 def getPreviousId(self, id):
827 return self.getNextId(id, forward=False)
829 def getNumberOfUnreadItems(self):
830 return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
832 def getNumberOfEntries(self):
833 return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
    def getArticle(self, entry):
        """Build an XHTML page for `entry` (a dict with title/content/
        author/date/link keys).

        NOTE(review): the `link = ...` assignment used below, the line
        appending `content`, and the final return are not visible in this
        excerpt.
        """
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]
        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += "</body></html>"
    def getContent(self, id):
        """
        Return the content of the article with the specified ID.  If
        the content is not available, returns None.
        """
        # NOTE(review): the try/except headers and the return statements
        # are not visible in this excerpt.
        contentLink = self.getContentLink(id)
        with open(contentLink, 'r') as file:
            content = file.read()
        logger.exception("Failed get content for %s: reading %s failed",
    def extractDate(self, entry):
        """Return the entry's updated/published time as a Unix timestamp
        (via calendar.timegm, i.e. treating the tuple as UTC).
        NOTE(review): the fallback branch for entries with neither field
        is not visible in this excerpt."""
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])
    def extractContent(self, entry):
        """Pick the richest body text: the longer of summary/content[0],
        with description as a fallback.
        NOTE(review): the initialisation of `content`, an apparent `else:`
        before the description fallback, and the final return are not
        visible in this excerpt."""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        content = entry.get('description', '')
    def removeEntry(self, id):
        """Delete the article's content file and DB rows, then (on the
        main thread, best-effort) tell Woodchuck its files are gone.
        NOTE(review): try/except headers, the image-file cleanup, the
        commit and the `def doit():` header are not visible in this
        excerpt."""
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        except OSError, exception:
            logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        wc()[self.key][id].files_deleted (
            woodchuck.DeletionResponse.Deleted)
        del wc()[self.key][id]
        mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, date, configdir):
        """Insert a saved article row (unread, updated=0 so a later
        _updateFeed pass downloads its page).
        NOTE(review): any trailing commit/updateUnread lines are not
        visible in this excerpt."""
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
    # Feed.UpdateFeed calls this function.
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Download the pages of archived articles not yet fetched
        (updated=0), caching their images.
        NOTE(review): loop/try headers and several statements are not
        visible in this excerpt; indentation is reconstructed."""
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        currentTime = time.time()
        f = urllib2.urlopen(link)
        #entry["content"] = f.read()
        soup = BeautifulSoup(html)
        filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
        self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
        contentLink = configdir+self.key+".d/"+id+".html"
        file = open(contentLink, "w")
        file.write(soup.prettify())
        self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
        logger.error("Error updating Archived Article: %s %s"
                     % (link,traceback.format_exc(),))
        if postFeedUpdateFunc is not None:
            postFeedUpdateFunc (self.key, currentTime, None, None, None,
                                *postFeedUpdateFuncArgs)
    def purgeReadArticles(self):
        """Remove every archived article that is marked read.
        NOTE(review): the loop header over `rows` is not visible in this
        excerpt."""
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        self.removeArticle(row[0])
    def removeArticle(self, id):
        """Remove an archived article.
        NOTE(review): most of this method is not visible in this excerpt;
        the count query presumably keeps image files shared with other
        articles — confirm against the full file."""
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
class Listing(BaseObject):
    # Columns served from BaseObject.lookup's cache for these tables.
    # NOTE(review): intermediate entries of this tuple are not visible.
    cached_columns = (('feeds', 'updateTime'),
                      ('categories', 'title'))

    # NOTE(review): _getdb's `def` line and the per-thread lookup are not
    # visible; the AttributeError path opens one sqlite connection per
    # thread.
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
    db = property(_getdb)
    # Lists all the feeds in a dictionary, and expose the data
    def __init__(self, config, configdir):
        """Open/create the feeds database, run schema migrations, and
        reconcile the subscription list with Woodchuck.
        NOTE(review): many lines (the self.config assignment, branch and
        try/except headers, commits) are not visible; indentation is
        reconstructed."""
        self.configdir = configdir
        self.tls = threading.local ()
        table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
        self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
        self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
        self.addCategory("Default Category")
        if isfile(self.configdir+"feeds.pickle"):
            self.importOldFormatFeeds()
        self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
        # Schema migrations for databases created by older versions.
        from string import find, upper
        if find(upper(table[0]), "WIDGET")<0:
            self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
            self.db.execute("UPDATE feeds SET widget=1;")
        if find(upper(table[0]), "CATEGORY")<0:
            self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
            self.addCategory("Default Category")
            self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
            self.db.execute("UPDATE feeds SET category=1;")
        # Check that Woodchuck's state is up to date with respect our
        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
        wc_init(config, self, True if updater else False)
        if wc().available() and updater:
            # The list of known streams.
            streams = wc().streams_list ()
            stream_ids = [s.identifier for s in streams]
            # Register any unknown streams.  Remove known streams from
            for key in self.getListOfFeeds():
                title = self.getFeedTitle(key)
                # XXX: We should also check whether the list of
                # articles/objects in each feed/stream is up to date.
                if key not in stream_ids:
                    "Registering previously unknown channel: %s (%s)"
                    wc().stream_register(
                        self.config.getUpdateInterval() * 60 * 60)
                    # Make sure the human readable name is up to date.
                    if wc()[key].human_readable_name != title:
                        wc()[key].human_readable_name = title
                    stream_ids.remove (key)
                    wc()[key].freshness \
                        = self.config.getUpdateInterval() * 60 * 60
            # Unregister any streams that are no longer subscribed to.
            for id in stream_ids:
                logger.debug("Unregistering %s" % (id,))
                # NOTE(review): `w` is not defined in the visible lines —
                # this looks like it should be wc(); confirm.
                w.stream_unregister (id)
        logger.exception("Registering streams with Woodchuck")
    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
        # NOTE(review): loop/try headers, the rank computation and the
        # read_status assignment are not visible; indentation is
        # reconstructed.
        listing = rss.Listing(self.configdir)
        for id in listing.getListOfFeeds():
            values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
            feed = listing.getFeed(id)
            new_feed = self.getFeed(id)
            items = feed.getIds()[:]
            if feed.isEntryRead(item):
            date = timegm(feed.getDateTuple(item))
            title = feed.getTitle(item)
            newId = new_feed.generateUniqueId({"date":date, "title":title})
            # NOTE(review): tuple(time.time()) raises TypeError (a float is
            # not iterable) — this probably should be just time.time().
            values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
            new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
            new_feed.db.commit()
            images = feed.getImages(item)
            for image in images:
                new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
                new_feed.db.commit()
            self.updateUnread(id)
        logger.error("importOldFormatFeeds: %s"
                     % (traceback.format_exc(),))
        remove(self.configdir+"feeds.pickle")
    def addArchivedArticle(self, key, index):
        """Copy article `index` of feed `key` into the special
        "ArchivedArticles" pseudo-feed.
        NOTE(review): the guard (presumably `if count == 0:`) around the
        addFeed call is not visible in this excerpt."""
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
        self.addFeed("Archived Articles", "", id="ArchivedArticles")
        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")
    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
        # NOTE(review): the remainder of this signature, several
        # default-resolution lines, branch headers and the feed.updateFeed
        # call opener are not visible in this excerpt.
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
        # Default to 24 hours
        (use_proxy, proxy) = self.config.getProxy()
        if imageCache is None:
            imageCache = self.config.getImageCache()
        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        # `modified` is stored as str() of a time tuple.  NOTE(review):
        # eval() of a DB-stored string is unsafe if the DB can be tampered
        # with — consider ast.literal_eval.
        modified = time.struct_time(eval(modified))
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
    def _queuePostFeedUpdate(self, *args, **kwargs):
        # Run the post-update bookkeeping asynchronously on the main
        # thread rather than on the worker that finished the update.
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        """Persist a feed's post-update metadata, refresh the unread count,
        and publish progress over D-Bus.
        NOTE(review): surrounding if/else and try headers, commits and the
        branch that sets `percent` on ZeroDivisionError are not visible;
        indentation is reconstructed."""
        modified=str(tuple(modified))
        self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
        self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
        if title is not None:
            # Only fill in the title when the stored one is empty.
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
        self.cache_invalidate('feeds')
        self.updateUnread(key)
        update_server_object().ArticleCountUpdated()
        # Aggregate job progress: count in-progress jobs as half done.
        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']
        percent = (100 * ((completed + in_progress / 2.))
                   / (completed + in_progress + queued))
        except ZeroDivisionError:
        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)
        if in_progress == 0 and queued == 0:
            # Batch finished: re-baseline the completed counter.
            jobs_at_start = stats['jobs-completed']
1181 def getFeed(self, key):
1182 if key == "ArchivedArticles":
1183 return ArchivedArticles(self.configdir, key)
1184 return Feed(self.configdir, key)
    def editFeed(self, key, title, url, category=None):
        """Update a feed's title/url (and optionally category) and push the
        new title to Woodchuck when available.
        NOTE(review): branch headers, the commit and try/except lines are
        not visible in this excerpt."""
        self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
        self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.cache_invalidate('feeds')
        if wc().available():
            wc()[key].human_readable_name = title
            logger.debug("Feed %s (%s) unknown." % (key, title))
    def getFeedUpdateTime(self, key):
        """Return a human-readable description of how long ago feed `key`
        was last updated.
        NOTE(review): an early-return branch (e.g. for a never-updated
        feed) is not visible in this excerpt."""
        update_time = self.lookup('feeds', 'updateTime', key)
        delta = time.time() - update_time
        delta_hours = delta / (60. * 60.)
        if delta_hours < .1:
            return "A few minutes ago"
        if delta_hours < .75:
            return "Less than an hour ago"
        if delta_hours < 1.5:
            return "About an hour ago"
        if delta_hours < 18:
            return "About %d hours ago" % (int(delta_hours + 0.5),)
        delta_days = delta_hours / 24.
        if delta_days < 1.5:
            return "About a day ago"
        # NOTE(review): a guard line (e.g. `if delta_days <= N:`) appears
        # to be missing above this return in the excerpt — the weeks/months
        # branches below would otherwise be unreachable.
        return "%d days ago" % (int(delta_days + 0.5),)
        delta_weeks = delta_days / 7.
        if delta_weeks <= 8:
            return "%d weeks ago" % int(delta_weeks + 0.5)
        delta_months = delta_days / 30.
        if delta_months <= 30:
            return "%d months ago" % int(delta_months + 0.5)
        return time.strftime("%x", time.gmtime(update_time))
1234 def getFeedNumberOfUnreadItems(self, key):
1235 return self.lookup('feeds', 'unread', key)
    def getFeedTitle(self, key):
        """Return the feed's stored title, falling back to its URL.

        NOTE(review): the listing elides the lines between the lookup and
        the fallback — presumably a return of *title* when it is non-empty.
        """
        title = self.lookup('feeds', 'title', key)
        # [elided: return of `title` when it is set]
        return self.getFeedUrl(key)
1244 def getFeedUrl(self, key):
1245 return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1247 def getFeedCategory(self, key):
1248 return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    def getListOfFeeds(self, category=None):
        """Return feed ids ordered by rank, optionally restricted to one
        category.

        NOTE(review): the listing elides the if/else selecting between the
        two queries and the final return (presumably the ids extracted from
        `rows`).
        """
        # [elided: branch taken when a category filter is given]
        rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
        # [elided: else-branch — all feeds]
        rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
        # [elided: return of the ids extracted from `rows`]
1261 def getListOfCategories(self):
1262 return list(row[0] for row in self.db.execute(
1263 "SELECT id FROM categories ORDER BY rank;"))
1265 def getCategoryTitle(self, id):
1266 return self.lookup('categories', 'title', id)
    def getCategoryUnread(self, id):
        """Sum the unread counts of every feed in category *id*.

        NOTE(review): the listing elides the accumulator initialisation
        (presumably `count = 0`) and the final `return count`.
        """
        # [elided: count = 0]
        for key in self.getListOfFeeds(category=id):
            count = count + self.getFeedNumberOfUnreadItems(key)
        # [elided: return count]
    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        """Return the ids of the feeds in *category*, sorted per *order*.

        *order* is one of "Most unread", "Least unread", "Most recent",
        "Least recent"; anything else falls back to the manual rank order.
        When *onlyUnread* is true, only feeds with unread articles are
        returned.

        NOTE(review): the listing elides the `if onlyUnread:`/`else:` pair
        around the two sql assignments and the final return of the ids.
        """
        # Map the human-readable order name onto an ORDER BY clause.
        if order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
        # NOTE(review): *category* is interpolated with %s instead of being
        # bound as a ? parameter — tolerable for the internal integer ids
        # used today, but inconsistent with the rest of this file and unsafe
        # if category ever comes from user input.
        # [elided: if onlyUnread:]
        sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
        # [elided: else:]
        sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
        rows = self.db.execute(sql)
        # [elided: return of the ids extracted from `rows`]
    def getFavicon(self, key):
        """Return the path of feed *key*'s cached favicon.

        NOTE(review): the listing elides the method's tail — presumably
        returning *filename* when the file exists and None/False otherwise.
        """
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):
            # [elided: return filename, plus the fallback return]
    def updateUnread(self, key):
        """Recompute feed *key*'s unread count from its articles and store it.

        NOTE(review): one line is elided between the UPDATE and the cache
        invalidation — presumably the commit.
        """
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        # [elided line — presumably self.db.commit()]
        self.cache_invalidate('feeds')
    def addFeed(self, title, url, id=None, category=1):
        """Insert a new feed row ranked last and register it with Woodchuck.

        NOTE(review): the listing elides several lines of this method (the
        id derivation when none is supplied, the duplicate check on *count*,
        the MAX(rank) NULL fallback, the feed-object creation/commit, the
        remaining stream_register arguments, and the final return); comments
        mark the gaps.
        """
        # [elided: derive an id from the url when none was supplied]
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
        # [elided: proceed only when the feed does not already exist]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
        # NOTE(review): prefer `max_rank is None`; MAX() on an empty table
        # yields NULL/None.
        if max_rank == None:
            # [elided: max_rank fallback value]
        # New feed: no unread items, never updated, ranked after all others.
        values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
        self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
        # Ask for the feed object, it will create the necessary tables
        # [elided: the getFeed call and the commit]
        if wc().available():
            # Register the stream with Woodchuck. Update approximately
            # [elided: remainder of this comment — likely the refresh period]
            wc().stream_register(stream_identifier=id,
                                 human_readable_name=title,
        # [elided: closing arguments of the stream_register call]
        self.cache_invalidate('feeds')
    def addCategory(self, title):
        """Create a new category *title* at the bottom of the rank order.

        NOTE(review): the listing elides the NULL fallbacks for *rank* and
        *id* (MAX(...)+1 is NULL on an empty table) and the commit.
        """
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        # [elided: fallback when MAX(rank)+1 evaluated to NULL/None]
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        # [elided: fallback when MAX(id)+1 evaluated to NULL/None]
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
        # [elided line — presumably self.db.commit()]
        self.cache_invalidate('categories')
    def removeFeed(self, key):
        """Delete feed *key*: unregister it from Woodchuck, drop its row,
        close the rank gap, and remove its on-disk article directory.

        NOTE(review): the listing elides the `try:` body (presumably the
        Woodchuck stream removal) and the commit.
        """
        if wc().available ():
            # [elided: try: block removing the Woodchuck stream for *key*]
        # BUG: `except KeyError, woodchuck.Error:` is Python 2 syntax for
        # "catch KeyError and bind the exception instance to the name
        # woodchuck.Error" — it does NOT catch woodchuck.Error, and it
        # clobbers that module attribute when it fires.  It should read
        # `except (KeyError, woodchuck.Error):`.
        except KeyError, woodchuck.Error:
            logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        # Shift every feed that ranked below the removed one up by one.
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
        # [elided line — presumably self.db.commit()]
        # Remove the feed's cached article/image directory, if present.
        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
        self.cache_invalidate('feeds')
    def removeCategory(self, key):
        """Delete category *key*, re-rank the rest, and move its feeds to
        the default category (1).  The last remaining category is kept.

        NOTE(review): one line is elided before the cache invalidation —
        presumably the commit.
        """
        # Refuse to delete the only category.
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            # Close the rank gap left by the deleted category.
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            # Orphaned feeds fall back to the default category.
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
            # [elided line — presumably self.db.commit()]
            self.cache_invalidate('categories')
1379 #def saveConfig(self):
1380 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
1381 # file = open(self.configdir+"feeds.pickle", "w")
1382 # pickle.dump(self.listOfFeeds, file)
    def moveUp(self, key):
        """Swap feed *key* with the feed ranked immediately above it.

        NOTE(review): the listing elides a line after the SELECT —
        presumably a `rank > 0` guard — and the trailing commit.
        """
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        # [elided: guard — only move when not already first]
        # Give the neighbour this feed's rank, then take the neighbour's.
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
    def moveCategoryUp(self, key):
        """Swap category *key* with the category ranked immediately above it.

        NOTE(review): the listing elides a line after the SELECT —
        presumably a `rank > 0` guard — and the trailing commit.
        """
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        # [elided: guard — only move when not already first]
        # Give the neighbour this category's rank, then take the neighbour's.
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
    def moveDown(self, key):
        """Swap feed *key* with the feed ranked immediately below it.

        NOTE(review): the listing elides a line after the MAX(rank) lookup —
        presumably a `rank < max_rank` guard — and the trailing commit.
        """
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
        # [elided: guard — only move when not already last]
        # Give the neighbour this feed's rank, then take the neighbour's.
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
    def moveCategoryDown(self, key):
        """Swap category *key* with the category ranked immediately below it.

        NOTE(review): the listing elides a line after the MAX(rank) lookup —
        presumably a `rank < max_rank` guard — and the trailing commit.
        """
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
        # [elided: guard — only move when not already last]
        # Give the neighbour this category's rank, then take the neighbour's.
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )