#!/usr/bin/env python2.5

# Copyright (c) 2007-2008 INdT.
# Copyright (c) 2011 Neal H. Walfield
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# ============================================================================
# Author      : Yves Marcoz
# Description : Simple RSS Reader
# ============================================================================
from __future__ import with_statement

import sqlite3
from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, utime
import os
import md5
import feedparser
import time
import urllib2
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
from calendar import timegm
import threading
import traceback
import subprocess
import dbus
import random
import sys
import logging

from wc import wc, wc_init, woodchuck
from updatedbus import update_server_object
from jobmanager import JobManager
import mainthread
from httpprogresshandler import HTTPProgressHandler

logger = logging.getLogger(__name__)
def getId(string):
    """Return a stable identifier (an md5 hex digest) for a string."""
    if issubclass(string.__class__, unicode):
        string = string.encode('utf8', 'replace')

    return md5.new(string).hexdigest()
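# Illustrative example (not executed): getId maps any string -- a URL,
# an article's content, or a title/date pair -- to a 32-character hex
# digest that is stable across runs, e.g.:
#
#     getId("http://example.com/feed.xml")  # -> '9a4c...' (some digest)
#
# Unicode input is encoded as UTF-8 first so that hashing never raises
# UnicodeEncodeError.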
def download_callback(connection):
    """Abort an in-progress transfer if the job manager is shutting down."""
    if JobManager().do_quit:
        raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    """Return a urllib2 opener with progress reporting and proxy support."""
    openers = []

    if progress_handler is not None:
        openers.append(progress_handler)
    else:
        openers.append(HTTPProgressHandler(download_callback))

    if proxy:
        openers.append(proxy)

    return urllib2.build_opener(*openers)
def transfer_stats(sent, received, **kwargs):
    """
    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.
    """
    start_time = time.time()
    start_sent = sent
    start_received = received

    def e(sent, received, **kwargs):
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)

    return e
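# Illustrative usage sketch (not executed), assuming a progress handler
# whose .stats dictionary carries running 'sent' and 'received' totals:
#
#     snapshot = transfer_stats(**progress_handler.stats)
#     # ... transfer some data ...
#     sent, received, duration = snapshot(**progress_handler.stats)
#
# Each call to transfer_stats() marks a starting point; the returned
# continuation reports the deltas accumulated since that mark.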
# If not None, a subprocess.Popen object corresponding to an
# update_feeds.py process.
update_feed_process = None

update_feeds_iface = None

jobs_at_start = 0
class BaseObject(object):
    # Columns to cache.  Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column.  Note that
    # both are case sensitive.
    cached_columns = ()

    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        if not hasattr(self, 'cache'):
            return

        if table is None:
            del self.cache
        else:
            if table in self.cache:
                del self.cache[table]
    def lookup(self, table, column, id=None):
        """
        Look up a column or value.  Uses a cache for columns in
        cached_columns.  Note: the column is returned unsorted.
        """
        if not hasattr(self, 'cache'):
            self.cache = {}

        # Cache data for at most 60 seconds.
        cache = None
        if table in self.cache:
            cache = self.cache[table]

            if time.time() - cache[None] > 60:
                # logger.debug("%s: Cache too old: clearing" % (table,))
                del self.cache[table]
                cache = None

        if (cache is None
            or (table, column) not in self.cached_columns):
            # The cache is empty or the caller wants a column that we
            # don't cache: query the database.
            if (table, column) in self.cached_columns:
                # logger.debug("%s: Rebuilding cache" % (table,))
                do_cache = True

                self.cache[table] = cache = {}
                columns = []
                for t, c in self.cached_columns:
                    if t == table:
                        cache[c] = {}
                        columns.append(c)

                columns.append('id')
                where = ""
            else:
                do_cache = False

                columns = (column,)
                if id is not None:
                    where = "where id = '%s'" % id
                else:
                    where = ""

            results = self.db.execute(
                "SELECT %s FROM %s %s" % (','.join(columns), table, where))

            if do_cache:
                for row in results:
                    values = list(row)
                    i = values.pop()
                    for index, value in enumerate(values):
                        cache[columns[index]][i] = value
                cache[None] = time.time()
            else:
                rows = results.fetchall()
                results = []
                for values in rows:
                    if id is not None:
                        return values[0]
                    results.append(values[0])
                return results

        cache = self.cache[table]

        if id is not None:
            try:
                value = cache[column][id]
                # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
                return value
            except KeyError:
                # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
                return None

        return cache[column].values()
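# Minimal usage sketch for the cache mechanism (illustrative, not
# executed).  A subclass names the (table, column) pairs it wants
# cached and provides a db attribute with an execute() method; lookup()
# then serves repeated reads from memory for up to 60 seconds:
#
#     class Items(BaseObject):
#         cached_columns = (('item', 'title'),)
#
#         def __init__(self, db):
#             self.db = db
#
#     items = Items(some_sqlite_connection)
#     title = items.lookup('item', 'title', some_id)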
class Feed(BaseObject):
    # Columns to cache.
    cached_columns = (('feed', 'read'),
                      ('feed', 'title'))

    serial_execution_lock = threading.Lock()

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)
    def __init__(self, configdir, key):
        self.key = key
        self.configdir = configdir
        self.dir = "%s/%s.d" % (self.configdir, self.key)
        self.tls = threading.local()

        if not isdir(self.dir):
            mkdir(self.dir)
        filename = "%s/%s.db" % (self.dir, self.key)
        if not isfile(filename):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, contentHash text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
            self.db.commit()
        else:
            try:
                self.db.execute("ALTER TABLE feed ADD COLUMN contentHash text")
                self.db.commit()
            except sqlite3.OperationalError, e:
                if 'duplicate column name' in str(e):
                    # The column already exists: nothing to do.
                    pass
                else:
                    logger.exception("Add column contentHash to %s", filename)
    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                if not opener:
                    opener = downloader(proxy=proxy)

                abs_url = urljoin(baseurl, url)
                f = opener.open(abs_url)
                try:
                    with open(filename, "w") as outf:
                        outf.write(f.read())
                finally:
                    f.close()
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                return None
            except Exception:
                exception = sys.exc_info()[0]

                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
                return None
        else:
            #open(filename,"a").close() # "Touch" the file
            file = open(filename, "a")
            utime(filename, None)
            file.close()
        return filename
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
            # We are the update daemon: run the update in our own job
            # queue.
            def doit():
                def it():
                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
                return it
            JobManager().execute(doit(), self.key, priority=priority)
        else:
            # We are the GUI: hand the update off to the update daemon
            # via D-Bus, starting the daemon if necessary.
            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    try:
                        bus = dbus.SessionBus()
                        remote_object = bus.get_object(
                            "org.marcoz.feedingit", # Connection name
                            "/org/marcoz/feedingit/update" # Object's path
                            )
                        update_feeds_iface = dbus.Interface(
                            remote_object, 'org.marcoz.feedingit')
                    except Exception:
                        logger.exception(
                            "Connecting to org.marcoz.feedingit")
                        return False

                try:
                    update_feeds_iface.Update(self.key)
                except Exception, e:
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                                 % str(e))
                    update_feeds_iface = None
                    return False
                else:
                    return True

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).
                return

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                                            'update_feeds.py')
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                             % (str(argv),))
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                # rebind.
                update_feeds_iface = None

            # Give the daemon a moment to come up, then retry the
            # request.
            time.sleep(1)
            if send_update_request():
                return
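    # For reference, the daemon side of the hand-off above is expected
    # to export a matching D-Bus method on org.marcoz.feedingit.  A
    # rough sketch (the real implementation lives in update_feeds.py):
    #
    #     class UpdateService(dbus.service.Object):
    #         @dbus.service.method('org.marcoz.feedingit', in_signature='s')
    #         def Update(self, key):
    #             # ... queue an update of the feed named by key ...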
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        logger.debug("Updating %s" % url)

        success = False
        have_serial_execution_lock = False
        try:
            update_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            feed_transfer_stats = transfer_stats(0, 0)

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - update_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.
            currentTime = 0

            # Counts of new and updated entries, reported to woodchuck
            # when the update completes.
            new_objects = 0
            updated_objects = 0

            def wc_success():
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=update_start,
                        transfer_duration=download_duration,
                        new_objects=new_objects,
                        updated_objects=updated_objects,
                        objects_inline=new_objects + updated_objects)
                except Exception:
                    logger.exception(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key,))
            http_status = tmp.get ('status', 200)

            # Check if the parse was successful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This makes a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute(wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.get('version', None):
                # An error occurred fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed out, or
                # '' if the data is not a proper feed.)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.get('version', 'unset')),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))

                def register_stream_update_failed(http_status):
                    logger.debug("%s: stream update failed!" % self.key)

                    try:
                        # It's not easy to get the feed's title from here.
                        # At the latest, the next time the application is
                        # started, we'll fix up the human readable name.
                        wc().stream_register (self.key, "", 6 * 60 * 60)
                    except woodchuck.ObjectExistsError:
                        pass
                    ec = woodchuck.TransferStatus.TransientOther
                    if 300 <= http_status < 400:
                        ec = woodchuck.TransferStatus.TransientNetwork
                    if 400 <= http_status < 500:
                        ec = woodchuck.TransferStatus.FailureGone
                    if 500 <= http_status < 600:
                        ec = woodchuck.TransferStatus.TransientNetwork
                    wc()[self.key].update_failed(ec)

                if wc().available():
                    mainthread.execute(
                        lambda: register_stream_update_failed(
                            http_status=http_status),
                        async=True)
            else:
                currentTime = time.time()
                # The etag and modified values should only be updated if
                # the content was not null.
                try:
                    etag = tmp["etag"]
                except KeyError:
                    etag = None
                try:
                    modified = tmp["modified"]
                except KeyError:
                    modified = None
                try:
                    abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                    f = opener.open(abs_url)
                    data = f.read()
                    f.close()

                    outf = open(self.dir+"/favicon.ico", "w")
                    outf.write(data)
                    outf.close()
                    del data
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    logger.debug("Could not download favicon %s: %s"
                                 % (abs_url, str (exception)))
                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    entry_transfer_stats = transfer_stats(
                        *feed_transfer_stats(**progress_handler.stats)[0:2])

                    if JobManager().do_quit:
                        raise KeyboardInterrupt
                    date = self.extractDate(entry)
                    try:
                        entry["title"]
                    except KeyError:
                        entry["title"] = "No Title"
                    try:
                        entry["link"]
                    except KeyError:
                        entry["link"] = ""
                    try:
                        entry["author"]
                    except KeyError:
                        entry["author"] = None
                    if(not(entry.has_key("id"))):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    contentHash = getId(content)
                    object_size = len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)
                    current_version = self.db.execute(
                        'select date, ROWID, contentHash from feed where id=?',
                        (id,)).fetchone()
                    if (current_version is not None
                        # To detect updates, don't compare by date:
                        # compare by content.
                        #
                        # - If an article update is just a date change
                        #   and the content remains the same, we don't
                        #   want to register an update.
                        #
                        # - If an article's content changes but not the
                        #   date, we want to recognize an update.
                        and current_version[2] == contentHash):
                        logger.debug("ALREADY DOWNLOADED %s (%s)"
                                     % (entry["title"], entry["link"]))
                        # This article is already present in the feed
                        # listing.  Update the "updated" time, so it
                        # doesn't expire.
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;",
                                        (currentTime, id))
                        try:
                            logger.debug("Updating already downloaded files for %s" % (id,))
                            filename = configdir+self.key+".d/"+id+".html"
                            file = open(filename,"a")
                            utime(filename, None)
                            file.close()
                            images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                            for image in images:
                                file = open(image[0],"a")
                                utime(image[0], None)
                                file.close()
                        except Exception:
                            logger.debug("Error in refreshing images for %s" % (id,))
                        self.db.commit()
                        continue
                    if current_version is not None:
                        # The version was updated.  Mark it as unread.
                        logger.debug("UPDATED: %s (%s)"
                                     % (entry["title"], entry["link"]))
                        updated_objects += 1
                    else:
                        logger.debug("NEW: %s (%s)"
                                     % (entry["title"], entry["link"]))
                        new_objects += 1

                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    if imageCache and len(images) > 0:
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
                            if not img.has_key('src'):
                                continue

                            filename = self.addImage(
                                configdir, self.key, baseurl, img['src'],
                                opener=opener)
                            if filename:
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True

                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    values = {'id': id,
                              'title': tmpEntry["title"],
                              'contentLink': tmpEntry["contentLink"],
                              'contentHash': contentHash,
                              'date': tmpEntry["date"],
                              'updated': currentTime,
                              'link': tmpEntry["link"],
                              'read': 0}

                    if current_version is not None:
                        # This is an update.  Ensure that the existing
                        # row is replaced rather than duplicated.
                        values['ROWID'] = current_version[1]

                    cols, values = zip(*values.items())
                    self.db.execute(
                        "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
                        % (','.join(cols), ','.join(('?',) * len(values))),
                        values)
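                    # For example (illustrative; dict ordering may vary),
                    # the statement built above for a new article is:
                    #
                    #   INSERT OR REPLACE INTO feed
                    #     (id,title,contentLink,contentHash,date,updated,link,read)
                    #     VALUES (?,?,?,?,?,?,?,?);
                    #
                    # For an updated article, ROWID is included as well,
                    # so sqlite replaces the existing row in place.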
                    # Register the object with Woodchuck and mark it as
                    # downloaded.
                    def register_object_transferred(
                        id, title, publication_time,
                        sent, received, object_size):
                        def doit():
                            logger.debug("Registering transfer of object %s"
                                         % title)
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=title)
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]

                            obj.publication_time = publication_time
                            obj.transferred(
                                indicator=(
                                    woodchuck.Indicator.ApplicationVisual
                                    |woodchuck.Indicator.StreamWide),
                                transferred_down=received,
                                transferred_up=sent,
                                object_size=object_size)
                        return doit
                    if wc().available():
                        # If the entry does not contain a publication
                        # time, the attribute won't exist.
                        pubtime = entry.get('date_parsed', None)
                        if pubtime:
                            publication_time = time.mktime (pubtime)
                        else:
                            publication_time = None

                        sent, received, _ \
                            = entry_transfer_stats(**progress_handler.stats)
                        # sent and received are for objects (in
                        # particular, images) associated with this
                        # item.  We also want to attribute the data
                        # transferred for the item's content.  This is
                        # a good first approximation.
                        received += len(content)

                        mainthread.execute(
                            register_object_transferred(
                                id=id,
                                title=tmpEntry["title"],
                                publication_time=publication_time,
                                sent=sent, received=received,
                                object_size=object_size),
                            async=True)
                self.db.commit ()

                sent, received, _ \
                    = feed_transfer_stats(**progress_handler.stats)
                logger.debug (
                    "%s: Update successful: transferred: %d/%d; objects: %d"
                    % (url, sent, received, len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True

            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
                self.removeEntry(row[0])
            from glob import glob
            for file in glob(configdir+self.key+".d/*"):
                # Put the two dates into matching format.
                stats = os.stat(file)
                lastmodDate = stats[8]

                expDate = time.time()-expiry*3
                # Check whether the file's last-modified date is outdated.
                if expDate > lastmodDate:
                    try:
                        #print 'Removing', file
                        # XXX: Tell woodchuck.
                        remove(file)
                    except OSError, exception:
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime = row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
            finally:
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    pass
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)

        self.cache_invalidate()
    def setEntryRead(self, id):
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].used()
            except KeyError:
                pass
        if wc().available():
            mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')
    def setEntryUnread(self, id):
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.db.commit()
        self.cache_invalidate('feed')
    def markAllAsRead(self):
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.db.commit()
        self.cache_invalidate('feed')
    def isEntryRead(self, id):
        return self.lookup('feed', 'read', id) == 1

    def getTitle(self, id):
        return self.lookup('feed', 'title', id)

    def getContentLink(self, id):
        return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getContentHash(self, id):
        return self.db.execute("SELECT contentHash FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getExternalLink(self, id):
        return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getDate(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))

    def getDateTuple(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.localtime(dateStamp)

    def getDateStamp(self, id):
        return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        article is updated.
        """
        # Prefer the entry's id, which is supposed to be globally
        # unique.
        key = entry.get('id', None)
        if not key:
            # Next, try the link to the content.
            key = entry.get('link', None)
        if not key:
            # Ok, the title and the date concatenated are likely to be
            # relatively unique.
            try:
                key = entry.get('title', None) + entry.get('date', None)
            except TypeError:
                pass
        if not key:
            # Hmm, the article's content will at least guarantee no
            # false negatives (i.e., missing articles)
            key = entry.get('content', None)
        if not key:
            # If all else fails, just use a random number.
            key = str (random.random ())
        return getId (str (key))
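    # Illustrative example (not executed): parsing the same entry twice
    # yields the same identifier, because the id is derived from stable
    # entry fields rather than from parse order or wall-clock time:
    #
    #     e = {'id': 'urn:uuid:1234', 'title': 'Hello'}
    #     feed.generateUniqueId(e) == feed.generateUniqueId(dict(e))  # True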
    def getIds(self, onlyUnread=False):
        if onlyUnread:
            rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        else:
            rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
        ids = []
        for row in rows:
            ids.append(row[0])
        return ids

    def getNextId(self, id, forward=True):
        if forward:
            delta = 1
        else:
            delta = -1
        ids = self.getIds()
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]

    def getPreviousId(self, id):
        return self.getNextId(id, forward=False)
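    # Note: the modulo arithmetic above makes navigation circular.  For
    # example, with ids == ['a', 'b', 'c']:
    #
    #     getNextId('c')                -> 'a'  (wraps to the start)
    #     getNextId('a', forward=False) -> 'c'  (wraps to the end)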
    def getNumberOfUnreadItems(self):
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]

    def getNumberOfEntries(self):
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
    def getArticle(self, entry):
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        link = entry['link']
        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        if author is not None:
            text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text
    def getContent(self, id):
        """
        Return the content of the article with the specified ID.  If
        the content is not available, returns None.
        """
        contentLink = self.getContentLink(id)
        try:
            with open(contentLink, 'r') as file:
                content = file.read()
        except Exception:
            logger.exception("Failed to get content for %s: reading %s failed",
                             id, contentLink)
            content = None
        return content
    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            try:
                return timegm(entry.updated_parsed)
            except (TypeError, IndexError):
                # entry.updated_parsed is garbage.
                pass

        if entry.has_key("published_parsed"):
            try:
                return timegm(entry.published_parsed)
            except (TypeError, IndexError):
                # entry.published_parsed is garbage.
                pass

        # No usable timestamp: fall back to the current time.
        return time.time()
    def extractContent(self, entry):
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content
    def removeEntry(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].files_deleted (
                    woodchuck.DeletionResponse.Deleted)
                del wc()[self.key][id]
            except KeyError:
                pass
        if wc().available():
            mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, date, configdir):
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()
    # Feed.updateFeed calls this function.
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            id = row[0]
            link = row[1]
            try:
                currentTime = time.time()
                f = urllib2.urlopen(link)
                #entry["content"] = f.read()
                html = f.read()
                f.close()
                soup = BeautifulSoup(html)
                images = soup('img')
                baseurl = link
                for img in images:
                    filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                    if filename:
                        img['src'] = filename
                        self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                        self.db.commit()
                contentLink = configdir+self.key+".d/"+id+".html"
                file = open(contentLink, "w")
                file.write(soup.prettify())
                file.close()

                self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
                self.db.commit()
            except:
                logger.error("Error updating Archived Article: %s %s"
                             % (link, traceback.format_exc(),))

        if postFeedUpdateFunc is not None:
            postFeedUpdateFunc (self.key, currentTime, None, None, None,
                                *postFeedUpdateFuncArgs)
    def purgeReadArticles(self):
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                # Only remove the image if no other article references it.
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    remove(row[0])
            except:
                pass
        self.removeEntry(id)
class Listing(BaseObject):
    # Columns to cache.
    cached_columns = (('feeds', 'updateTime'),
                      ('feeds', 'unread'),
                      ('feeds', 'title'),
                      ('categories', 'title'))

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)
    # Lists all the feeds in a dictionary, and exposes the data.
    def __init__(self, config, configdir):
        self.config = config
        self.configdir = configdir

        self.tls = threading.local ()

        table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
        if table is None:
            self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
            self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
            self.addCategory("Default Category")
            if isfile(self.configdir+"feeds.pickle"):
                self.importOldFormatFeeds()
            else:
                self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
        else:
            from string import find, upper
            if find(upper(table[0]), "WIDGET")<0:
                self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                self.db.execute("UPDATE feeds SET widget=1;")
                self.db.commit()
            if find(upper(table[0]), "CATEGORY")<0:
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                self.db.execute("UPDATE feeds SET category=1;")
                self.db.commit()

        # Check that Woodchuck's state is up to date with respect to
        # our state.
        try:
            updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
            wc_init(config, self, updater)
            if wc().available() and updater:
                # The list of known streams.
                streams = wc().streams_list ()
                stream_ids = [s.identifier for s in streams]

                # Register any unknown streams.  Remove known streams
                # from stream_ids.
                for key in self.getListOfFeeds():
                    title = self.getFeedTitle(key)
                    # XXX: We should also check whether the list of
                    # articles/objects in each feed/stream is up to date.
                    if key not in stream_ids:
                        logger.debug(
                            "Registering previously unknown channel: %s (%s)"
                            % (key, title,))
                        wc().stream_register(
                            key, title,
                            self.config.getUpdateInterval() * 60 * 60)
                    else:
                        # Make sure the human readable name is up to date.
                        if wc()[key].human_readable_name != title:
                            wc()[key].human_readable_name = title
                        stream_ids.remove (key)
                        wc()[key].freshness \
                            = self.config.getUpdateInterval() * 60 * 60

                # Unregister any streams that are no longer subscribed to.
                for id in stream_ids:
                    logger.debug("Unregistering %s" % (id,))
                    wc().stream_unregister (id)
        except Exception:
            logger.exception("Registering streams with Woodchuck")
    def importOldFormatFeeds(self):
        """Load feeds saved in the old pickle format and convert them to sqlite."""
        import rss
        listing = rss.Listing(self.configdir)
        rank = 0
        for id in listing.getListOfFeeds():
            try:
                rank += 1
                values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
                self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
                self.db.commit()

                feed = listing.getFeed(id)
                new_feed = self.getFeed(id)

                items = feed.getIds()[:]
                items.reverse()
                for item in items:
                    if feed.isEntryRead(item):
                        read_status = 1
                    else:
                        read_status = 0
                    date = timegm(feed.getDateTuple(item))
                    title = feed.getTitle(item)
                    newId = new_feed.generateUniqueId({"date":date, "title":title})
                    values = (newId, title, feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
                    new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                    new_feed.db.commit()

                    try:
                        images = feed.getImages(item)
                        for image in images:
                            new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
                            new_feed.db.commit()
                    except:
                        pass
                self.updateUnread(id)
            except:
                logger.error("importOldFormatFeeds: %s"
                             % (traceback.format_exc(),))
        remove(self.configdir+"feeds.pickle")
    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
        if count == 0:
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")
    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
                   priority=0):
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
        if expiryTime is None:
            # Default to 24 hours
            expiryTime = 24
        if proxy is None:
            (use_proxy, proxy) = self.config.getProxy()
            if not use_proxy:
                proxy = None
        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        try:
            modified = time.struct_time(eval(modified))
        except:
            modified = None
        feed.updateFeed(
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
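    # The 'modified' value is persisted as the string form of a
    # time.struct_time tuple (see _postFeedUpdate below) and revived
    # above with eval().  Illustrative round trip:
    #
    #     str(tuple(modified))  # -> '(2011, 5, 1, 12, 0, 0, 6, 121, 0)'
    #     time.struct_time(eval('(2011, 5, 1, 12, 0, 0, 6, 121, 0)'))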
    def _queuePostFeedUpdate(self, *args, **kwargs):
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)

    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        if modified is None:
            modified = "None"
        else:
            modified = str(tuple(modified))
        if updateTime > 0:
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
        else:
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
        self.db.commit()

        if title is not None:
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
                            (title, key))
            self.db.commit()
        self.cache_invalidate('feeds')
        self.updateUnread(key)

        update_server_object().ArticleCountUpdated()

        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

        try:
            percent = (100 * ((completed + in_progress / 2.))
                       / (completed + in_progress + queued))
        except ZeroDivisionError:
            percent = 100

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']
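    # Worked example for the progress calculation above: with 3 jobs
    # completed since this update run began, 2 in progress and 5 queued,
    #
    #     percent = 100 * (3 + 2/2.) / (3 + 2 + 5) = 40
    #
    # i.e. an in-progress job counts as half done.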
    def getFeed(self, key):
        if key == "ArchivedArticles":
            return ArchivedArticles(self.configdir, key)
        return Feed(self.configdir, key)

    def editFeed(self, key, title, url, category=None):
        if category:
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
        else:
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.db.commit()
        self.cache_invalidate('feeds')

        if wc().available():
            try:
                wc()[key].human_readable_name = title
            except KeyError:
                logger.debug("Feed %s (%s) unknown." % (key, title))
    def getFeedUpdateTime(self, key):
        update_time = self.lookup('feeds', 'updateTime', key)

        if not update_time:
            return "Never"

        delta = time.time() - update_time

        delta_hours = delta / (60. * 60.)
        if delta_hours < .1:
            return "A few minutes ago"
        if delta_hours < .75:
            return "Less than an hour ago"
        if delta_hours < 1.5:
            return "About an hour ago"
        if delta_hours < 18:
            return "About %d hours ago" % (int(delta_hours + 0.5),)

        delta_days = delta_hours / 24.
        if delta_days < 1.5:
            return "About a day ago"
        if delta_days < 18:
            return "%d days ago" % (int(delta_days + 0.5),)

        delta_weeks = delta_days / 7.
        if delta_weeks <= 8:
            return "%d weeks ago" % int(delta_weeks + 0.5)

        delta_months = delta_days / 30.
        if delta_months <= 30:
            return "%d months ago" % int(delta_months + 0.5)

        return time.strftime("%x", time.gmtime(update_time))
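    # For example, an update 90 seconds old reports "A few minutes ago"
    # and one from 10 hours ago reports "About 10 hours ago"; beyond
    # roughly 30 months the age falls through to an absolute "%x" date.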
    def getFeedNumberOfUnreadItems(self, key):
        return self.lookup('feeds', 'unread', key)

    def getFeedTitle(self, key):
        title = self.lookup('feeds', 'title', key)
        if title:
            return title

        return self.getFeedUrl(key)

    def getFeedUrl(self, key):
        return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getFeedCategory(self, key):
        return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    def getListOfFeeds(self, category=None):
        if category:
            rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
        else:
            rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getListOfCategories(self):
        return list(row[0] for row in self.db.execute(
            "SELECT id FROM categories ORDER BY rank;"))

    def getCategoryTitle(self, id):
        return self.lookup('categories', 'title', id)

    def getCategoryUnread(self, id):
        count = 0
        for key in self.getListOfFeeds(category=id):
            try:
                count = count + self.getFeedNumberOfUnreadItems(key)
            except:
                pass
        return count
    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        if order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
        if onlyUnread:
            sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " % category + tmp
        else:
            sql = "SELECT id FROM feeds WHERE category=%s " % category + tmp
        rows = self.db.execute(sql)
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys
    def getFavicon(self, key):
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):
            return filename
        else:
            return False

    def updateUnread(self, key):
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        self.db.commit()
        self.cache_invalidate('feeds')
    def addFeed(self, title, url, id=None, category=1):
        if not id:
            id = getId(url)
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
        if count == 0:
            max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            if max_rank is None:
                max_rank = 0
            values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);", values)
            self.db.commit()

            # Ask for the feed object: it will create the necessary tables.
            self.getFeed(id)

            if wc().available():
                # Register the stream with Woodchuck.  Update approximately
                # every 6 hours.
                wc().stream_register(stream_identifier=id,
                                     human_readable_name=title,
                                     freshness=6*60*60)

            self.cache_invalidate('feeds')
            return True
        else:
            return False
    def addCategory(self, title):
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        if rank is None:
            rank = 1
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        if id is None:
            id = 1
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
        self.db.commit()
        self.cache_invalidate('categories')
    def removeFeed(self, key):
        if wc().available ():
            try:
                del wc()[key]
            except (KeyError, woodchuck.Error):
                logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
        self.db.commit()

        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
        self.cache_invalidate('feeds')
    def removeCategory(self, key):
        # Never remove the last remaining category.
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
            self.db.commit()
            self.cache_invalidate('categories')

    #def saveConfig(self):
    #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
    #    file = open(self.configdir+"feeds.pickle", "w")
    #    pickle.dump(self.listOfFeeds, file)
    #    file.close()
    def moveUp(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        if rank > 0:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveCategoryUp(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        if rank > 0:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveDown(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
        if rank < max_rank:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()

    def moveCategoryDown(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
        if rank < max_rank:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()