#!/usr/bin/env python2.5

# Copyright (c) 2007-2008 INdT.
# Copyright (c) 2011 Neal H. Walfield
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# ============================================================================
# Author      : Yves Marcoz
# Description : Simple RSS Reader
# ============================================================================
from __future__ import with_statement

import os
import sys
import md5
import time
import random
import logging
import sqlite3
import urllib2
import threading
import traceback
import subprocess
import dbus
import feedparser
import mainthread

from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, utime
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
from calendar import timegm
from wc import wc, wc_init, woodchuck
from updatedbus import update_server_object
from jobmanager import JobManager
from httpprogresshandler import HTTPProgressHandler
logger = logging.getLogger(__name__)

def getId(string):
    if issubclass(string.__class__, unicode):
        string = string.encode('utf8', 'replace')

    return md5.new(string).hexdigest()
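
# Illustrative note (not in the original source): getId() maps any
# string to a stable 32-character hex digest, e.g.
#
#   getId('a')  ->  '0cc175b9c0f1b6a831c399e269772661'
#
# Unicode input is utf-8 encoded first, so equal text always yields the
# same digest.  The digest is used below as a filesystem- and SQL-safe
# identifier for URLs and article contents.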
def download_callback(connection):
    if JobManager().do_quit:
        raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    openers = []

    if progress_handler is not None:
        openers.append(progress_handler)
    else:
        openers.append(HTTPProgressHandler(download_callback))

    if proxy:
        openers.append(proxy)

    return urllib2.build_opener(*openers)
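
# Usage sketch (illustrative, mirroring the call sites below):
#
#   opener = downloader(proxy=proxy)
#   f = opener.open(url)
#   data = f.read()
#   f.close()
#
# When no progress_handler is supplied, the default HTTPProgressHandler
# aborts the transfer via download_callback once JobManager().do_quit
# is set.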
def transfer_stats(sent, received, **kwargs):
    """
    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.
    """
    start_time = time.time()
    start_sent = sent
    start_received = received

    def e(sent, received, **kwargs):
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)

    return e
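
# Illustrative example of the continuation pattern (not in the original
# source):
#
#   snapshot = transfer_stats(0, 0)
#   ... perform some transfers ...
#   sent_delta, received_delta, elapsed = snapshot(**progress_handler.stats)
#
# Each call to the continuation reports deltas relative to the counters
# and wall-clock time captured when transfer_stats() was first called.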
# If not None, a subprocess.Popen object corresponding to an
# update_feeds.py process.
update_feed_process = None

update_feeds_iface = None

# Job count recorded at the start of the current update round.
jobs_at_start = 0
class BaseObject(object):
    # Columns to cache.  Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column.  Note that
    # both are case sensitive.
    cached_columns = ()

    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        if not hasattr(self, 'cache'):
            return

        if table is None:
            del self.cache
        else:
            if table in self.cache:
                del self.cache[table]
    def lookup(self, table, column, id=None):
        """
        Look up a column or value.  Uses a cache for columns in
        cached_columns.  Note: the column is returned unsorted.
        """
        if not hasattr(self, 'cache'):
            self.cache = {}

        # Cache data for at most 60 seconds.
        now = time.time()
        try:
            cache = self.cache[table]
            if time.time() - cache[None] > 60:
                # logger.debug("%s: Cache too old: clearing" % (table,))
                del self.cache[table]
                cache = None
        except KeyError:
            cache = None

        if cache is None or (table, column) not in self.cached_columns:
            # The cache is empty or the caller wants a column that we
            # don't cache: (re)load from the database.
            do_cache = (table, column) in self.cached_columns
            where = ""
            if do_cache:
                # logger.debug("%s: Rebuilding cache" % (table,))
                self.cache[table] = cache = {None: now}
                columns = []
                for t, c in self.cached_columns:
                    if t == table:
                        cache[c] = {}
                        columns.append(c)
                columns.append('id')
            else:
                columns = [column]
                if id is not None:
                    where = "where id = '%s'" % id

            results = self.db.execute(
                "SELECT %s FROM %s %s" % (','.join(columns), table, where))

            if do_cache:
                for row in results:
                    values = list(row)
                    i = values.pop()
                    for index, value in enumerate(values):
                        cache[columns[index]][i] = value
            else:
                values = [row[0] for row in results]
                if id is not None:
                    return values[0] if values else None
                return values

        cache = self.cache[table]
        if id is not None:
            try:
                value = cache[column][id]
                # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
                return value
            except KeyError:
                # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
                return None
        return cache[column].values()
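
    # Usage sketch (illustrative): subclasses declare cached_columns and
    # read through lookup(), e.g.
    #
    #   self.lookup('feed', 'read', id)   # a single cached value
    #   self.lookup('feed', 'read')       # the whole (unsorted) column
    #
    # Writers must call cache_invalidate() after modifying a cached
    # table, or readers may see values up to 60 seconds stale.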
class Feed(BaseObject):
    # Columns to cache.
    cached_columns = (('feed', 'read'),
                      ('feed', 'title'))

    serial_execution_lock = threading.Lock()

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)
    def __init__(self, configdir, key):
        self.key = key
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        self.tls = threading.local()

        if not isdir(self.dir):
            mkdir(self.dir)
        filename = "%s/%s.db" % (self.dir, self.key)
        if not isfile(filename):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, contentHash text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
            self.db.commit()
        else:
            try:
                self.db.execute("ALTER TABLE feed ADD COLUMN contentHash text")
                self.db.commit()
            except sqlite3.OperationalError, e:
                if 'duplicate column name' in str(e):
                    pass
                else:
                    logger.exception("Add column contentHash to %s", filename)
    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                if opener is None:
                    opener = downloader(proxy=proxy)

                abs_url = urljoin(baseurl, url)
                f = opener.open(abs_url)
                try:
                    with open(filename, "w") as outf:
                        outf.write(f.read())
                finally:
                    f.close()
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                return None
            except:
                exception = sys.exc_info()[0]

                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
                return None
        else:
            #open(filename,"a").close() # "Touch" the file
            file = open(filename,"a")
            utime(filename, None)
            file.close()
        return filename
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
            def doit():
                def it():
                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
                return it
            JobManager().execute(doit(), self.key, priority=priority)
        else:
            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus = dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                        )
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')

                try:
                    update_feeds_iface.Update(self.key)
                except Exception, e:
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                                 % str(e))
                    update_feeds_iface = None
                    return False
                else:
                    return True

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).
                return
            # The update daemon is not running or is not reachable via
            # dbus.  Start it ourselves.
            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                                            'update_feeds.py')
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon']
                logger.debug("Starting update_feeds: running %s"
                             % (str(argv),))
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                # rebind.
                update_feeds_iface = None

            if send_update_request():
                return
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        logger.debug("Updating %s" % url)

        success = False
        have_serial_execution_lock = False
        try:
            update_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            feed_transfer_stats = transfer_stats(0, 0)

            tmp = feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - update_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            new_objects = 0
            updated_objects = 0
            def wc_success():
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=update_start,
                        transfer_duration=download_duration,
                        new_objects=new_objects,
                        updated_objects=updated_objects,
                        objects_inline=new_objects + updated_objects)
                except KeyError:
                    logger.warn(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key,))
            http_status = tmp.get ('status', 200)

            # Check if the parse was successful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This makes a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute(wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.get('version', None):
                # An error occurred fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed out, or
                # '' if the data is not a proper feed.)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.get('version', 'unset')),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))
                def register_stream_update_failed(http_status):
                    def doit():
                        logger.debug("%s: stream update failed!" % self.key)

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    return doit
                if wc().available():
                    mainthread.execute(
                        register_stream_update_failed(
                            http_status=http_status),
                        async=True)
            else:
                currentTime = time.time()
                # The etag and modified value should only be updated if
                # the content was not null.
                etag = tmp.get('etag', None)
                modified = tmp.get('modified', None)

                try:
                    abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                    f = opener.open(abs_url)
                    with open(self.dir+"/favicon.ico", "w") as outf:
                        outf.write(f.read())
                    f.close()
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    logger.debug("Could not download favicon %s: %s"
                                 % (abs_url, str (exception)))
                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    entry_transfer_stats = transfer_stats(
                        *feed_transfer_stats(**progress_handler.stats)[0:2])

                    if JobManager().do_quit:
                        raise KeyboardInterrupt
                    date = self.extractDate(entry)
                    try:
                        entry["title"]
                    except KeyError:
                        entry["title"] = "No Title"
                    try:
                        entry["link"]
                    except KeyError:
                        entry["link"] = ""
                    try:
                        entry["author"]
                    except KeyError:
                        entry["author"] = None
                    if(not(entry.has_key("id"))):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    contentHash = getId(content)
                    object_size = len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)
                    current_version = self.db.execute(
                        'select date, ROWID, contentHash from feed where id=?',
                        (id,)).fetchone()
                    if (current_version is not None
                        # To detect updates, don't compare by date:
                        # compare by content.
                        #
                        # - If an article update is just a date change
                        #   and the content remains the same, we don't
                        #   want to register an update.
                        #
                        # - If an article's content changes but not the
                        #   date, we want to recognize an update.
                        and current_version[2] == contentHash):
                        logger.debug("ALREADY DOWNLOADED %s (%s)"
                                     % (entry["title"], entry["link"]))
                        ## This article is already present in the feed
                        ## listing.  Update the "updated" time, so it
                        ## doesn't expire.
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;",(currentTime,id))
                        try:
                            logger.debug("Updating already downloaded files for %s" %(id))
                            filename = configdir+self.key+".d/"+id+".html"
                            file = open(filename,"a")
                            utime(filename, None)
                            file.close()
                            images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                            for image in images:
                                file = open(image[0],"a")
                                utime(image[0], None)
                                file.close()
                        except:
                            logger.debug("Error in refreshing images for %s" % (id))
                        self.db.commit()
                        continue
                    if current_version is not None:
                        # The version was updated.  Mark it as unread.
                        logger.debug("UPDATED: %s (%s)"
                                     % (entry["title"], entry["link"]))
                        updated_objects += 1
                    else:
                        logger.debug("NEW: %s (%s)"
                                     % (entry["title"], entry["link"]))
                        new_objects += 1
                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    if imageCache and len(images) > 0:
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
                            if not img.has_key('src'):
                                continue

                            filename = self.addImage(
                                configdir, self.key, baseurl, img['src'],
                                opener=opener)
                            if filename:
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True
                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()

                    values = {'id': id,
                              'title': tmpEntry["title"],
                              'contentLink': tmpEntry["contentLink"],
                              'contentHash': contentHash,
                              'date': tmpEntry["date"],
                              'updated': currentTime,
                              'link': tmpEntry["link"],
                              'read': 0}

                    if current_version is not None:
                        # This is an update.  Ensure that the existing
                        # entry is replaced.
                        values['ROWID'] = current_version[1]

                    cols, values = zip(*values.items())
                    self.db.execute(
                        "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
                        % (','.join(cols), ','.join(('?',) * len(values))),
                        values)
                    self.db.commit()
                    # Register the object with Woodchuck and mark it as
                    # downloaded.
                    def register_object_transferred(
                            id, title, publication_time,
                            sent, received, object_size):
                        def doit():
                            logger.debug("Registering transfer of object %s"
                                         % title)
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=title)
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]
                            obj.publication_time = publication_time
                            obj.transferred(
                                indicator=(
                                    woodchuck.Indicator.ApplicationVisual
                                    |woodchuck.Indicator.StreamWide),
                                transferred_down=received,
                                transferred_up=sent,
                                object_size=object_size)
                        return doit
                    # If the entry does not contain a publication
                    # time, the attribute won't exist.
                    pubtime = entry.get('date_parsed', None)
                    if pubtime:
                        publication_time = time.mktime (pubtime)
                    else:
                        publication_time = None

                    sent, received, _ \
                        = entry_transfer_stats(**progress_handler.stats)
                    # sent and received are for objects (in
                    # particular, images) associated with this
                    # item.  We also want to attribute the data
                    # transferred for the item's content.  This is
                    # a good first approximation.
                    received += len(content)

                    if wc().available():
                        mainthread.execute(
                            register_object_transferred(
                                id=id,
                                title=tmpEntry["title"],
                                publication_time=publication_time,
                                sent=sent, received=received,
                                object_size=object_size),
                            async=True)
                self.db.commit()
                sent, received, _ \
                    = feed_transfer_stats(**progress_handler.stats)
                logger.debug(
                    "%s: Update successful: transferred: %d/%d; objects: %d"
                    % (url, sent, received, len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True
                rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
                for row in rows:
                    self.removeEntry(row[0])

                from glob import glob
                for file in glob(configdir+self.key+".d/*"):
                    stats = os.stat(file)
                    # put the two dates into matching format
                    lastmodDate = stats[8]

                    expDate = time.time()-expiry*3
                    # check if image-last-modified-date is outdated
                    if expDate > lastmodDate:
                        try:
                            #print 'Removing', file

                            # XXX: Tell woodchuck.
                            remove(file)
                        except OSError, exception:
                            logger.error('Could not remove %s: %s'
                                         % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime = row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))

            try:
                title = tmp.feed.title
            except (AttributeError, UnboundLocalError), exception:
                title = None
            if postFeedUpdateFunc is not None:
                postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                    title, *postFeedUpdateFuncArgs)

            self.cache_invalidate()
    def setEntryRead(self, id):
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].used()
            except KeyError:
                pass
        if wc().available():
            mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')
    def setEntryUnread(self, id):
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.db.commit()
        self.cache_invalidate('feed')

    def markAllAsRead(self):
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.db.commit()
        self.cache_invalidate('feed')
    def isEntryRead(self, id):
        return self.lookup('feed', 'read', id) == 1

    def getTitle(self, id):
        return self.lookup('feed', 'title', id)

    def getContentLink(self, id):
        return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getContentHash(self, id):
        return self.db.execute("SELECT contentHash FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getExternalLink(self, id):
        return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getDate(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))

    def getDateTuple(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.localtime(dateStamp)

    def getDateStamp(self, id):
        return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        article is updated.
        """
        # Prefer the entry's id, which is supposed to be globally
        # unique.
        key = entry.get('id', None)
        if not key:
            # Next, try the link to the content.
            key = entry.get('link', None)
        if not key:
            # Ok, the title and the date concatenated are likely to be
            # relatively stable.
            try:
                key = entry.get('title', '') + str(entry.get('date', ''))
            except Exception:
                key = None
        if not key:
            # Hmm, the article's content will at least guarantee no
            # false negatives (i.e., missing articles)
            key = entry.get('content', None)
        if not key:
            # If all else fails, just use a random number.
            key = str (random.random ())
        return getId (str (key))
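
    # Illustrative example of the fallback chain above (not from the
    # original source): an entry with id 'tag:example.org,2011:1' always
    # maps to getId('tag:example.org,2011:1'); an entry carrying neither
    # id, link, title/date nor content gets a random key, so it can
    # never be deduplicated across updates.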
    def getIds(self, onlyUnread=False):
        if onlyUnread:
            rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        else:
            rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
        ids = []
        for row in rows:
            ids.append(row[0])
        return ids

    def getNextId(self, id, forward=True, onlyUnread=False):
        if forward:
            delta = 1
        else:
            delta = -1
        ids = self.getIds(onlyUnread=onlyUnread)
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]
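
    # Example (illustrative): with ids ['c', 'b', 'a'] (newest first),
    # getNextId('a') wraps around to 'c', and
    # getNextId('c', forward=False) wraps back to 'a'.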
    def getPreviousId(self, id, onlyUnread=False):
        return self.getNextId(id, forward=False, onlyUnread=onlyUnread)

    def getNumberOfUnreadItems(self):
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]

    def getNumberOfEntries(self):
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
    def getArticle(self, entry):
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        link = entry['link']
        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        if author is not None:
            text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text
    def getContent(self, id):
        """
        Return the content of the article with the specified ID.  If
        the content is not available, returns None.
        """
        contentLink = self.getContentLink(id)
        try:
            with open(contentLink, 'r') as file:
                content = file.read()
        except Exception:
            logger.exception("Failed to get content for %s: reading %s failed",
                             id, contentLink)
            content = None
        return content
    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])
        else:
            return time.time()
    def extractContent(self, entry):
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content
    def removeEntry(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].files_deleted (
                    woodchuck.DeletionResponse.Deleted)
                del wc()[self.key][id]
            except KeyError:
                pass
        if wc().available():
            mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, date, configdir):
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()
    # Feed.UpdateFeed calls this function.
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;").fetchall()
        for row in rows:
            try:
                currentTime = time.time()
                id = row[0]
                link = row[1]
                f = urllib2.urlopen(link)
                #entry["content"] = f.read()
                html = f.read()
                f.close()
                soup = BeautifulSoup(html)
                images = soup('img')
                baseurl = link
                for img in images:
                    filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                    if filename:
                        img['src'] = filename
                        self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                        self.db.commit()

                contentLink = configdir+self.key+".d/"+id+".html"
                file = open(contentLink, "w")
                file.write(soup.prettify())
                file.close()

                self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
                self.db.commit()
            except:
                logger.error("Error updating Archived Article: %s %s"
                             % (link,traceback.format_exc(),))

        if postFeedUpdateFunc is not None:
            postFeedUpdateFunc (self.key, currentTime, None, None, None,
                                *postFeedUpdateFuncArgs)
    def purgeReadArticles(self):
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;").fetchall()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) ).fetchall()
        for row in rows:
            try:
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    remove(row[0])
            except:
                pass
        self.removeEntry(id)
class Listing(BaseObject):
    # Columns to cache.
    cached_columns = (('feeds', 'updateTime'),
                      ('feeds', 'unread'),
                      ('feeds', 'title'),
                      ('categories', 'title'))

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)
    # Lists all the feeds in a dictionary, and exposes the data.
    def __init__(self, config, configdir):
        self.config = config
        self.configdir = configdir

        self.tls = threading.local ()

        table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
        if table is None:
            self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
            self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
            self.addCategory("Default Category")
            if isfile(self.configdir+"feeds.pickle"):
                self.importOldFormatFeeds()
            else:
                self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
        else:
            from string import find, upper
            if find(upper(table[0]), "WIDGET")<0:
                self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                self.db.execute("UPDATE feeds SET widget=1;")
                self.db.commit()
            if find(upper(table[0]), "CATEGORY")<0:
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                self.db.execute("UPDATE feeds SET category=1;")
                self.db.commit()
        # Check that Woodchuck's state is up to date with respect to our
        # state.
        try:
            updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
            wc_init(config, self, updater)
            if wc().available() and updater:
                # The list of known streams.
                streams = wc().streams_list ()
                stream_ids = [s.identifier for s in streams]

                # Register any unknown streams.  Remove known streams from
                # stream_ids.
                for key in self.getListOfFeeds():
                    title = self.getFeedTitle(key)
                    # XXX: We should also check whether the list of
                    # articles/objects in each feed/stream is up to date.
                    if key not in stream_ids:
                        logger.debug(
                            "Registering previously unknown channel: %s (%s)"
                            % (key, title))
                        wc().stream_register(
                            key, title,
                            self.config.getUpdateInterval() * 60 * 60)
                    else:
                        # Make sure the human readable name is up to date.
                        if wc()[key].human_readable_name != title:
                            wc()[key].human_readable_name = title
                        stream_ids.remove (key)
                        wc()[key].freshness \
                            = self.config.getUpdateInterval() * 60 * 60

                # Unregister any streams that are no longer subscribed to.
                for id in stream_ids:
                    logger.debug("Unregistering %s" % (id,))
                    wc().stream_unregister (id)
        except Exception:
            logger.exception("Registering streams with Woodchuck")
    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
        import rss
        listing = rss.Listing(self.configdir)
        rank = 0
        for id in listing.getListOfFeeds():
            try:
                rank += 1
                values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
                self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
                self.db.commit()

                feed = listing.getFeed(id)
                new_feed = self.getFeed(id)

                items = feed.getIds()[:]
                items.reverse()
                for item in items:
                    if feed.isEntryRead(item):
                        read_status = 1
                    else:
                        read_status = 0
                    date = timegm(feed.getDateTuple(item))
                    title = feed.getTitle(item)
                    newId = new_feed.generateUniqueId({"date":date, "title":title})
                    values = (newId, title , feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
                    new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                    new_feed.db.commit()

                    try:
                        images = feed.getImages(item)
                        for image in images:
                            new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
                            new_feed.db.commit()
                    except:
                        pass

                self.updateUnread(id)
            except:
                logger.error("importOldFormatFeeds: %s"
                             % (traceback.format_exc(),))
        remove(self.configdir+"feeds.pickle")
    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
        if count == 0:
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")
    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
                   priority=0):
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
        if expiryTime is None:
            # Default to 24 hours
            expiryTime = 24
        if proxy is None:
            (use_proxy, proxy) = self.config.getProxy()
            if not use_proxy:
                proxy = None
        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        try:
            modified = time.struct_time(eval(modified))
        except:
            modified = None
        feed.updateFeed(
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
    def _queuePostFeedUpdate(self, *args, **kwargs):
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        if modified is None:
            modified = "None"
        else:
            modified = str(tuple(modified))
        if updateTime > 0:
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
        else:
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )

        if title is not None:
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
                            (title, key))
        self.db.commit()
        self.cache_invalidate('feeds')
        self.updateUnread(key)

        update_server_object().ArticleCountUpdated()
        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

        try:
            percent = (100 * ((completed + in_progress / 2.))
                       / (completed + in_progress + queued))
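            # Worked example (illustrative): with 3 jobs completed since
            # the last quiescent point, 2 in progress and 5 queued, this
            # yields (100 * (3 + 2/2.)) / 10 = 40%; an in-progress job
            # counts as half done.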
        except ZeroDivisionError:
            percent = 100

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']
    def getFeed(self, key):
        if key == "ArchivedArticles":
            return ArchivedArticles(self.configdir, key)
        return Feed(self.configdir, key)
    def editFeed(self, key, title, url, category=None):
        if category:
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
        else:
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.db.commit()
        self.cache_invalidate('feeds')

        if wc().available():
            try:
                wc()[key].human_readable_name = title
            except KeyError:
                logger.debug("Feed %s (%s) unknown." % (key, title))
    def getFeedUpdateTime(self, key):
        update_time = self.lookup('feeds', 'updateTime', key)

        if not update_time:
            return "Never"

        delta = time.time() - update_time

        delta_hours = delta / (60. * 60.)
        if delta_hours < .1:
            return "A few minutes ago"
        if delta_hours < .75:
            return "Less than an hour ago"
        if delta_hours < 1.5:
            return "About an hour ago"
        if delta_hours < 18:
            return "About %d hours ago" % (int(delta_hours + 0.5),)

        delta_days = delta_hours / 24.
        if delta_days < 1.5:
            return "About a day ago"
        if delta_days < 18:
            return "%d days ago" % (int(delta_days + 0.5),)

        delta_weeks = delta_days / 7.
        if delta_weeks <= 8:
            return "%d weeks ago" % int(delta_weeks + 0.5)

        delta_months = delta_days / 30.
        if delta_months <= 30:
            return "%d months ago" % int(delta_months + 0.5)

        return time.strftime("%x", time.gmtime(update_time))
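
    # Examples of the bucketing above (illustrative): updated 30 minutes
    # ago -> "Less than an hour ago"; 30 hours ago -> "About a day ago";
    # 30 days ago -> "4 weeks ago".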
    def getFeedNumberOfUnreadItems(self, key):
        return self.lookup('feeds', 'unread', key)

    def getFeedTitle(self, key):
        title = self.lookup('feeds', 'title', key)
        if title:
            return title
        return self.getFeedUrl(key)

    def getFeedUrl(self, key):
        return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getFeedCategory(self, key):
        return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    def getListOfFeeds(self, category=None):
        if category:
            rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
        else:
            rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getListOfCategories(self):
        return list(row[0] for row in self.db.execute(
                "SELECT id FROM categories ORDER BY rank;"))
    def getCategoryTitle(self, id):
        return self.lookup('categories', 'title', id)

    def getCategoryUnread(self, id):
        count = 0
        for key in self.getListOfFeeds(category=id):
            try:
                count = count + self.getFeedNumberOfUnreadItems(key)
            except:
                pass
        return count
    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        if order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])

        if onlyUnread:
            sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
        else:
            sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
        rows = self.db.execute(sql)
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys
    def getFavicon(self, key):
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):
            return filename
        else:
            return False
    def updateUnread(self, key):
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        self.db.commit()
        self.cache_invalidate('feeds')
    def addFeed(self, title, url, id=None, category=1):
        if id is None:
            id = getId(url)
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
        if count == 0:
            max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            if max_rank is None:
                max_rank = 0
            values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
            self.db.commit()
            # Ask for the feed object, it will create the necessary tables.
            self.getFeed(id)

            if wc().available():
                # Register the stream with Woodchuck.  Update approximately
                # every 6 hours.
                wc().stream_register(stream_identifier=id,
                                     human_readable_name=title,
                                     freshness=6*60*60)

            self.cache_invalidate('feeds')
            return True
        else:
            return False
    def addCategory(self, title):
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        if rank is None:
            rank = 1
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        if id is None:
            id = 1
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
        self.db.commit()
        self.cache_invalidate('categories')
    def removeFeed(self, key):
        if wc().available ():
            try:
                del wc()[key]
            except (KeyError, woodchuck.Error):
                logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
        self.db.commit()

        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
        self.cache_invalidate('feeds')
    def removeCategory(self, key):
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
            self.db.commit()
            self.cache_invalidate('categories')
    #def saveConfig(self):
    #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
    #    file = open(self.configdir+"feeds.pickle", "w")
    #    pickle.dump(self.listOfFeeds, file)
    def moveUp(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        if rank > 0:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveCategoryUp(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        if rank > 0:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveDown(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
        if rank < max_rank:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()

    def moveCategoryDown(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
        if rank < max_rank:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()