Do less important startup configuration after the main view is shown.
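
The idea (a minimal sketch, not code from this commit; it assumes a
gobject/GTK main loop, and show_main_view, listing and
check_woodchuck_state are illustrative names):

    import gobject

    def deferred_startup():
        # Runs once the main loop is idle, i.e. after the main view has
        # been drawn.  Do the less important configuration here.
        listing.check_woodchuck_state()  # hypothetical helper
        return False                     # run only once

    show_main_view()
    gobject.idle_add(deferred_startup)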
[feedingit] / src / rss_sqlite.py
#!/usr/bin/env python2.5


# Copyright (c) 2007-2008 INdT.
# Copyright (c) 2011 Neal H. Walfield
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# ============================================================================
# Name        : FeedingIt.py
# Author      : Yves Marcoz
# Version     : 0.5.4
# Description : Simple RSS Reader
# ============================================================================

from __future__ import with_statement

import sqlite3
from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, utime
import os
import md5
import feedparser
import time
import urllib2
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
from calendar import timegm
import threading
import traceback
from wc import wc, wc_init, woodchuck
import subprocess
import dbus
from updatedbus import update_server_object

from jobmanager import JobManager
import mainthread
from httpprogresshandler import HTTPProgressHandler
import random
import sys
import logging
logger = logging.getLogger(__name__)

def getId(string):
    return md5.new(string).hexdigest()
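
# Example (illustrative, not part of the original code): getId() maps any
# string to a stable 32-character md5 hex digest, so the same URL always
# yields the same identifier:
#
#   getId("http://example.com/feed") == getId("http://example.com/feed")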

def download_callback(connection):
    if JobManager().do_quit:
        raise KeyboardInterrupt

def downloader(progress_handler=None, proxy=None):
    openers = []

    if progress_handler is not None:
        openers.append(progress_handler)
    else:
        openers.append(HTTPProgressHandler(download_callback))

    if proxy:
        openers.append(proxy)

    return urllib2.build_opener(*openers)

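# Illustrative usage (assumed, not from the original code): build an opener
# that routes requests through an HTTP proxy.
#
#   proxy = urllib2.ProxyHandler({'http': 'http://proxy.example.com:8080'})
#   opener = downloader(proxy=proxy)
#   response = opener.open('http://example.com/feed')
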
# If not None, a subprocess.Popen object corresponding to an
# update_feeds.py process.
update_feed_process = None

update_feeds_iface = None

jobs_at_start = 0

class BaseObject(object):
    # Columns to cache.  Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column.  Note that
    # both are case sensitive.
    cached_columns = ()

    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        if not hasattr(self, 'cache'):
            return

        if table is None:
            del self.cache
        else:
            if table in self.cache:
                del self.cache[table]

    def lookup(self, table, column, id=None):
        """
        Look up a column or value.  Uses a cache for columns in
        cached_columns.  Note: the column is returned unsorted.
        """
        if not hasattr(self, 'cache'):
            self.cache = {}

        # Cache data for at most 60 seconds.
        now = time.time()
        try:
            cache = self.cache[table]

            if time.time() - cache[None] > 60:
                self.cache[table].clear()
        except KeyError:
            cache = None

        if (cache is None
            or (table, column) not in self.cached_columns):
            # The cache is empty or the caller wants a column that we
            # don't cache.
            if (table, column) in self.cached_columns:
                do_cache = True

                self.cache[table] = cache = {}
                columns = []
                for t, c in self.cached_columns:
                    if table == t:
                        cache[c] = {}
                        columns.append(c)

                columns.append('id')
                where = ""
            else:
                do_cache = False

                columns = (column,)
                if id is not None:
                    where = "where id = '%s'" % id
                else:
                    where = ""

            results = self.db.execute(
                "SELECT %s FROM %s %s" % (','.join(columns), table, where))

            if do_cache:
                for r in results:
                    values = list(r)
                    i = values.pop()
                    for index, value in enumerate(values):
                        cache[columns[index]][i] = value

                cache[None] = now
            else:
                values = []
                for r in results:
                    if id is not None:
                        return r[0]

                    values.append(r[0])

                return values
        else:
            cache = self.cache[table]

        try:
            if id is not None:
                return cache[column][id]
            else:
                return cache[column].values()
        except KeyError:
            return None

class Feed(BaseObject):
    # Columns to cache.
    cached_columns = (('feed', 'read'),
                      ('feed', 'title'))

    serial_execution_lock = threading.Lock()

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)

    def __init__(self, configdir, key):
        self.key = key
        self.configdir = configdir
        self.dir = "%s/%s.d" % (self.configdir, self.key)
        self.tls = threading.local ()

        if not isdir(self.dir):
            mkdir(self.dir)
        if not isfile("%s/%s.db" % (self.dir, self.key)):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
            self.db.commit()

    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                if not opener:
                    opener = downloader(proxy=proxy)

                abs_url = urljoin(baseurl, url)
                f = opener.open(abs_url)
                try:
                    with open(filename, "w") as outf:
                        for data in f:
                            outf.write(data)
                finally:
                    f.close()
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                return None
            except:
                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
                try:
                    remove(filename)
                except OSError:
                    pass

                # Re-raise the original exception with its traceback.
                raise
        else:
            # "Touch" the file so the expiry code sees it as fresh.
            open(filename, "a").close()
            utime(filename, None)
        return filename

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
            def doit():
                def it():
                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
                return it
            JobManager().execute(doit(), self.key, priority=priority)
        else:
            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus = dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                        )
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')

                try:
                    update_feeds_iface.Update(self.key)
                except Exception, e:
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                                 % str(e))
                    update_feeds_iface = None
                else:
                    return True

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).
                return

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                                            'update_feeds.py')
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon']
                logger.debug("Starting update_feeds: running %s"
                             % (str(argv),))
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                # rebind.
                update_feeds_iface = None

            for _ in xrange(5):
                if send_update_request():
                    break
                time.sleep(1)
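
    # The dispatch above assumes that update_feeds.py exports an Update(key)
    # method on the org.marcoz.feedingit D-Bus interface.  A minimal sketch
    # of that service side (illustrative, not the actual update_feeds.py):
    #
    #   import dbus.service
    #
    #   class UpdateServer(dbus.service.Object):
    #       @dbus.service.method('org.marcoz.feedingit', in_signature='s')
    #       def Update(self, key):
    #           queue_update(key)  # hypothetical helper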

    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        success = False
        have_serial_execution_lock = False
        try:
            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            tmp = feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            def wc_success():
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    logger.warn(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key))

            http_status = tmp.get ('status', 200)

            # Check if the parse was successful.  If the HTTP status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This makes a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute(wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occurred fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed out, or
                # '' if the data is not a proper feed.)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.version),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))
                logger.debug(tmp)
                def register_stream_update_failed(http_status):
                    def doit():
                        logger.debug("%s: stream update failed!" % self.key)

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    return doit
                if wc().available():
                    mainthread.execute(
                        register_stream_update_failed(
                            http_status=http_status),
                        async=True)
            else:
                currentTime = time.time()
                # The etag and modified value should only be updated if the content was not null
                try:
                    etag = tmp["etag"]
                except KeyError:
                    etag = None
                try:
                    modified = tmp["modified"]
                except KeyError:
                    modified = None
                try:
                    abs_url = urljoin(tmp["feed"]["link"], "/favicon.ico")
                    f = opener.open(abs_url)
                    data = f.read()
                    f.close()
                    outf = open(self.dir+"/favicon.ico", "w")
                    outf.write(data)
                    outf.close()
                    del data
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    logger.debug("Could not download favicon %s: %s"
                                 % (abs_url, str (exception)))

                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                ids = self.getIds()

                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    if JobManager().do_quit:
                        raise KeyboardInterrupt

                    received_base = progress_handler.stats['received']
                    sent_base = progress_handler.stats['sent']
                    object_size = 0

                    date = self.extractDate(entry)
                    try:
                        entry["title"]
                    except KeyError:
                        entry["title"] = "No Title"
                    try:
                        entry["link"]
                    except KeyError:
                        entry["link"] = ""
                    try:
                        entry["author"]
                    except KeyError:
                        entry["author"] = None
                    if not entry.has_key("id"):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    object_size = len (content)
                    received_base -= len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"],
                                "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)

                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    #if not id in ids:
                    if imageCache and len(images) > 0:
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
                            filename = self.addImage(
                                configdir, self.key, baseurl, img['src'],
                                opener=opener)
                            if filename:
                                img['src'] = "file://%s" % filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename)).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True

                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    if id in ids:
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                        self.db.commit()
                    else:
                        values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                        self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                    # Register the object with Woodchuck and mark it as
                    # downloaded.
                    def register_object_transferred(
                            id, title, publication_time,
                            sent, received, object_size):
                        def doit():
                            logger.debug("Registering transfer of object %s"
                                         % title)
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=title)
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]
                            else:
                                obj.publication_time = publication_time
                                obj.transferred(
                                    indicator=(
                                        woodchuck.Indicator.ApplicationVisual
                                        |woodchuck.Indicator.StreamWide),
                                    transferred_down=received,
                                    transferred_up=sent,
                                    object_size=object_size)
                        return doit
                    if wc().available():
                        # If the entry does not contain a publication
                        # time, the attribute won't exist.
                        pubtime = entry.get('date_parsed', None)
                        if pubtime:
                            publication_time = time.mktime (pubtime)
                        else:
                            publication_time = None

                        sent = progress_handler.stats['sent'] - sent_base
                        received = (progress_handler.stats['received']
                                    - received_base)

                        mainthread.execute(
                            register_object_transferred(
                                id=id,
                                title=tmpEntry["title"],
                                publication_time=publication_time,
                                sent=sent, received=received,
                                object_size=object_size),
                            async=True)
                self.db.commit()

                logger.debug(
                    "%s: Update successful: transferred: %d/%d; objects: %d"
                    % (self.key,
                       progress_handler.stats['sent'],
                       progress_handler.stats['received'],
                       len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True

            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
                self.removeEntry(row[0])

            from glob import glob
            from os import stat
            for file in glob(configdir+self.key+".d/*"):
                stats = stat(file)
                # stats[8] is st_mtime, the file's last-modification time.
                lastmodDate = stats[8]
                expDate = time.time() - expiry * 3
                # Check whether the file's last-modified date is outdated.
                if expDate > lastmodDate:
                    try:
                        # XXX: Tell woodchuck.
                        remove(file)
                    except OSError, exception:
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime = row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
            finally:
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError):
                    pass
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)

        self.cache_invalidate()

    def setEntryRead(self, id):
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].used()
            except KeyError:
                pass
        if wc().available():
            mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')

    def setEntryUnread(self, id):
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.db.commit()
        self.cache_invalidate('feed')

    def markAllAsRead(self):
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.db.commit()
        self.cache_invalidate('feed')

    def isEntryRead(self, id):
        return self.lookup('feed', 'read', id) == 1

    def getTitle(self, id):
        return self.lookup('feed', 'title', id)

    def getContentLink(self, id):
        return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getExternalLink(self, id):
        return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getDate(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))

    def getDateTuple(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.localtime(dateStamp)

    def getDateStamp(self, id):
        return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        article is updated.
        """
        # Prefer the entry's id, which is supposed to be globally
        # unique.
        key = entry.get('id', None)
        if not key:
            # Next, try the link to the content.
            key = entry.get('link', None)
        if not key:
            # Ok, the title and the date concatenated are likely to be
            # relatively stable.
            key = entry.get('title', '') + entry.get('date', '')
        if not key:
            # Hmm, the article's content will at least guarantee no
            # false negatives (i.e., missing articles)
            key = entry.get('content', None)
        if not key:
            # If all else fails, just use a random number.
            key = str (random.random ())
        return getId (key)
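
    # Illustrative precedence: an entry carrying only
    # {'id': 'tag:example.com,2011:1'} is hashed on that id; lacking an id,
    # the link is used, then title + date, then the content, and only as a
    # last resort a random number (which makes the article look new on
    # every update).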

    def getIds(self, onlyUnread=False):
        if onlyUnread:
            rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        else:
            rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
        ids = []
        for row in rows:
            ids.append(row[0])
        #ids.reverse()
        return ids

    def getNextId(self, id, forward=True):
        if forward:
            delta = 1
        else:
            delta = -1
        ids = self.getIds()
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]
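
    # Illustrative: with ids == ['a', 'b', 'c'], getNextId('c') wraps
    # around to 'a', and getPreviousId('a') wraps back to 'c'.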

    def getPreviousId(self, id):
        return self.getNextId(id, forward=False)

    def getNumberOfUnreadItems(self):
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]

    def getNumberOfEntries(self):
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]

    def getArticle(self, entry):
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        link = entry['link']
        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        if author != None:
            text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text

    def getContent(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        try:
            file = open(contentLink)
            content = file.read()
            file.close()
        except:
            content = "Content unavailable"
        return content

    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])
        else:
            return time.time()
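
    # Illustrative: an entry whose updated_parsed is the 9-tuple for
    # 2011-01-01 00:00:00 UTC yields timegm(...) == 1293840000; entries
    # lacking both fields are stamped with the current time.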

    def extractContent(self, entry):
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content
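
    # Illustrative: given both a short 'summary' and a longer
    # entry.content[0].value, the longer content wins; 'description' is
    # consulted only when the result would otherwise be empty.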

    def removeEntry(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].files_deleted (
                    woodchuck.DeletionResponse.Deleted)
                del wc()[self.key][id]
            except KeyError:
                pass
        if wc().available():
            mainthread.execute (doit, async=True)

class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, date, configdir):
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                img['src'] = filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)

    def purgeReadArticles(self):
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id, row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                pass
        self.removeEntry(id)

class Listing(BaseObject):
    # Columns to cache.
    cached_columns = (('feeds', 'updateTime'),
                      ('feeds', 'unread'),
                      ('feeds', 'title'),
                      ('categories', 'title'))

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)

    # Lists all the feeds in a dictionary, and exposes the data.
    def __init__(self, config, configdir):
        self.config = config
        self.configdir = configdir

        self.tls = threading.local ()

        try:
            table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
            if table == None:
                self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                if isfile(self.configdir+"feeds.pickle"):
                    self.importOldFormatFeeds()
                else:
                    self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
            else:
                from string import find, upper
                if find(upper(table[0]), "WIDGET")<0:
                    self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                    self.db.execute("UPDATE feeds SET widget=1;")
                    self.db.commit()
                if find(upper(table[0]), "CATEGORY")<0:
                    self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                    self.addCategory("Default Category")
                    self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                    self.db.execute("UPDATE feeds SET category=1;")
            self.db.commit()
        except:
            pass

        # Check that Woodchuck's state is up to date with respect to our
        # state.
        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
        wc_init (self, True if updater else False)
        if wc().available() and updater:
            # The list of known streams.
            streams = wc().streams_list ()
            stream_ids = [s.identifier for s in streams]

            # Register any unknown streams.  Remove known streams from
            # STREAM_IDS.
            for key in self.getListOfFeeds():
                title = self.getFeedTitle(key)
                # XXX: We should also check whether the list of
                # articles/objects in each feed/stream is up to date.
                if key not in stream_ids:
                    logger.debug(
                        "Registering previously unknown channel: %s (%s)"
                        % (key, title,))
                    # Use a default refresh interval of 6 hours.
                    wc().stream_register (key, title, 6 * 60 * 60)
                else:
                    # Make sure the human readable name is up to date.
                    if wc()[key].human_readable_name != title:
                        wc()[key].human_readable_name = title
                    stream_ids.remove (key)

            # Unregister any streams that are no longer subscribed to.
            for id in stream_ids:
                logger.debug("Unregistering %s" % (id,))
                wc().stream_unregister (id)

    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
        import rss
        listing = rss.Listing(self.configdir)
        rank = 0
        for id in listing.getListOfFeeds():
            try:
                rank += 1
                values = (id, listing.getFeedTitle(id), listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
                self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 1);", values)
                self.db.commit()

                feed = listing.getFeed(id)
                new_feed = self.getFeed(id)

                items = feed.getIds()[:]
                items.reverse()
                for item in items:
                    if feed.isEntryRead(item):
                        read_status = 1
                    else:
                        read_status = 0
                    date = timegm(feed.getDateTuple(item))
                    title = feed.getTitle(item)
                    newId = new_feed.generateUniqueId({"date":date, "title":title})
                    values = (newId, title, feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
                    new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                    new_feed.db.commit()
                    try:
                        images = feed.getImages(item)
                        for image in images:
                            new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
                            new_feed.db.commit()
                    except:
                        pass
                self.updateUnread(id)
            except:
                logger.error("importOldFormatFeeds: %s"
                             % (traceback.format_exc(),))
        remove(self.configdir+"feeds.pickle")

    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
        if count == 0:
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")

    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
                   priority=0):
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
        if not expiryTime:
            # Default to 24 hours
            expiryTime = 24
        if proxy is None:
            (use_proxy, proxy) = self.config.getProxy()
            if not use_proxy:
                proxy = None
        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        try:
            modified = time.struct_time(eval(modified))
        except:
            modified = None
        feed.updateFeed(
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)

    def _queuePostFeedUpdate(self, *args, **kwargs):
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)

    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        if modified==None:
            modified="None"
        else:
            modified=str(tuple(modified))
        if updateTime > 0:
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
        else:
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )

        if title is not None:
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
                            (title, key))
        self.db.commit()
        self.cache_invalidate('feeds')
        self.updateUnread(key)

        update_server_object().ArticleCountUpdated()

        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

        percent = (100 * ((completed + in_progress / 2.))
                   / (completed + in_progress + queued))
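        # Illustrative: completed=3, in_progress=2, queued=5 gives
        # 100 * (3 + 2/2.) / (3 + 2 + 5) = 40%.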

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']

    def getFeed(self, key):
        if key == "ArchivedArticles":
            return ArchivedArticles(self.configdir, key)
        return Feed(self.configdir, key)

    def editFeed(self, key, title, url, category=None):
        if category:
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
        else:
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.db.commit()
        self.cache_invalidate('feeds')

        if wc().available():
            try:
                wc()[key].human_readable_name = title
            except KeyError:
                logger.debug("Feed %s (%s) unknown." % (key, title))

    def getFeedUpdateTime(self, key):
        update_time = self.lookup('feeds', 'updateTime', key)

        if not update_time:
            return "Never"

        delta = time.time() - update_time

        delta_hours = delta / (60. * 60.)
        if delta_hours < .1:
            return "A few minutes ago"
        if delta_hours < .75:
            return "Less than an hour ago"
        if delta_hours < 1.5:
            return "About an hour ago"
        if delta_hours < 18:
            return "About %d hours ago" % (int(delta_hours + 0.5),)

        delta_days = delta_hours / 24.
        if delta_days < 1.5:
            return "About a day ago"
        if delta_days < 18:
            return "%d days ago" % (int(delta_days + 0.5),)

        delta_weeks = delta_days / 7.
        if delta_weeks <= 8:
            return "%d weeks ago" % int(delta_weeks + 0.5)

        delta_months = delta_days / 30.
        if delta_months <= 30:
            return "%d months ago" % int(delta_months + 0.5)

        return time.strftime("%x", time.gmtime(update_time))
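
    # Illustrative: an update 30 minutes ago yields "Less than an hour
    # ago"; three days ago yields "3 days ago"; beyond about eight weeks
    # the age is reported in months, and past roughly 30 months it falls
    # through to a "%x"-formatted date.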

    def getFeedNumberOfUnreadItems(self, key):
        return self.lookup('feeds', 'unread', key)

    def getFeedTitle(self, key):
        title = self.lookup('feeds', 'title', key)
        if title:
            return title

        return self.getFeedUrl(key)

    def getFeedUrl(self, key):
        return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getFeedCategory(self, key):
        return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getListOfFeeds(self, category=None):
        if category:
            rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category,))
        else:
            rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;")
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getListOfCategories(self):
        return list(row[0] for row in self.db.execute(
                "SELECT id FROM categories ORDER BY rank;"))

    def getCategoryTitle(self, id):
        return self.lookup('categories', 'title', id)

    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        if order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
        if onlyUnread:
            sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " % category + tmp
        else:
            sql = "SELECT id FROM feeds WHERE category=%s " % category + tmp
        rows = self.db.execute(sql)
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getFavicon(self, key):
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):
            return filename
        else:
            return False

    def updateUnread(self, key):
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        self.db.commit()
        self.cache_invalidate('feeds')

    def addFeed(self, title, url, id=None, category=1):
        if not id:
            id = getId(url)
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
        if count == 0:
            max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            if max_rank == None:
                max_rank = 0
            values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);", values)
            self.db.commit()
            # Ask for the feed object, it will create the necessary tables
            self.getFeed(id)

            if wc().available():
                # Register the stream with Woodchuck.  Update approximately
                # every 6 hours.
                wc().stream_register(stream_identifier=id,
                                     human_readable_name=title,
                                     freshness=6*60*60)

            return True
        else:
            return False

    def addCategory(self, title):
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        if rank==None:
            rank=1
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        if id==None:
            id=1
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
        self.db.commit()

    def removeFeed(self, key):
        if wc().available ():
            try:
                del wc()[key]
            except KeyError:
                logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
        self.db.commit()

        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")

    def removeCategory(self, key):
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
            self.db.commit()

    #def saveConfig(self):
    #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
    #    file = open(self.configdir+"feeds.pickle", "w")
    #    pickle.dump(self.listOfFeeds, file)
    #    file.close()

    def moveUp(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        if rank>0:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveCategoryUp(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        if rank>0:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveDown(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
        if rank<max_rank:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()

    def moveCategoryDown(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
        if rank<max_rank:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()