Add support for Woodchuck.
[feedingit] / src / rss_sqlite.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 #  This program is distributed in the hope that it will be useful,
12 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 #  GNU Lesser General Public License for more details.
15 #
16 #  You should have received a copy of the GNU Lesser General Public License
17 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 #
19
20 # ============================================================================
21 # Name        : FeedingIt.py
22 # Author      : Yves Marcoz
23 # Version     : 0.5.4
24 # Description : Simple RSS Reader
25 # ============================================================================
26
27 import sqlite3
28 from os.path import isfile, isdir
29 from shutil import rmtree
30 from os import mkdir, remove, utime
31 import os
32 import md5
33 import feedparser
34 import time
35 import urllib2
36 from BeautifulSoup import BeautifulSoup
37 from urlparse import urljoin
38 from calendar import timegm
39 from updatedbus import get_lock, release_lock
40 import threading
41 import traceback
42 from wc import wc, wc_init
43 import woodchuck
44 from jobmanager import JobManager
45 import mainthread
46 from httpprogresshandler import HTTPProgressHandler
47 import random
48 import sys
49
def getId(string):
    """Return a stable identifier for *string*: its MD5 hex digest."""
    digest = md5.new(string)
    return digest.hexdigest()
52
def download_callback(connection):
    """Abort an in-progress download when the job manager is shutting down.

    Passed to HTTPProgressHandler; raising KeyboardInterrupt unwinds
    the worker thread's download.
    """
    if not JobManager().do_quit:
        return
    raise KeyboardInterrupt
56
def downloader(progress_handler=None, proxy=None):
    """Build a urllib2 opener for feed/image downloads.

    Uses *progress_handler* when given, otherwise a default
    HTTPProgressHandler wired to download_callback so downloads abort
    on shutdown.  An optional *proxy* handler is appended as well.
    """
    handler = progress_handler or HTTPProgressHandler(download_callback)
    handlers = [handler]
    if proxy:
        handlers.append(proxy)
    return urllib2.build_opener(*handlers)
69
70 class Feed:
71     serial_execution_lock = threading.Lock()
72
73     def _getdb(self):
74         try:
75             db = self.tls.db
76         except AttributeError:
77             db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
78             self.tls.db = db
79         return db
80     db = property(_getdb)
81
82     def __init__(self, configdir, key):
83         self.key = key
84         self.configdir = configdir
85         self.dir = "%s/%s.d" %(self.configdir, self.key)
86         self.tls = threading.local ()
87
88         if not isdir(self.dir):
89             mkdir(self.dir)
90         if not isfile("%s/%s.db" %(self.dir, self.key)):
91             self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
92             self.db.execute("CREATE TABLE images (id text, imagePath text);")
93             self.db.commit()
94
95     def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
96         filename = configdir+key+".d/"+getId(url)
97         if not isfile(filename):
98             try:
99                 if not opener:
100                     opener = downloader(proxy=proxy)
101
102                 abs_url = urljoin(baseurl,url)
103                 f = opener.open(abs_url)
104                 outf = open(filename, "w")
105                 outf.write(f.read())
106                 f.close()
107                 outf.close()
108             except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
109                 print ("Could not download image %s: %s"
110                        % (abs_url, str (exception)))
111                 return None
112             except:
113                 exception = sys.exc_info()[0]
114
115                 print "Downloading image: %s" % abs_url
116                 traceback.print_exc()
117
118                 try:
119                     remove(filename)
120                 except OSError:
121                     pass
122
123                 raise exception
124         else:
125             #open(filename,"a").close()  # "Touch" the file
126             file = open(filename,"a")
127             utime(filename, None)
128             file.close()
129         return filename
130
131     def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
132         def doit():
133             def it():
134                 self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
135             return it
136         JobManager().execute(doit(), self.key, priority=priority)
137
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Synchronously download and process this feed.

        Runs on a JobManager worker thread.  Downloads the feed (and,
        when imageCache is set, its images), stores new entries in the
        sqlite database, expires stale entries and files, and reports
        progress to Woodchuck when it is available.  postFeedUpdateFunc,
        if given, is always invoked from the finally block with
        (self.key, updateTime, etag, modified, *postFeedUpdateFuncArgs).
        """
        success = False
        have_serial_execution_lock = False
        try:
            update_lock = None
            # NOTE(review): the literal string "key" is passed here, not
            # self.key, so every feed contends for one global lock.  It
            # is unclear whether that is intentional -- confirm.
            update_lock = get_lock("key")
            if not update_lock:
                # Someone else is doing an update.
                return

            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            # wc().available must be evaluated on the main thread.
            have_woodchuck = mainthread.execute (wc().available)

            # Report a successful stream update to Woodchuck (runs on
            # the main thread via mainthread.execute).
            def wc_success():
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    print "Failed to register update with woodchuck!"
                    pass

            http_status = tmp.get ('status', 200)

            # Check if the parse was successful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This make a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                print "%s: No changes to feed." % (self.key,)
                mainthread.execute (wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occured fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed our or
                # '' if the data is not a proper feed)
                print ("Error fetching %s: version is: %s: error: %s"
                       % (url, str (tmp.version),
                          str (tmp.get ('bozo_exception', 'Unknown error'))))
                print tmp
                if have_woodchuck:
                    def e():
                        print "%s: stream update failed!" % self.key

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        # Map the HTTP status class onto a Woodchuck
                        # transfer-failure code.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    mainthread.execute (e, async=True)
            else:
               currentTime = time.time()
               # The etag and modified value should only be updated if the content was not null
               try:
                   etag = tmp["etag"]
               except KeyError:
                   etag = None
               try:
                   modified = tmp["modified"]
               except KeyError:
                   modified = None
               # Best-effort favicon fetch; failure is logged and ignored.
               try:
                   abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                   f = opener.open(abs_url)
                   data = f.read()
                   f.close()
                   outf = open(self.dir+"/favicon.ico", "w")
                   outf.write(data)
                   outf.close()
                   del data
               except (urllib2.HTTPError, urllib2.URLError), exception:
                   print ("Could not download favicon %s: %s"
                          % (abs_url, str (exception)))

               self.serial_execution_lock.acquire ()
               have_serial_execution_lock = True

               #reversedEntries = self.getEntries()
               #reversedEntries.reverse()

               ids = self.getIds()

               tmp["entries"].reverse()
               for entry in tmp["entries"]:
                   # Yield so as to make the main thread a bit more
                   # responsive.
                   time.sleep(0)

                   if JobManager().do_quit:
                       raise KeyboardInterrupt

                   received_base = progress_handler.stats['received']
                   sent_base = progress_handler.stats['sent']
                   object_size = 0

                   date = self.extractDate(entry)
                   try:
                       entry["title"]
                   except KeyError:
                       entry["title"] = "No Title"
                   try :
                       entry["link"]
                   except KeyError:
                       entry["link"] = ""
                   try:
                       entry["author"]
                   except KeyError:
                       entry["author"] = None
                   if(not(entry.has_key("id"))):
                       entry["id"] = None
                   content = self.extractContent(entry)
                   object_size = len (content)
                   received_base -= len (content)
                   tmpEntry = {"title":entry["title"], "content":content,
                               "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                   id = self.generateUniqueId(tmpEntry)

                   #articleTime = time.mktime(self.entries[id]["dateTuple"])
                   soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                   images = soup('img')
                   baseurl = tmpEntry["link"]
                   #if not id in ids:
                   # Image downloads may block; drop the serial lock so
                   # other feeds can make progress meanwhile.
                   if imageCache and len(images) > 0:
                       self.serial_execution_lock.release ()
                       have_serial_execution_lock = False
                       for img in images:
                            filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                            if filename:
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    print ("Error getting size of %s: %s"
                                           % (filename, exception))
                                    pass
                       self.serial_execution_lock.acquire ()
                       have_serial_execution_lock = True

                   tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                   file = open(tmpEntry["contentLink"], "w")
                   file.write(soup.prettify())
                   file.close()
                   if id in ids:
                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                       self.db.commit()
                   else:
                       values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                       self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                       self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                   # Register the object with Woodchuck and mark it as
                   # downloaded.
                   if have_woodchuck:
                       # NOTE(review): e() runs later on the main thread
                       # but closes over the loop variables (id,
                       # tmpEntry, entry, received_base, ...); by the
                       # time it runs they may refer to a later entry.
                       # Consider binding them as default arguments --
                       # TODO confirm.
                       def e():
                           try:
                               obj = wc()[self.key].object_register(
                                   object_identifier=id,
                                   human_readable_name=tmpEntry["title"])
                           except woodchuck.ObjectExistsError:
                               obj = wc()[self.key][id]
                           else:
                               # If the entry does not contain a publication
                               # time, the attribute won't exist.
                               pubtime = entry.get ('date_parsed', None)
                               if pubtime:
                                   obj.publication_time = time.mktime (pubtime)

                               received = (progress_handler.stats['received']
                                           - received_base)
                               sent = progress_handler.stats['sent'] - sent_base
                               obj.transferred (
                                   indicator=(woodchuck.Indicator.ApplicationVisual
                                              |woodchuck.Indicator.StreamWide),
                                   transferred_down=received,
                                   transferred_up=sent,
                                   object_size=object_size)
                       mainthread.execute(e, async=True)
               self.db.commit()

               print ("%s: Update successful: transferred: %d/%d; objects: %d)"
                      % (self.key,
                         progress_handler.stats['sent'],
                         progress_handler.stats['received'],
                         len (tmp.entries)))
               mainthread.execute (wc_success, async=True)
               success = True

            # Expire entries: unread ones after twice the expiry window,
            # read ones after one window.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
               self.removeEntry(row[0])

            from glob import glob
            from os import stat
            for file in glob(configdir+self.key+".d/*"):
                #
                stats = stat(file)
                #
                # put the two dates into matching format
                #
                lastmodDate = stats[8]
                #
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                #
                if expDate > lastmodDate:
                    #
                    try:
                        #
                        #print 'Removing', file
                        #
                        # XXX: Tell woodchuck.
                        remove(file) # commented out for testing
                        #
                    except OSError, exception:
                        #
                        print 'Could not remove %s: %s' % (file, str (exception))
            print ("updated %s: %fs in download, %fs in processing"
                   % (self.key, download_duration,
                      time.time () - process_start))
        except:
            print "Updating %s: %s" % (self.key, sys.exc_info()[0])
            traceback.print_exc()
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            if update_lock is not None:
                release_lock (update_lock)

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime=row[0]
            except:
                print "Fetching update time."
                traceback.print_exc()
            finally:
                if not success:
                    etag = None
                    modified = None
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        *postFeedUpdateFuncArgs)
449
450     def setEntryRead(self, id):
451         self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
452         self.db.commit()
453
454         def e():
455             if wc().available():
456                 try:
457                     wc()[self.key][id].used()
458                 except KeyError:
459                     pass
460
461     def setEntryUnread(self, id):
462         self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
463         self.db.commit()     
464         
465     def markAllAsRead(self):
466         self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
467         self.db.commit()
468
469     def isEntryRead(self, id):
470         read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
471         return read_status==1  # Returns True if read==1, and False if read==0
472     
473     def getTitle(self, id):
474         return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
475     
476     def getContentLink(self, id):
477         return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
478     
479     def getExternalLink(self, id):
480         return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
481     
482     def getDate(self, id):
483         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
484         return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
485
486     def getDateTuple(self, id):
487         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
488         return time.localtime(dateStamp)
489     
490     def getDateStamp(self, id):
491         return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
492     
493     def generateUniqueId(self, entry):
494         if(entry["id"] != None):
495             return getId(str(entry["id"]))
496         else:
497             try:
498                 return getId(str(entry["date"]) + str(entry["title"]))
499             except:
500                 #print entry["title"]
501                 return getId(str(entry["date"]))
502     
503     def getIds(self, onlyUnread=False):
504         if onlyUnread:
505             rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
506         else:
507             rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
508         ids = []
509         for row in rows:
510             ids.append(row[0])
511         #ids.reverse()
512         return ids
513     
514     def getNextId(self, id):
515         ids = self.getIds()
516         index = ids.index(id)
517         return ids[(index+1)%len(ids)]
518         
519     def getPreviousId(self, id):
520         ids = self.getIds()
521         index = ids.index(id)
522         return ids[(index-1)%len(ids)]
523     
524     def getNumberOfUnreadItems(self):
525         return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
526     
527     def getNumberOfEntries(self):
528         return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
529
530     def getArticle(self, entry):
531         #self.setEntryRead(id)
532         #entry = self.entries[id]
533         title = entry['title']
534         #content = entry.get('content', entry.get('summary_detail', {}))
535         content = entry["content"]
536
537         link = entry['link']
538         author = entry['author']
539         date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
540
541         #text = '''<div style="color: black; background-color: white;">'''
542         text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
543         text += "<html><head><title>" + title + "</title>"
544         text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
545         #text += '<style> body {-webkit-user-select: none;} </style>'
546         text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
547         if author != None:
548             text += "<BR /><small><i>Author: " + author + "</i></small>"
549         text += "<BR /><small><i>Date: " + date + "</i></small></div>"
550         text += "<BR /><BR />"
551         text += content
552         text += "</body></html>"
553         return text
554    
555     def getContent(self, id):
556         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
557         try:
558             file = open(self.entries[id]["contentLink"])
559             content = file.read()
560             file.close()
561         except:
562             content = "Content unavailable"
563         return content
564     
565     def extractDate(self, entry):
566         if entry.has_key("updated_parsed"):
567             return timegm(entry["updated_parsed"])
568         elif entry.has_key("published_parsed"):
569             return timegm(entry["published_parsed"])
570         else:
571             return time.time()
572         
573     def extractContent(self, entry):
574         content = ""
575         if entry.has_key('summary'):
576             content = entry.get('summary', '')
577         if entry.has_key('content'):
578             if len(entry.content[0].value) > len(content):
579                 content = entry.content[0].value
580         if content == "":
581             content = entry.get('description', '')
582         return content
583     
584     def removeEntry(self, id):
585         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
586         if contentLink:
587             try:
588                 remove(contentLink)
589             except OSError, exception:
590                 print "Deleting %s: %s" % (contentLink, str (exception))
591         self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
592         self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
593         self.db.commit()
594
595         def e():
596             if wc().available():
597                 try:
598                     wc()[self.key][id].files_deleted (
599                         woodchuck.DeletionResponse.Deleted)
600                     del wc()[self.key][id]
601                 except KeyError:
602                     pass
603         mainthread.execute (e, async=True)
604  
class ArchivedArticles(Feed):
    """A pseudo-feed holding articles the user explicitly archived.

    Entries are added locally (addArchivedArticle) rather than parsed
    from an RSS document; updateFeed then fetches each pending
    article's page and images.
    """
    def addArchivedArticle(self, title, link, date, configdir):
        # updated=0 marks the article as not-yet-downloaded; updateFeed
        # below selects exactly those rows.
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    # NOTE(review): unlike Feed.updateFeed, this override runs
    # synchronously and returns a (currentTime, etag, modified) tuple;
    # it also ignores etag/modified/expiryTime and has no priority /
    # postFeedUpdateFunc parameters.  Callers must treat the two
    # classes differently -- confirm this is intended.
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        currentTime = 0
        # Articles with updated=0 have not been fetched yet.
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            # NOTE(review): fetched directly, without the proxy or the
            # progress handler used elsewhere -- confirm.
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                # NOTE(review): Feed._updateFeed rewrites src as
                # "file://%s" and skips failed downloads (filename is
                # None); neither happens here -- confirm.
                img['src']=filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()
            
            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)
    
    def purgeReadArticles(self):
        # Delete every article the user has already read.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        # Remove the article's image files first, but only those not
        # shared with another article.
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                # Best effort: a missing file is not fatal.
                pass
        self.removeEntry(id)
656
657 class Listing:
658     def _getdb(self):
659         try:
660             db = self.tls.db
661         except AttributeError:
662             db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
663             self.tls.db = db
664         return db
665     db = property(_getdb)
666
667     # Lists all the feeds in a dictionary, and expose the data
668     def __init__(self, config, configdir):
669         self.config = config
670         self.configdir = configdir
671
672         self.tls = threading.local ()
673         
674         try:
675             table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
676             if table == None:
677                 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
678                 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
679                 self.addCategory("Default Category")
680                 if isfile(self.configdir+"feeds.pickle"):
681                     self.importOldFormatFeeds()
682                 else:
683                     self.addFeed("Maemo News", "http://maemo.org/news/items.xml")    
684             else:
685                 from string import find, upper
686                 if find(upper(table[0]), "WIDGET")<0:
687                     self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
688                     self.db.execute("UPDATE feeds SET widget=1;")
689                     self.db.commit()
690                 if find(upper(table[0]), "CATEGORY")<0:
691                     self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
692                     self.addCategory("Default Category")
693                     self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
694                     self.db.execute("UPDATE feeds SET category=1;")
695             self.db.commit()
696         except:
697             pass
698
699         # Check that Woodchuck's state is up to date with respect our
700         # state.
701         wc_init (self)
702         if wc().available():
703             # The list of known streams.
704             streams = wc().streams_list ()
705             stream_ids = [s.identifier for s in streams]
706
707             # Register any unknown streams.  Remove known streams from
708             # STREAMS_IDS.
709             for key in self.getListOfFeeds():
710                 title = self.getFeedTitle(key)
711                 # XXX: We should also check whether the list of
712                 # articles/objects in each feed/stream is up to date.
713                 if key not in stream_ids:
714                     print ("Registering previously unknown channel: %s (%s)"
715                            % (key, title,))
716                     # Use a default refresh interval of 6 hours.
717                     wc().stream_register (key, title, 6 * 60 * 60)
718                 else:
719                     # Make sure the human readable name is up to date.
720                     if wc()[key].human_readable_name != title:
721                         wc()[key].human_readable_name = title
722                     stream_ids.remove (key)
723                     
724
725             # Unregister any streams that are no longer subscribed to.
726             for id in stream_ids:
727                 print ("Unregistering %s" % (id,))
728                 w.stream_unregister (id)
729
730     def importOldFormatFeeds(self):
731         """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
732         import rss
733         listing = rss.Listing(self.configdir)
734         rank = 0
735         for id in listing.getListOfFeeds():
736             try:
737                 rank += 1
738                 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
739                 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
740                 self.db.commit()
741                 
742                 feed = listing.getFeed(id)
743                 new_feed = self.getFeed(id)
744                 
745                 items = feed.getIds()[:]
746                 items.reverse()
747                 for item in items:
748                         if feed.isEntryRead(item):
749                             read_status = 1
750                         else:
751                             read_status = 0 
752                         date = timegm(feed.getDateTuple(item))
753                         title = feed.getTitle(item)
754                         newId = new_feed.generateUniqueId({"date":date, "title":title})
755                         values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
756                         new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
757                         new_feed.db.commit()
758                         try:
759                             images = feed.getImages(item)
760                             for image in images:
761                                 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
762                                 new_feed.db.commit()
763                         except:
764                             pass
765                 self.updateUnread(id)
766             except:
767                 traceback.print_exc()
768         remove(self.configdir+"feeds.pickle")
769                 
770         
771     def addArchivedArticle(self, key, index):
772         feed = self.getFeed(key)
773         title = feed.getTitle(index)
774         link = feed.getExternalLink(index)
775         date = feed.getDate(index)
776         count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
777         if count == 0:
778             self.addFeed("Archived Articles", "", id="ArchivedArticles")
779
780         archFeed = self.getFeed("ArchivedArticles")
781         archFeed.addArchivedArticle(title, link, date, self.configdir)
782         self.updateUnread("ArchivedArticles")
783         
784     def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
785                    priority=0):
786         if expiryTime is None:
787             expiryTime = self.config.getExpiry()
788         if not expiryTime:
789             # Default to 24 hours
790             expriyTime = 24
791         if proxy is None:
792             (use_proxy, proxy) = self.config.getProxy()
793             if not use_proxy:
794                 proxy = None
795         if imageCache is None:
796             imageCache = self.config.getImageCache()
797
798         feed = self.getFeed(key)
799         (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
800         try:
801             modified = time.struct_time(eval(modified))
802         except:
803             modified = None
804         feed.updateFeed(
805             self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
806             priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
807
    def _queuePostFeedUpdate(self, *args, **kwargs):
        # Forward a completed feed update to _postFeedUpdate on the main
        # thread without blocking the (worker) caller; async=True makes
        # mainthread.execute return immediately.
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
810
811     def _postFeedUpdate(self, key, updateTime, etag, modified):
812         if modified==None:
813             modified="None"
814         else:
815             modified=str(tuple(modified))
816         if updateTime > 0:
817             self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
818         else:
819             self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
820         self.db.commit()
821         self.updateUnread(key)
822         
823     def getFeed(self, key):
824         if key == "ArchivedArticles":
825             return ArchivedArticles(self.configdir, key)
826         return Feed(self.configdir, key)
827         
828     def editFeed(self, key, title, url, category=None):
829         if category:
830             self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
831         else:
832             self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
833         self.db.commit()
834
835         if wc().available():
836             try:
837                 wc()[key].human_readable_name = title
838             except KeyError:
839                 print "Feed %s (%s) unknown." % (key, title)
840                 pass
841         
842     def getFeedUpdateTime(self, key):
843         return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
844         
845     def getFeedNumberOfUnreadItems(self, key):
846         return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
847         
848     def getFeedTitle(self, key):
849         return self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,)).fetchone()[0]
850         
851     def getFeedUrl(self, key):
852         return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
853     
854     def getFeedCategory(self, key):
855         return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
856         
857     def getListOfFeeds(self, category=None):
858         if category:
859             rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
860         else:
861             rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
862         keys = []
863         for row in rows:
864             if row[0]:
865                 keys.append(row[0])
866         return keys
867     
868     def getListOfCategories(self):
869         rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
870         keys = []
871         for row in rows:
872             if row[0]:
873                 keys.append(row[0])
874         return keys
875     
876     def getCategoryTitle(self, id):
877         row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
878         return row[0]
879     
880     def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
881         if   order == "Most unread":
882             tmp = "ORDER BY unread DESC"
883             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
884         elif order == "Least unread":
885             tmp = "ORDER BY unread"
886             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
887         elif order == "Most recent":
888             tmp = "ORDER BY updateTime DESC"
889             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
890         elif order == "Least recent":
891             tmp = "ORDER BY updateTime"
892             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
893         else: # order == "Manual" or invalid value...
894             tmp = "ORDER BY rank"
895             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
896         if onlyUnread:
897             sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp 
898         else:
899             sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
900         rows = self.db.execute(sql)
901         keys = []
902         for row in rows:
903             if row[0]:
904                 keys.append(row[0])
905         return keys
906     
907     def getFavicon(self, key):
908         filename = "%s%s.d/favicon.ico" % (self.configdir, key)
909         if isfile(filename):
910             return filename
911         else:
912             return False
913         
914     def updateUnread(self, key):
915         feed = self.getFeed(key)
916         self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
917         self.db.commit()
918
919     def addFeed(self, title, url, id=None, category=1):
920         if not id:
921             id = getId(title)
922         count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
923         if count == 0:
924             max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
925             if max_rank == None:
926                 max_rank = 0
927             values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
928             self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
929             self.db.commit()
930             # Ask for the feed object, it will create the necessary tables
931             self.getFeed(id)
932
933             if wc().available():
934                 # Register the stream with Woodchuck.  Update approximately
935                 # every 6 hours.
936                 wc().stream_register(stream_identifier=id,
937                                      human_readable_name=title,
938                                      freshness=6*60*60)
939
940             return True
941         else:
942             return False
943         
944     def addCategory(self, title):
945         rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
946         if rank==None:
947             rank=1
948         id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
949         if id==None:
950             id=1
951         self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
952         self.db.commit()
953     
954     def removeFeed(self, key):
955         if wc().available ():
956             try:
957                 del wc()[key]
958             except KeyError:
959                 print "Removing unregistered feed %s failed" % (key,)
960
961         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
962         self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
963         self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
964         self.db.commit()
965
966         if isdir(self.configdir+key+".d/"):
967            rmtree(self.configdir+key+".d/")
968            
969     def removeCategory(self, key):
970         if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
971             rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
972             self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
973             self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
974             self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
975             self.db.commit()
976         
977     #def saveConfig(self):
978     #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
979     #    file = open(self.configdir+"feeds.pickle", "w")
980     #    pickle.dump(self.listOfFeeds, file)
981     #    file.close()
982         
983     def moveUp(self, key):
984         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
985         if rank>0:
986             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
987             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
988             self.db.commit()
989             
990     def moveCategoryUp(self, key):
991         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
992         if rank>0:
993             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
994             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
995             self.db.commit()
996         
997     def moveDown(self, key):
998         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
999         max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
1000         if rank<max_rank:
1001             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
1002             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
1003             self.db.commit()
1004             
1005     def moveCategoryDown(self, key):
1006         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1007         max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
1008         if rank<max_rank:
1009             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
1010             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
1011             self.db.commit()
1012             
1013