Replace use of print with Python's logging infrastructure.
[feedingit] / src / rss_sqlite.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 #  This program is distributed in the hope that it will be useful,
12 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 #  GNU Lesser General Public License for more details.
15 #
16 #  You should have received a copy of the GNU Lesser General Public License
17 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 #
19
20 # ============================================================================
21 # Name        : FeedingIt.py
22 # Author      : Yves Marcoz
23 # Version     : 0.5.4
24 # Description : Simple RSS Reader
25 # ============================================================================
26
import hashlib
import logging
import md5
import os
import random
import sqlite3
import sys
import threading
import time
import traceback
import urllib2
from calendar import timegm
from os import mkdir, remove, utime
from os.path import isfile, isdir
from shutil import rmtree

import feedparser
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin

import mainthread
from httpprogresshandler import HTTPProgressHandler
from jobmanager import JobManager
from updatedbus import get_lock, release_lock
from wc import wc, wc_init, woodchuck
50 logger = logging.getLogger(__name__)
51
def getId(string):
    """Return the MD5 hex digest of *string*.

    Used as a stable, filesystem-safe identifier for articles and
    cached images.  Uses hashlib (available since Python 2.5) instead
    of the deprecated md5 module.
    """
    return hashlib.md5(string).hexdigest()
54
def download_callback(connection):
    """Progress hook invoked during transfers; aborts the download by
    raising KeyboardInterrupt when the job manager is shutting down."""
    manager = JobManager()
    if manager.do_quit:
        raise KeyboardInterrupt
58
def downloader(progress_handler=None, proxy=None):
    """Build a urllib2 opener.

    Uses *progress_handler* if supplied, otherwise a default
    HTTPProgressHandler wired to download_callback; adds *proxy* when
    given.
    """
    handlers = [progress_handler if progress_handler
                else HTTPProgressHandler(download_callback)]
    if proxy:
        handlers.append(proxy)
    return urllib2.build_opener(*handlers)
71
72 class Feed:
73     serial_execution_lock = threading.Lock()
74
75     def _getdb(self):
76         try:
77             db = self.tls.db
78         except AttributeError:
79             db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
80             self.tls.db = db
81         return db
82     db = property(_getdb)
83
84     def __init__(self, configdir, key):
85         self.key = key
86         self.configdir = configdir
87         self.dir = "%s/%s.d" %(self.configdir, self.key)
88         self.tls = threading.local ()
89
90         if not isdir(self.dir):
91             mkdir(self.dir)
92         if not isfile("%s/%s.db" %(self.dir, self.key)):
93             self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
94             self.db.execute("CREATE TABLE images (id text, imagePath text);")
95             self.db.commit()
96
97     def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
98         filename = configdir+key+".d/"+getId(url)
99         if not isfile(filename):
100             try:
101                 if not opener:
102                     opener = downloader(proxy=proxy)
103
104                 abs_url = urljoin(baseurl,url)
105                 f = opener.open(abs_url)
106                 outf = open(filename, "w")
107                 outf.write(f.read())
108                 f.close()
109                 outf.close()
110             except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
111                 logger.info("Could not download image %s: %s"
112                             % (abs_url, str (exception)))
113                 return None
114             except:
115                 exception = sys.exc_info()[0]
116
117                 logger.info("Downloading image %s: %s" %
118                             (abs_url, traceback.format_exc()))
119                 try:
120                     remove(filename)
121                 except OSError:
122                     pass
123
124                 raise exception
125         else:
126             #open(filename,"a").close()  # "Touch" the file
127             file = open(filename,"a")
128             utime(filename, None)
129             file.close()
130         return filename
131
132     def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
133         def doit():
134             def it():
135                 self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
136             return it
137         JobManager().execute(doit(), self.key, priority=priority)
138
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Fetch and process feed *url*, storing new articles in the database.

        Runs on a JobManager worker thread.  Caches images when
        *imageCache* is set, expires old articles (*expiryTime* is in
        hours), reports progress to Woodchuck when available, and always
        finishes by calling *postFeedUpdateFunc* (if given) with the
        update time, etag, modified value and feed title.
        """
        success = False
        have_serial_execution_lock = False
        try:
            update_lock = None
            # NOTE(review): the lock name is the literal string "key",
            # not self.key -- as written this serializes updates across
            # *all* feeds; confirm whether a per-feed lock was intended.
            update_lock = get_lock("key")
            if not update_lock:
                # Someone else is doing an update.
                return

            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            have_woodchuck = mainthread.execute (wc().available)

            # Report a successful stream update to Woodchuck; must be
            # dispatched on the main thread (see mainthread.execute).
            def wc_success():
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    logger.warn(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key))

            http_status = tmp.get ('status', 200)

            # Check if the parse was succesful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This make a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute (wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occured fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed our or
                # '' if the data is not a proper feed)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.version),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))
                logger.debug(tmp)
                if have_woodchuck:
                    def e():
                        logger.debug("%s: stream update failed!" % self.key)

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        # Map the HTTP status class to a Woodchuck
                        # transfer-failure code.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    mainthread.execute (e, async=True)
            else:
               currentTime = time.time()
               # The etag and modified value should only be updated if the content was not null
               try:
                   etag = tmp["etag"]
               except KeyError:
                   etag = None
               try:
                   modified = tmp["modified"]
               except KeyError:
                   modified = None
               # Best effort: cache the site's favicon next to the feed.
               try:
                   abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                   f = opener.open(abs_url)
                   data = f.read()
                   f.close()
                   outf = open(self.dir+"/favicon.ico", "w")
                   outf.write(data)
                   outf.close()
                   del data
               except (urllib2.HTTPError, urllib2.URLError), exception:
                   logger.debug("Could not download favicon %s: %s"
                                % (abs_url, str (exception)))

               self.serial_execution_lock.acquire ()
               have_serial_execution_lock = True

               #reversedEntries = self.getEntries()
               #reversedEntries.reverse()

               ids = self.getIds()

               tmp["entries"].reverse()
               for entry in tmp["entries"]:
                   # Yield so as to make the main thread a bit more
                   # responsive.
                   time.sleep(0)

                   if JobManager().do_quit:
                       raise KeyboardInterrupt

                   received_base = progress_handler.stats['received']
                   sent_base = progress_handler.stats['sent']
                   object_size = 0

                   date = self.extractDate(entry)
                   try:
                       entry["title"]
                   except KeyError:
                       entry["title"] = "No Title"
                   try :
                       entry["link"]
                   except KeyError:
                       entry["link"] = ""
                   try:
                       entry["author"]
                   except KeyError:
                       entry["author"] = None
                   if(not(entry.has_key("id"))):
                       entry["id"] = None
                   content = self.extractContent(entry)
                   object_size = len (content)
                   received_base -= len (content)
                   tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                   id = self.generateUniqueId(tmpEntry)

                   #articleTime = time.mktime(self.entries[id]["dateTuple"])
                   soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                   images = soup('img')
                   baseurl = tmpEntry["link"]
                   #if not id in ids:
                   if imageCache and len(images) > 0:
                       # Image downloads don't touch the database, so
                       # release the serial-execution lock while fetching.
                       self.serial_execution_lock.release ()
                       have_serial_execution_lock = False
                       for img in images:
                            filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                            if filename:
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                       self.serial_execution_lock.acquire ()
                       have_serial_execution_lock = True

                   tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                   file = open(tmpEntry["contentLink"], "w")
                   file.write(soup.prettify())
                   file.close()
                   if id in ids:
                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                       self.db.commit()
                   else:
                       values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                       self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                       self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                   # Register the object with Woodchuck and mark it as
                   # downloaded.
                   if have_woodchuck:
                       def e():
                           try:
                               obj = wc()[self.key].object_register(
                                   object_identifier=id,
                                   human_readable_name=tmpEntry["title"])
                           except woodchuck.ObjectExistsError:
                               obj = wc()[self.key][id]
                           else:
                               # If the entry does not contain a publication
                               # time, the attribute won't exist.
                               pubtime = entry.get ('date_parsed', None)
                               if pubtime:
                                   obj.publication_time = time.mktime (pubtime)

                               received = (progress_handler.stats['received']
                                           - received_base)
                               sent = progress_handler.stats['sent'] - sent_base
                               obj.transferred (
                                   indicator=(woodchuck.Indicator.ApplicationVisual
                                              |woodchuck.Indicator.StreamWide),
                                   transferred_down=received,
                                   transferred_up=sent,
                                   object_size=object_size)
                       mainthread.execute(e, async=True)
               self.db.commit()

               logger.debug (
                   "%s: Update successful: transferred: %d/%d; objects: %d)"
                   % (self.key,
                      progress_handler.stats['sent'],
                      progress_handler.stats['received'],
                      len (tmp.entries)))
               mainthread.execute (wc_success, async=True)
               success = True

            # Expire old articles: read articles after one expiry period,
            # unread ones after two.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
               self.removeEntry(row[0])

            from glob import glob
            from os import stat
            # Sweep the cache directory for files whose mtime is older
            # than three expiry periods.
            for file in glob(configdir+self.key+".d/*"):
                #
                stats = stat(file)
                #
                # put the two dates into matching format
                #
                lastmodDate = stats[8]
                #
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                #
                if expDate > lastmodDate:
                    #
                    try:
                        #
                        #print 'Removing', file
                        #
                        # XXX: Tell woodchuck.
                        remove(file) # commented out for testing
                        #
                    except OSError, exception:
                        #
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            if update_lock is not None:
                release_lock (update_lock)

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime=row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
            finally:
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    pass
                # Hand results back to the caller; runs even on failure
                # so the UI can update its state.
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)
457
458     def setEntryRead(self, id):
459         self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
460         self.db.commit()
461
462         def e():
463             if wc().available():
464                 try:
465                     wc()[self.key][id].used()
466                 except KeyError:
467                     pass
468
469     def setEntryUnread(self, id):
470         self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
471         self.db.commit()     
472         
473     def markAllAsRead(self):
474         self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
475         self.db.commit()
476
477     def isEntryRead(self, id):
478         read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
479         return read_status==1  # Returns True if read==1, and False if read==0
480     
481     def getTitle(self, id):
482         return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
483     
484     def getContentLink(self, id):
485         return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
486     
487     def getExternalLink(self, id):
488         return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
489     
490     def getDate(self, id):
491         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
492         return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
493
494     def getDateTuple(self, id):
495         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
496         return time.localtime(dateStamp)
497     
498     def getDateStamp(self, id):
499         return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
500     
501     def generateUniqueId(self, entry):
502         """
503         Generate a stable identifier for the article.  For the same
504         entry, this should result in the same identifier.  If
505         possible, the identifier should remain the same even if the
506         article is updated.
507         """
508         # Prefer the entry's id, which is supposed to be globally
509         # unique.
510         key = entry.get('id', None)
511         if not key:
512             # Next, try the link to the content.
513             key = entry.get('link', None)
514         if not key:
515             # Ok, the title and the date concatenated are likely to be
516             # relatively stable.
517             key = entry.get('title', None) + entry.get('date', None)
518         if not key:
519             # Hmm, the article's content will at least guarantee no
520             # false negatives (i.e., missing articles)
521             key = entry.get('content', None)
522         if not key:
523             # If all else fails, just use a random number.
524             key = str (random.random ())
525         return getId (key)
526     
527     def getIds(self, onlyUnread=False):
528         if onlyUnread:
529             rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
530         else:
531             rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
532         ids = []
533         for row in rows:
534             ids.append(row[0])
535         #ids.reverse()
536         return ids
537     
538     def getNextId(self, id):
539         ids = self.getIds()
540         index = ids.index(id)
541         return ids[(index+1)%len(ids)]
542         
543     def getPreviousId(self, id):
544         ids = self.getIds()
545         index = ids.index(id)
546         return ids[(index-1)%len(ids)]
547     
548     def getNumberOfUnreadItems(self):
549         return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
550     
551     def getNumberOfEntries(self):
552         return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
553
554     def getArticle(self, entry):
555         #self.setEntryRead(id)
556         #entry = self.entries[id]
557         title = entry['title']
558         #content = entry.get('content', entry.get('summary_detail', {}))
559         content = entry["content"]
560
561         link = entry['link']
562         author = entry['author']
563         date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
564
565         #text = '''<div style="color: black; background-color: white;">'''
566         text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
567         text += "<html><head><title>" + title + "</title>"
568         text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
569         #text += '<style> body {-webkit-user-select: none;} </style>'
570         text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
571         if author != None:
572             text += "<BR /><small><i>Author: " + author + "</i></small>"
573         text += "<BR /><small><i>Date: " + date + "</i></small></div>"
574         text += "<BR /><BR />"
575         text += content
576         text += "</body></html>"
577         return text
578    
579     def getContent(self, id):
580         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
581         try:
582             file = open(self.entries[id]["contentLink"])
583             content = file.read()
584             file.close()
585         except:
586             content = "Content unavailable"
587         return content
588     
589     def extractDate(self, entry):
590         if entry.has_key("updated_parsed"):
591             return timegm(entry["updated_parsed"])
592         elif entry.has_key("published_parsed"):
593             return timegm(entry["published_parsed"])
594         else:
595             return time.time()
596         
597     def extractContent(self, entry):
598         content = ""
599         if entry.has_key('summary'):
600             content = entry.get('summary', '')
601         if entry.has_key('content'):
602             if len(entry.content[0].value) > len(content):
603                 content = entry.content[0].value
604         if content == "":
605             content = entry.get('description', '')
606         return content
607     
    def removeEntry(self, id):
        """Delete article *id*: its cached HTML file, its feed and image
        rows, and (asynchronously) its Woodchuck object."""
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

        # Tell Woodchuck the files are gone; must run on the main thread.
        def e():
            if wc().available():
                try:
                    wc()[self.key][id].files_deleted (
                        woodchuck.DeletionResponse.Deleted)
                    del wc()[self.key][id]
                except KeyError:
                    pass
        mainthread.execute (e, async=True)
628  
class ArchivedArticles(Feed):
    """A pseudo-feed of locally archived articles.

    Articles are added explicitly (addArchivedArticle); updateFeed then
    downloads each not-yet-fetched article's page and images instead of
    polling an RSS URL.
    """
    def addArchivedArticle(self, title, link, date, configdir):
        # Insert a placeholder row; updated=0 marks it as not yet
        # downloaded (see updateFeed below).
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    # NOTE(review): signature and return value differ from
    # Feed.updateFeed -- this variant runs synchronously and returns
    # (updateTime, etag, modified); confirm callers expect that.
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        currentTime = 0
        # Rows with updated=0 are archived links whose content has not
        # been fetched yet.
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            # NOTE(review): no error handling around the download; a
            # network failure aborts the whole update.
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                img['src']=filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)

    def purgeReadArticles(self):
        # Remove every article already marked as read.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        # Delete the article's image files first -- but only images not
        # shared with another article -- then remove the article itself.
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                pass
        self.removeEntry(id)
680
681 class Listing:
682     def _getdb(self):
683         try:
684             db = self.tls.db
685         except AttributeError:
686             db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
687             self.tls.db = db
688         return db
689     db = property(_getdb)
690
691     # Lists all the feeds in a dictionary, and expose the data
692     def __init__(self, config, configdir):
693         self.config = config
694         self.configdir = configdir
695
696         self.tls = threading.local ()
697         
698         try:
699             table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
700             if table == None:
701                 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
702                 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
703                 self.addCategory("Default Category")
704                 if isfile(self.configdir+"feeds.pickle"):
705                     self.importOldFormatFeeds()
706                 else:
707                     self.addFeed("Maemo News", "http://maemo.org/news/items.xml")    
708             else:
709                 from string import find, upper
710                 if find(upper(table[0]), "WIDGET")<0:
711                     self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
712                     self.db.execute("UPDATE feeds SET widget=1;")
713                     self.db.commit()
714                 if find(upper(table[0]), "CATEGORY")<0:
715                     self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
716                     self.addCategory("Default Category")
717                     self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
718                     self.db.execute("UPDATE feeds SET category=1;")
719             self.db.commit()
720         except:
721             pass
722
723         # Check that Woodchuck's state is up to date with respect our
724         # state.
725         wc_init (self)
726         if wc().available():
727             # The list of known streams.
728             streams = wc().streams_list ()
729             stream_ids = [s.identifier for s in streams]
730
731             # Register any unknown streams.  Remove known streams from
732             # STREAMS_IDS.
733             for key in self.getListOfFeeds():
734                 title = self.getFeedTitle(key)
735                 # XXX: We should also check whether the list of
736                 # articles/objects in each feed/stream is up to date.
737                 if key not in stream_ids:
738                     logger.debug(
739                         "Registering previously unknown channel: %s (%s)"
740                         % (key, title,))
741                     # Use a default refresh interval of 6 hours.
742                     wc().stream_register (key, title, 6 * 60 * 60)
743                 else:
744                     # Make sure the human readable name is up to date.
745                     if wc()[key].human_readable_name != title:
746                         wc()[key].human_readable_name = title
747                     stream_ids.remove (key)
748                     
749
750             # Unregister any streams that are no longer subscribed to.
751             for id in stream_ids:
752                 logger.debug("Unregistering %s" % (id,))
753                 w.stream_unregister (id)
754
755     def importOldFormatFeeds(self):
756         """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
757         import rss
758         listing = rss.Listing(self.configdir)
759         rank = 0
760         for id in listing.getListOfFeeds():
761             try:
762                 rank += 1
763                 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
764                 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
765                 self.db.commit()
766                 
767                 feed = listing.getFeed(id)
768                 new_feed = self.getFeed(id)
769                 
770                 items = feed.getIds()[:]
771                 items.reverse()
772                 for item in items:
773                         if feed.isEntryRead(item):
774                             read_status = 1
775                         else:
776                             read_status = 0 
777                         date = timegm(feed.getDateTuple(item))
778                         title = feed.getTitle(item)
779                         newId = new_feed.generateUniqueId({"date":date, "title":title})
780                         values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
781                         new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
782                         new_feed.db.commit()
783                         try:
784                             images = feed.getImages(item)
785                             for image in images:
786                                 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
787                                 new_feed.db.commit()
788                         except:
789                             pass
790                 self.updateUnread(id)
791             except:
792                 logger.error("importOldFormatFeeds: %s"
793                              % (traceback.format_exc(),))
794         remove(self.configdir+"feeds.pickle")
795                 
796         
797     def addArchivedArticle(self, key, index):
798         feed = self.getFeed(key)
799         title = feed.getTitle(index)
800         link = feed.getExternalLink(index)
801         date = feed.getDate(index)
802         count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
803         if count == 0:
804             self.addFeed("Archived Articles", "", id="ArchivedArticles")
805
806         archFeed = self.getFeed("ArchivedArticles")
807         archFeed.addArchivedArticle(title, link, date, self.configdir)
808         self.updateUnread("ArchivedArticles")
809         
810     def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
811                    priority=0):
812         if expiryTime is None:
813             expiryTime = self.config.getExpiry()
814         if not expiryTime:
815             # Default to 24 hours
816             expriyTime = 24
817         if proxy is None:
818             (use_proxy, proxy) = self.config.getProxy()
819             if not use_proxy:
820                 proxy = None
821         if imageCache is None:
822             imageCache = self.config.getImageCache()
823
824         feed = self.getFeed(key)
825         (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
826         try:
827             modified = time.struct_time(eval(modified))
828         except:
829             modified = None
830         feed.updateFeed(
831             self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
832             priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
833
    def _queuePostFeedUpdate(self, *args, **kwargs):
        # Forward the update results to _postFeedUpdate on the main
        # thread; `async' is mainthread.execute's keyword argument
        # (meaning "do not block").  NOTE(review): `async' became a
        # reserved word in Python 3.7, so this line is Python-2 only.
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
836
837     def _postFeedUpdate(self, key, updateTime, etag, modified, title):
838         if modified==None:
839             modified="None"
840         else:
841             modified=str(tuple(modified))
842         if updateTime > 0:
843             self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
844         else:
845             self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
846
847         if title is not None:
848             self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
849                             (title, key))
850         self.db.commit()
851         self.updateUnread(key)
852         
853     def getFeed(self, key):
854         if key == "ArchivedArticles":
855             return ArchivedArticles(self.configdir, key)
856         return Feed(self.configdir, key)
857         
858     def editFeed(self, key, title, url, category=None):
859         if category:
860             self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
861         else:
862             self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
863         self.db.commit()
864
865         if wc().available():
866             try:
867                 wc()[key].human_readable_name = title
868             except KeyError:
869                 logger.debug("Feed %s (%s) unknown." % (key, title))
870         
871     def getFeedUpdateTime(self, key):
872         return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
873         
874     def getFeedNumberOfUnreadItems(self, key):
875         return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
876         
877     def getFeedTitle(self, key):
878         (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
879         if title:
880             return title
881         return url
882         
883     def getFeedUrl(self, key):
884         return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
885     
886     def getFeedCategory(self, key):
887         return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
888         
889     def getListOfFeeds(self, category=None):
890         if category:
891             rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
892         else:
893             rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
894         keys = []
895         for row in rows:
896             if row[0]:
897                 keys.append(row[0])
898         return keys
899     
900     def getListOfCategories(self):
901         rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
902         keys = []
903         for row in rows:
904             if row[0]:
905                 keys.append(row[0])
906         return keys
907     
908     def getCategoryTitle(self, id):
909         row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
910         return row[0]
911     
912     def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
913         if   order == "Most unread":
914             tmp = "ORDER BY unread DESC"
915             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
916         elif order == "Least unread":
917             tmp = "ORDER BY unread"
918             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
919         elif order == "Most recent":
920             tmp = "ORDER BY updateTime DESC"
921             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
922         elif order == "Least recent":
923             tmp = "ORDER BY updateTime"
924             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
925         else: # order == "Manual" or invalid value...
926             tmp = "ORDER BY rank"
927             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
928         if onlyUnread:
929             sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp 
930         else:
931             sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
932         rows = self.db.execute(sql)
933         keys = []
934         for row in rows:
935             if row[0]:
936                 keys.append(row[0])
937         return keys
938     
939     def getFavicon(self, key):
940         filename = "%s%s.d/favicon.ico" % (self.configdir, key)
941         if isfile(filename):
942             return filename
943         else:
944             return False
945         
946     def updateUnread(self, key):
947         feed = self.getFeed(key)
948         self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
949         self.db.commit()
950
951     def addFeed(self, title, url, id=None, category=1):
952         if not id:
953             id = getId(url)
954         count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
955         if count == 0:
956             max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
957             if max_rank == None:
958                 max_rank = 0
959             values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
960             self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
961             self.db.commit()
962             # Ask for the feed object, it will create the necessary tables
963             self.getFeed(id)
964
965             if wc().available():
966                 # Register the stream with Woodchuck.  Update approximately
967                 # every 6 hours.
968                 wc().stream_register(stream_identifier=id,
969                                      human_readable_name=title,
970                                      freshness=6*60*60)
971
972             return True
973         else:
974             return False
975         
976     def addCategory(self, title):
977         rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
978         if rank==None:
979             rank=1
980         id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
981         if id==None:
982             id=1
983         self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
984         self.db.commit()
985     
986     def removeFeed(self, key):
987         if wc().available ():
988             try:
989                 del wc()[key]
990             except KeyError:
991                 logger.debug("Removing unregistered feed %s failed" % (key,))
992
993         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
994         self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
995         self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
996         self.db.commit()
997
998         if isdir(self.configdir+key+".d/"):
999            rmtree(self.configdir+key+".d/")
1000            
1001     def removeCategory(self, key):
1002         if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
1003             rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
1004             self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
1005             self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
1006             self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
1007             self.db.commit()
1008         
1009     #def saveConfig(self):
1010     #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
1011     #    file = open(self.configdir+"feeds.pickle", "w")
1012     #    pickle.dump(self.listOfFeeds, file)
1013     #    file.close()
1014         
1015     def moveUp(self, key):
1016         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1017         if rank>0:
1018             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
1019             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
1020             self.db.commit()
1021             
1022     def moveCategoryUp(self, key):
1023         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1024         if rank>0:
1025             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
1026             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
1027             self.db.commit()
1028         
1029     def moveDown(self, key):
1030         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1031         max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
1032         if rank<max_rank:
1033             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
1034             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
1035             self.db.commit()
1036             
1037     def moveCategoryDown(self, key):
1038         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1039         max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
1040         if rank<max_rank:
1041             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
1042             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
1043             self.db.commit()
1044             
1045