Replace use of print with Python's logging infrastructure.
[feedingit] / src / rss_sqlite.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 #  This program is distributed in the hope that it will be useful,
12 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 #  GNU Lesser General Public License for more details.
15 #
16 #  You should have received a copy of the GNU Lesser General Public License
17 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 #
19
20 # ============================================================================
21 # Name        : FeedingIt.py
22 # Author      : Yves Marcoz
23 # Version     : 0.5.4
24 # Description : Simple RSS Reader
25 # ============================================================================
26
import hashlib
import logging
import md5
import os
import random
import sqlite3
import sys
import threading
import time
import traceback
import urllib2
from calendar import timegm
from os import mkdir, remove, utime
from os.path import isfile, isdir
from shutil import rmtree

import feedparser
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin

import mainthread
from httpprogresshandler import HTTPProgressHandler
from jobmanager import JobManager
from updatedbus import get_lock, release_lock
from wc import wc, wc_init, woodchuck
50 logger = logging.getLogger(__name__)
51
def getId(string):
    """Return the MD5 hex digest of *string*.

    Used as a stable, filesystem-safe identifier for articles and
    cached images.  Uses hashlib (available since Python 2.5) instead
    of the deprecated md5 module.
    """
    return hashlib.md5(string).hexdigest()
54
def download_callback(connection):
    """Progress hook invoked during transfers; aborts the download by
    raising KeyboardInterrupt when the job manager is shutting down."""
    manager = JobManager()
    if manager.do_quit:
        raise KeyboardInterrupt
58
def downloader(progress_handler=None, proxy=None):
    """Build a urllib2 opener.

    Uses *progress_handler* if supplied, otherwise a default
    HTTPProgressHandler wired to download_callback; adds *proxy* when
    given.
    """
    handlers = [progress_handler if progress_handler
                else HTTPProgressHandler(download_callback)]
    if proxy:
        handlers.append(proxy)
    return urllib2.build_opener(*handlers)
71
72 class Feed:
73     serial_execution_lock = threading.Lock()
74
75     def _getdb(self):
76         try:
77             db = self.tls.db
78         except AttributeError:
79             db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
80             self.tls.db = db
81         return db
82     db = property(_getdb)
83
84     def __init__(self, configdir, key):
85         self.key = key
86         self.configdir = configdir
87         self.dir = "%s/%s.d" %(self.configdir, self.key)
88         self.tls = threading.local ()
89
90         if not isdir(self.dir):
91             mkdir(self.dir)
92         if not isfile("%s/%s.db" %(self.dir, self.key)):
93             self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
94             self.db.execute("CREATE TABLE images (id text, imagePath text);")
95             self.db.commit()
96
97     def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
98         filename = configdir+key+".d/"+getId(url)
99         if not isfile(filename):
100             try:
101                 if not opener:
102                     opener = downloader(proxy=proxy)
103
104                 abs_url = urljoin(baseurl,url)
105                 f = opener.open(abs_url)
106                 outf = open(filename, "w")
107                 outf.write(f.read())
108                 f.close()
109                 outf.close()
110             except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
111                 logger.info("Could not download image %s: %s"
112                             % (abs_url, str (exception)))
113                 return None
114             except:
115                 exception = sys.exc_info()[0]
116
117                 logger.info("Downloading image %s: %s" %
118                             (abs_url, traceback.format_exc()))
119                 try:
120                     remove(filename)
121                 except OSError:
122                     pass
123
124                 raise exception
125         else:
126             #open(filename,"a").close()  # "Touch" the file
127             file = open(filename,"a")
128             utime(filename, None)
129             file.close()
130         return filename
131
132     def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
133         def doit():
134             def it():
135                 self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
136             return it
137         JobManager().execute(doit(), self.key, priority=priority)
138
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Fetch and process feed *url*, storing new articles in the database.

        Runs on a JobManager worker thread.  Caches images when
        *imageCache* is set, expires old articles (*expiryTime* is in
        hours), reports progress to Woodchuck when available, and always
        finishes by calling *postFeedUpdateFunc* (if given) with the
        update time, etag, modified value and feed title.
        """
        success = False
        have_serial_execution_lock = False
        try:
            update_lock = None
            # NOTE(review): the lock name is the literal string "key",
            # not self.key -- as written this serializes updates across
            # *all* feeds; confirm whether a per-feed lock was intended.
            update_lock = get_lock("key")
            if not update_lock:
                # Someone else is doing an update.
                return

            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            have_woodchuck = mainthread.execute (wc().available)

            # Report a successful stream update to Woodchuck; must be
            # dispatched on the main thread (see mainthread.execute).
            def wc_success():
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    logger.warn(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key))

            http_status = tmp.get ('status', 200)

            # Check if the parse was succesful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This make a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute (wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occured fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed our or
                # '' if the data is not a proper feed)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.version),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))
                logger.debug(tmp)
                if have_woodchuck:
                    def e():
                        logger.debug("%s: stream update failed!" % self.key)

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        # Map the HTTP status class to a Woodchuck
                        # transfer-failure code.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    mainthread.execute (e, async=True)
            else:
               currentTime = time.time()
               # The etag and modified value should only be updated if the content was not null
               try:
                   etag = tmp["etag"]
               except KeyError:
                   etag = None
               try:
                   modified = tmp["modified"]
               except KeyError:
                   modified = None
               # Best effort: cache the site's favicon next to the feed.
               try:
                   abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                   f = opener.open(abs_url)
                   data = f.read()
                   f.close()
                   outf = open(self.dir+"/favicon.ico", "w")
                   outf.write(data)
                   outf.close()
                   del data
               except (urllib2.HTTPError, urllib2.URLError), exception:
                   logger.debug("Could not download favicon %s: %s"
                                % (abs_url, str (exception)))

               self.serial_execution_lock.acquire ()
               have_serial_execution_lock = True

               #reversedEntries = self.getEntries()
               #reversedEntries.reverse()

               ids = self.getIds()

               tmp["entries"].reverse()
               for entry in tmp["entries"]:
                   # Yield so as to make the main thread a bit more
                   # responsive.
                   time.sleep(0)

                   if JobManager().do_quit:
                       raise KeyboardInterrupt

                   received_base = progress_handler.stats['received']
                   sent_base = progress_handler.stats['sent']
                   object_size = 0

                   date = self.extractDate(entry)
                   try:
                       entry["title"]
                   except KeyError:
                       entry["title"] = "No Title"
                   try :
                       entry["link"]
                   except KeyError:
                       entry["link"] = ""
                   try:
                       entry["author"]
                   except KeyError:
                       entry["author"] = None
                   if(not(entry.has_key("id"))):
                       entry["id"] = None
                   content = self.extractContent(entry)
                   object_size = len (content)
                   received_base -= len (content)
                   tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                   id = self.generateUniqueId(tmpEntry)

                   #articleTime = time.mktime(self.entries[id]["dateTuple"])
                   soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                   images = soup('img')
                   baseurl = tmpEntry["link"]
                   #if not id in ids:
                   if imageCache and len(images) > 0:
                       # Image downloads don't touch the database, so
                       # release the serial-execution lock while fetching.
                       self.serial_execution_lock.release ()
                       have_serial_execution_lock = False
                       for img in images:
                            filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                            if filename:
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                       self.serial_execution_lock.acquire ()
                       have_serial_execution_lock = True

                   tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                   file = open(tmpEntry["contentLink"], "w")
                   file.write(soup.prettify())
                   file.close()
                   if id in ids:
                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                       self.db.commit()
                   else:
                       values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                       self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                       self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                   # Register the object with Woodchuck and mark it as
                   # downloaded.
                   if have_woodchuck:
                       def e():
                           try:
                               obj = wc()[self.key].object_register(
                                   object_identifier=id,
                                   human_readable_name=tmpEntry["title"])
                           except woodchuck.ObjectExistsError:
                               obj = wc()[self.key][id]
                           else:
                               # If the entry does not contain a publication
                               # time, the attribute won't exist.
                               pubtime = entry.get ('date_parsed', None)
                               if pubtime:
                                   obj.publication_time = time.mktime (pubtime)

                               received = (progress_handler.stats['received']
                                           - received_base)
                               sent = progress_handler.stats['sent'] - sent_base
                               obj.transferred (
                                   indicator=(woodchuck.Indicator.ApplicationVisual
                                              |woodchuck.Indicator.StreamWide),
                                   transferred_down=received,
                                   transferred_up=sent,
                                   object_size=object_size)
                       mainthread.execute(e, async=True)
               self.db.commit()

               logger.debug (
                   "%s: Update successful: transferred: %d/%d; objects: %d)"
                   % (self.key,
                      progress_handler.stats['sent'],
                      progress_handler.stats['received'],
                      len (tmp.entries)))
               mainthread.execute (wc_success, async=True)
               success = True

            # Expire old articles: read articles after one expiry period,
            # unread ones after two.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
               self.removeEntry(row[0])

            from glob import glob
            from os import stat
            # Sweep the cache directory for files whose mtime is older
            # than three expiry periods.
            for file in glob(configdir+self.key+".d/*"):
                #
                stats = stat(file)
                #
                # put the two dates into matching format
                #
                lastmodDate = stats[8]
                #
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                #
                if expDate > lastmodDate:
                    #
                    try:
                        #
                        #print 'Removing', file
                        #
                        # XXX: Tell woodchuck.
                        remove(file) # commented out for testing
                        #
                    except OSError, exception:
                        #
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            if update_lock is not None:
                release_lock (update_lock)

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime=row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
            finally:
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    pass
                # Hand results back to the caller; runs even on failure
                # so the UI can update its state.
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)
457
458     def setEntryRead(self, id):
459         self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
460         self.db.commit()
461
462         def e():
463             if wc().available():
464                 try:
465                     wc()[self.key][id].used()
466                 except KeyError:
467                     pass
468
469     def setEntryUnread(self, id):
470         self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
471         self.db.commit()     
472         
473     def markAllAsRead(self):
474         self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
475         self.db.commit()
476
477     def isEntryRead(self, id):
478         read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
479         return read_status==1  # Returns True if read==1, and False if read==0
480     
481     def getTitle(self, id):
482         return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
483     
484     def getContentLink(self, id):
485         return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
486     
487     def getExternalLink(self, id):
488         return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
489     
490     def getDate(self, id):
491         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
492         return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
493
494     def getDateTuple(self, id):
495         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
496         return time.localtime(dateStamp)
497     
498     def getDateStamp(self, id):
499         return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
500     
501     def generateUniqueId(self, entry):
502         """
503         Generate a stable identifier for the article.  For the same
504         entry, this should result in the same identifier.  If
505         possible, the identifier should remain the same even if the
506         article is updated.
507         """
508         # Prefer the entry's id, which is supposed to be globally
509         # unique.
510         key = entry.get('id', None)
511         if not key:
512             # Next, try the link to the content.
513             key = entry.get('link', None)
514         if not key:
515             # Ok, the title and the date concatenated are likely to be
516             # relatively stable.
517             key = entry.get('title', None) + entry.get('date', None)
518         if not key:
519             # Hmm, the article's content will at least guarantee no
520             # false negatives (i.e., missing articles)
521             key = entry.get('content', None)
522         if not key:
523             # If all else fails, just use a random number.
524             key = str (random.random ())
525         return getId (key)
526     
527     def getIds(self, onlyUnread=False):
528         if onlyUnread:
529             rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
530         else:
531             rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
532         ids = []
533         for row in rows:
534             ids.append(row[0])
535         #ids.reverse()
536         return ids
537     
538     def getNextId(self, id):
539         ids = self.getIds()
540         index = ids.index(id)
541         return ids[(index+1)%len(ids)]
542         
543     def getPreviousId(self, id):
544         ids = self.getIds()
545         index = ids.index(id)
546         return ids[(index-1)%len(ids)]
547     
548     def getNumberOfUnreadItems(self):
549         return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
550     
551     def getNumberOfEntries(self):
552         return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
553
554     def getArticle(self, entry):
555         #self.setEntryRead(id)
556         #entry = self.entries[id]
557         title = entry['title']
558         #content = entry.get('content', entry.get('summary_detail', {}))
559         content = entry["content"]
560
561         link = entry['link']
562         author = entry['author']
563         date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
564
565         #text = '''<div style="color: black; background-color: white;">'''
566         text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
567         text += "<html><head><title>" + title + "</title>"
568         text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
569         #text += '<style> body {-webkit-user-select: none;} </style>'
570         text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
571         if author != None:
572             text += "<BR /><small><i>Author: " + author + "</i></small>"
573         text += "<BR /><small><i>Date: " + date + "</i></small></div>"
574         text += "<BR /><BR />"
575         text += content
576         text += "</body></html>"
577         return text
578    
579     def getContent(self, id):
580         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
581         try:
582             file = open(self.entries[id]["contentLink"])
583             content = file.read()
584             file.close()
585         except:
586             content = "Content unavailable"
587         return content
588     
589     def extractDate(self, entry):
590         if entry.has_key("updated_parsed"):
591             return timegm(entry["updated_parsed"])
592         elif entry.has_key("published_parsed"):
593             return timegm(entry["published_parsed"])
594         else:
595             return time.time()
596         
597     def extractContent(self, entry):
598         content = ""
599         if entry.has_key('summary'):
600             content = entry.get('summary', '')
601         if entry.has_key('content'):
602             if len(entry.content[0].value) > len(content):
603                 content = entry.content[0].value
604         if content == "":
605             content = entry.get('description', '')
606         return content
607     
    def removeEntry(self, id):
        """Delete article *id*: its cached HTML file, its feed and image
        rows, and (asynchronously) its Woodchuck object."""
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

        # Tell Woodchuck the files are gone; must run on the main thread.
        def e():
            if wc().available():
                try:
                    wc()[self.key][id].files_deleted (
                        woodchuck.DeletionResponse.Deleted)
                    del wc()[self.key][id]
                except KeyError:
                    pass
        mainthread.execute (e, async=True)
628  
class ArchivedArticles(Feed):
    """A pseudo-feed of locally archived articles.

    Articles are added explicitly (addArchivedArticle); updateFeed then
    downloads each not-yet-fetched article's page and images instead of
    polling an RSS URL.
    """
    def addArchivedArticle(self, title, link, date, configdir):
        # Insert a placeholder row; updated=0 marks it as not yet
        # downloaded (see updateFeed below).
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    # NOTE(review): signature and return value differ from
    # Feed.updateFeed -- this variant runs synchronously and returns
    # (updateTime, etag, modified); confirm callers expect that.
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        currentTime = 0
        # Rows with updated=0 are archived links whose content has not
        # been fetched yet.
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            # NOTE(review): no error handling around the download; a
            # network failure aborts the whole update.
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                img['src']=filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)

    def purgeReadArticles(self):
        # Remove every article already marked as read.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        # Delete the article's image files first -- but only images not
        # shared with another article -- then remove the article itself.
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                pass
        self.removeEntry(id)
680
681 class Listing:
682     def _getdb(self):
683         try:
684             db = self.tls.db
685         except AttributeError:
686             db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
687             self.tls.db = db
688         return db
689     db = property(_getdb)
690
691     # Lists all the feeds in a dictionary, and expose the data
692     def __init__(self, config, configdir):
693         self.config = config
694         self.configdir = configdir
695
696         self.tls = threading.local ()
697         
698         try:
699             table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
700             if table == None:
701                 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
702                 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
703                 self.addCategory("Default Category")
704                 if isfile(self.configdir+"feeds.pickle"):
705                     self.importOldFormatFeeds()
706                 else:
707                     self.addFeed("Maemo News", "http://maemo.org/news/items.xml")    
708             else:
709                 from string import find, upper
710                 if find(upper(table[0]), "WIDGET")<0:
711                     self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
712                     self.db.execute("UPDATE feeds SET widget=1;")
713                     self.db.commit()
714                 if find(upper(table[0]), "CATEGORY")<0:
715                     self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
716                     self.addCategory("Default Category")
717                     self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
718                     self.db.execute("UPDATE feeds SET category=1;")
719             self.db.commit()
720         except:
721             pass
722
723         # Check that Woodchuck's state is up to date with respect our
724         # state.
725         wc_init (self)
726         if wc().available():
727             # The list of known streams.
728             streams = wc().streams_list ()
729             stream_ids = [s.identifier for s in streams]
730
731             # Register any unknown streams.  Remove known streams from
732             # STREAMS_IDS.
733             for key in self.getListOfFeeds():
734                 title = self.getFeedTitle(key)
735                 # XXX: We should also check whether the list of
736                 # articles/objects in each feed/stream is up to date.
737                 if key not in stream_ids:
738                     logger.debug(
739                         "Registering previously unknown channel: %s (%s)"
740                         % (key, title,))
741                     # Use a default refresh interval of 6 hours.
742                     wc().stream_register (key, title, 6 * 60 * 60)
743                 else:
744                     # Make sure the human readable name is up to date.
745                     if wc()[key].human_readable_name != title:
746                         wc()[key].human_readable_name = title
747                     stream_ids.remove (key)
748                     
749
750             # Unregister any streams that are no longer subscribed to.
751             for id in stream_ids:
752                 logger.debug("Unregistering %s" % (id,))
753                 w.stream_unregister (id)
754
755     def importOldFormatFeeds(self):
756         """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
757         import rss
758         listing = rss.Listing(self.configdir)
759         rank = 0
760         for id in listing.getListOfFeeds():
761             try:
762                 rank += 1
763                 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
764                 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
765                 self.db.commit()
766                 
767                 feed = listing.getFeed(id)
768                 new_feed = self.getFeed(id)
769                 
770                 items = feed.getIds()[:]
771                 items.reverse()
772                 for item in items:
773                         if feed.isEntryRead(item):
774                             read_status = 1
775                         else:
776                             read_status = 0 
777                         date = timegm(feed.getDateTuple(item))
778                         title = feed.getTitle(item)
779                         newId = new_feed.generateUniqueId({"date":date, "title":title})
780                         values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
781                         new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
782                         new_feed.db.commit()
783                         try:
784                             images = feed.getImages(item)
785                             for image in images:
786                                 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
787                                 new_feed.db.commit()
788                         except:
789                             pass
790                 self.updateUnread(id)
791             except:
792                 logger.error("importOldFormatFeeds: %s"
793                              % (traceback.format_exc(),))
794         remove(self.configdir+"feeds.pickle")
795                 
796         
797     def addArchivedArticle(self, key, index):
798         feed = self.getFeed(key)
799         title = feed.getTitle(index)
800         link = feed.getExternalLink(index)
801         date = feed.getDate(index)
802         count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
803         if count == 0:
804             self.addFeed("Archived Articles", "", id="ArchivedArticles")
805
806         archFeed = self.getFeed("ArchivedArticles")
807         archFeed.addArchivedArticle(title, link, date, self.configdir)
808         self.updateUnread("ArchivedArticles")
809         
810     def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
811                    priority=0):
812         if expiryTime is None:
813             expiryTime = self.config.getExpiry()
814         if not expiryTime:
815             # Default to 24 hours
816             expriyTime = 24
817         if proxy is None:
818             (use_proxy, proxy) = self.config.getProxy()
819             if not use_proxy:
820                 proxy = None
821         if imageCache is None:
822             imageCache = self.config.getImageCache()
823
824         feed = self.getFeed(key)
825         (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
826         try:
827             modified = time.struct_time(eval(modified))
828         except:
829             modified = None
830         feed.updateFeed(
831             self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
832             priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
833
    def _queuePostFeedUpdate(self, *args, **kwargs):
        # Forward the update results to _postFeedUpdate on the main
        # thread; `async' is mainthread.execute's keyword argument
        # (meaning "do not block").  NOTE(review): `async' became a
        # reserved word in Python 3.7, so this line is Python-2 only.
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
836
837     def _postFeedUpdate(self, key, updateTime, etag, modified, title):
838         if modified==None:
839             modified="None"
840         else:
841             modified=str(tuple(modified))
842         if updateTime > 0:
843             self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
844         else:
845             self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
846
847         if title is not None:
848             self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
849                             (title, key))
850         self.db.commit()
851         self.updateUnread(key)
852         
853     def getFeed(self, key):
854         if key == "ArchivedArticles":
855             return ArchivedArticles(self.configdir, key)
856         return Feed(self.configdir, key)
857         
858     def editFeed(self, key, title, url, category=None):
859         if category:
860             self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
861         else:
862             self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
863         self.db.commit()
864
865         if wc().available():
866             try:
867                 wc()[key].human_readable_name = title
868             except KeyError:
869                 logger.debug("Feed %s (%s) unknown." % (key, title))
870         
871     def getFeedUpdateTime(self, key):
872         return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
873         
874     def getFeedNumberOfUnreadItems(self, key):
875         return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
876         
877     def getFeedTitle(self, key):
878         (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
879         if title:
880             return title
881         return url
882         
883     def getFeedUrl(self, key):
884         return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
885     
886     def getFeedCategory(self, key):
887         return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
888         
889     def getListOfFeeds(self, category=None):
890         if category:
891             rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
892         else:
893             rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
894         keys = []
895         for row in rows:
896             if row[0]:
897                 keys.append(row[0])
898         return keys
899     
900     def getListOfCategories(self):
901         rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
902         keys = []
903         for row in rows:
904             if row[0]:
905                 keys.append(row[0])
906         return keys
907     
908     def getCategoryTitle(self, id):
909         row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
910         return row[0]
911     
912     def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
913         if   order == "Most unread":
914             tmp = "ORDER BY unread DESC"
915             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
916         elif order == "Least unread":
917             tmp = "ORDER BY unread"
918             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
919         elif order == "Most recent":
920             tmp = "ORDER BY updateTime DESC"
921             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
922         elif order == "Least recent":
923             tmp = "ORDER BY updateTime"
924             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
925         else: # order == "Manual" or invalid value...
926             tmp = "ORDER BY rank"
927             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
928         if onlyUnread:
929             sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp 
930         else:
931             sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
932         rows = self.db.execute(sql)
933         keys = []
934         for row in rows:
935             if row[0]:
936                 keys.append(row[0])
937         return keys
938     
939     def getFavicon(self, key):
940         filename = "%s%s.d/favicon.ico" % (self.configdir, key)
941         if isfile(filename):
942             return filename
943         else:
944             return False
945         
946     def updateUnread(self, key):
947         feed = self.getFeed(key)
948         self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
949         self.db.commit()
950
951     def addFeed(self, title, url, id=None, category=1):
952         if not id:
953             id = getId(url)
954         count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
955         if count == 0:
956             max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
957             if max_rank == None:
958                 max_rank = 0
959             values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
960             self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
961             self.db.commit()
962             # Ask for the feed object, it will create the necessary tables
963             self.getFeed(id)
964
965             if wc().available():
966                 # Register the stream with Woodchuck.  Update approximately
967                 # every 6 hours.
968                 wc().stream_register(stream_identifier=id,
969                                      human_readable_name=title,
970                                      freshness=6*60*60)
971
972             return True
973         else:
974             return False
975         
976     def addCategory(self, title):
977         rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
978         if rank==None:
979             rank=1
980         id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
981         if id==None:
982             id=1
983         self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
984         self.db.commit()
985     
986     def removeFeed(self, key):
987         if wc().available ():
988             try:
989                 del wc()[key]
990             except KeyError:
991                 logger.debug("Removing unregistered feed %s failed" % (key,))
992
993         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
994         self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
995         self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
996         self.db.commit()
997
998         if isdir(self.configdir+key+".d/"):
999            rmtree(self.configdir+key+".d/")
1000            
1001     def removeCategory(self, key):
1002         if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
1003             rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
1004             self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
1005             self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
1006             self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
1007             self.db.commit()
1008         
1009     #def saveConfig(self):
1010     #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
1011     #    file = open(self.configdir+"feeds.pickle", "w")
1012     #    pickle.dump(self.listOfFeeds, file)
1013     #    file.close()
1014         
1015     def moveUp(self, key):
1016         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1017         if rank>0:
1018             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
1019             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
1020             self.db.commit()
1021             
1022     def moveCategoryUp(self, key):
1023         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1024         if rank>0:
1025             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
1026             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
1027             self.db.commit()
1028         
1029     def moveDown(self, key):
1030         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1031         max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
1032         if rank<max_rank:
1033             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
1034             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
1035             self.db.commit()
1036             
1037     def moveCategoryDown(self, key):
1038         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1039         max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
1040         if rank<max_rank:
1041             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
1042             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
1043             self.db.commit()
1044             
1045