1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # ============================================================================
22 # Author : Yves Marcoz
24 # Description : Simple RSS Reader
25 # ============================================================================
28 from os.path import isfile, isdir
29 from shutil import rmtree
30 from os import mkdir, remove, utime
35 from BeautifulSoup import BeautifulSoup
36 from urlparse import urljoin
37 from calendar import timegm
41 return md5.new(string).hexdigest()
47 except AttributeError:
48 db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
53 def __init__(self, configdir, key):
55 self.configdir = configdir
56 self.dir = "%s/%s.d" %(self.configdir, self.key)
57 self.tls = threading.local ()
59 if not isdir(self.dir):
61 if not isfile("%s/%s.db" %(self.dir, self.key)):
62 self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
63 self.db.execute("CREATE TABLE images (id text, imagePath text);")
66 def addImage(self, configdir, key, baseurl, url):
67 filename = configdir+key+".d/"+getId(url)
68 if not isfile(filename):
70 f = urllib2.urlopen(urljoin(baseurl,url))
71 outf = open(filename, "w")
76 print "Could not download " + url
78 #open(filename,"a").close() # "Touch" the file
79 file = open(filename,"a")
84 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
85 # Expiry time is in hours
87 tmp=feedparser.parse(url, etag = etag, modified = modified)
89 tmp=feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
90 expiry = float(expiryTime) * 3600.
93 # Check if the parse was successful (number of entries > 0, else do nothing)
94 if len(tmp["entries"])>0:
95 currentTime = time.time()
96 # The etag and modified value should only be updated if the content was not null
102 modified = tmp["modified"]
106 f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
109 outf = open(self.dir+"/favicon.ico", "w")
115 #traceback.print_exc()
119 #reversedEntries = self.getEntries()
120 #reversedEntries.reverse()
124 tmp["entries"].reverse()
125 for entry in tmp["entries"]:
126 date = self.extractDate(entry)
130 entry["title"] = "No Title"
138 entry["author"] = None
139 if(not(entry.has_key("id"))):
141 tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
142 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
143 id = self.generateUniqueId(tmpEntry)
145 #articleTime = time.mktime(self.entries[id]["dateTuple"])
146 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
148 baseurl = tmpEntry["link"]
153 filename = self.addImage(configdir, self.key, baseurl, img['src'])
154 img['src']="file://%s" %filename
155 count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
157 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
160 traceback.print_exc()
161 print "Error downloading image %s" % img
162 tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
163 file = open(tmpEntry["contentLink"], "w")
164 file.write(soup.prettify())
167 self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
170 values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
171 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
175 # self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
177 # filename = configdir+self.key+".d/"+id+".html"
178 # file = open(filename,"a")
179 # utime(filename, None)
181 # images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
182 # for image in images:
183 # file = open(image[0],"a")
184 # utime(image[0], None)
191 rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
193 self.removeEntry(row[0])
195 from glob import glob
197 for file in glob(configdir+self.key+".d/*"):
201 # put the two dates into matching format
203 lastmodDate = stats[8]
205 expDate = time.time()-expiry*3
206 # check if image-last-modified-date is outdated
208 if expDate > lastmodDate:
212 #print 'Removing', file
214 remove(file) # commented out for testing
218 print 'Could not remove', file
220 rows = self.db.execute("SELECT MAX(date) FROM feed;")
223 return (updateTime, etag, modified)
def setEntryRead(self, id):
    """Flag the entry `id` as read and persist the change.

    id -- value of the `id` column in the feed table.
    """
    self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,))
    # sqlite3 opens an implicit transaction for UPDATE statements; without
    # a commit the change is lost when the connection goes away.
    self.db.commit()
def setEntryUnread(self, id):
    """Flag the entry `id` as unread and persist the change.

    id -- value of the `id` column in the feed table.
    """
    self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,))
    # Commit explicitly: the UPDATE runs inside an implicit transaction.
    self.db.commit()
def markAllAsRead(self):
    """Flag every unread entry of this feed as read and persist the change."""
    self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
    # Commit explicitly: the UPDATE runs inside an implicit transaction.
    self.db.commit()
def isEntryRead(self, id):
    """Tell whether the entry `id` is flagged as read.

    Returns True when the stored `read` column equals 1, False otherwise.
    """
    cursor = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0] == 1
def getTitle(self, id):
    """Return the stored title of the entry `id`."""
    cursor = self.db.execute("SELECT title FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getContentLink(self, id):
    """Return the stored `contentLink` value of the entry `id`."""
    cursor = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getExternalLink(self, id):
    """Return the stored `link` value of the entry `id`."""
    cursor = self.db.execute("SELECT link FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getDate(self, id):
    """Return the date of the entry `id` as a formatted local-time string.

    The stored `date` value is converted with time.localtime() and rendered
    as e.g. "Thu, 01 Jan 1970 00:00:00".
    """
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    stamp = cursor.fetchone()[0]
    local = time.localtime(stamp)
    return time.strftime("%a, %d %b %Y %H:%M:%S", local)
def getDateTuple(self, id):
    """Return the date of the entry `id` as a local-time struct_time."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    stamp = cursor.fetchone()[0]
    return time.localtime(stamp)
def getDateStamp(self, id):
    """Return the raw stored `date` value of the entry `id`."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def generateUniqueId(self, entry):
    """Derive the database id for `entry` by hashing it with getId().

    entry -- dict with a "date" key and optionally "id" and "title".

    The entry's own id is hashed when present; otherwise date + title is
    used, and the date alone as a last resort.
    """
    # Callers in this file also pass dicts without an "id" key (only date
    # and title), so a plain entry["id"] lookup would raise KeyError.
    entry_id = entry.get("id")
    if entry_id is not None:
        return getId(str(entry_id))
    try:
        return getId(str(entry["date"]) + str(entry["title"]))
    except (UnicodeError, KeyError):
        # Title missing or not convertible with str(): hash the date only.
        return getId(str(entry["date"]))
def getIds(self, onlyUnread=False):
    """Return the ids of this feed's entries, newest first.

    onlyUnread -- when true, only entries with read=0 are returned.
    """
    # Run exactly one of the two queries, selected by the flag.
    if onlyUnread:
        query = "SELECT id FROM feed where read=0 ORDER BY date DESC;"
    else:
        query = "SELECT id FROM feed ORDER BY date DESC;"
    rows = self.db.execute(query).fetchall()
    return [row[0] for row in rows]
def getNextId(self, id):
    """Return the id following `id` in getIds() order, wrapping around.

    Raises ValueError when `id` is not among the ids of this feed.
    """
    # The id list has to be fetched before it can be searched.
    ids = self.getIds()
    index = ids.index(id)
    return ids[(index + 1) % len(ids)]
def getPreviousId(self, id):
    """Return the id preceding `id` in getIds() order, wrapping around.

    Raises ValueError when `id` is not among the ids of this feed.
    """
    # The id list has to be fetched before it can be searched.
    ids = self.getIds()
    index = ids.index(id)
    return ids[(index - 1) % len(ids)]
def getNumberOfUnreadItems(self):
    """Return how many entries of this feed have read=0."""
    cursor = self.db.execute("SELECT count(*) FROM feed WHERE read=0;")
    row = cursor.fetchone()
    return row[0]
def getNumberOfEntries(self):
    """Return the total number of entries stored for this feed."""
    cursor = self.db.execute("SELECT count(*) FROM feed;")
    row = cursor.fetchone()
    return row[0]
def getArticle(self, entry):
    """Build the standalone HTML page for one feed entry.

    entry -- dict with the keys "title", "content", "link", "author"
             (may be None) and "date" (seconds since the epoch).

    Returns the page as a string: a linked title, optional author line,
    local-time date line, then the entry content.
    """
    title = entry['title']
    content = entry["content"]
    # The link is needed for the title anchor below.
    link = entry['link']
    author = entry['author']
    date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]))

    parts = [
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
        "<html><head><title>" + title + "</title>",
        '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n',
        '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>",
    ]
    # Entries without an author get no author line; concatenating None
    # would raise TypeError.
    if author is not None:
        parts.append("<BR /><small><i>Author: " + author + "</i></small>")
    parts.append("<BR /><small><i>Date: " + date + "</i></small></div>")
    parts.append("<BR /><BR />")
    parts.append(content)
    parts.append("</body></html>")
    return "".join(parts)
def getContent(self, id):
    """Return the stored article text of the entry `id`.

    The file named by the entry's `contentLink` column is read and
    returned. "Content unavailable" is returned when the entry has no
    content link or the file cannot be read.
    """
    row = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()
    contentLink = None
    if row is not None:
        contentLink = row[0]
    if not contentLink:
        return "Content unavailable"
    try:
        # Read through the link fetched above; the class keeps no
        # in-memory entries mapping to look it up in.
        handle = open(contentLink)
        try:
            content = handle.read()
        finally:
            handle.close()
    except (IOError, OSError):
        content = "Content unavailable"
    return content
def extractDate(self, entry):
    """Return the timestamp of a parsed feed entry, in seconds since epoch.

    entry -- mapping that may carry "updated_parsed" and/or
             "published_parsed" UTC time tuples.

    "updated_parsed" wins over "published_parsed". When neither holds a
    usable value the current time is returned, so every entry gets a
    numeric date.
    """
    for field in ("updated_parsed", "published_parsed"):
        parsed = entry.get(field)
        # A key may be present but None; timegm(None) would raise.
        if parsed is not None:
            return timegm(parsed)
    return time.time()
def extractContent(self, entry):
    """Pick the richest text available in a parsed feed entry.

    entry -- mapping that may carry "summary", "content" (a list whose
             first item has a "value") and "description".

    Returns the longer of the summary and the first content value, falling
    back to the description when both are empty; "" when nothing is found.
    """
    content = entry.get('summary', '')
    blocks = entry.get('content')
    if blocks:
        body = blocks[0]['value']
        if len(body) > len(content):
            content = body
    # Only fall back to the description when nothing else was found;
    # otherwise it would overwrite the text selected above.
    if content == "":
        content = entry.get('description', '')
    return content
def removeEntry(self, id):
    """Delete the entry `id`: its content file and its database rows.

    The file named by the entry's `contentLink` is removed (a failure is
    reported on stdout and otherwise ignored), then the rows for `id` are
    deleted from the feed and images tables and the change is committed.
    """
    import sys  # local import: only needed for the failure report below

    contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
    if contentLink:
        try:
            remove(contentLink)
        except OSError:
            # sys.exc_info() and sys.stdout.write() keep this block valid on
            # both Python 2.5 and Python 3.
            exception = sys.exc_info()[1]
            sys.stdout.write("Deleting %s: %s\n" % (contentLink, str(exception)))
    self.db.execute("DELETE FROM feed WHERE id=?;", (id,))
    self.db.execute("DELETE FROM images WHERE id=?;", (id,))
    # Commit explicitly: the DELETEs run inside an implicit transaction.
    self.db.commit()
363 class ArchivedArticles(Feed):
364 def addArchivedArticle(self, title, link, date, configdir):
365 id = self.generateUniqueId({"date":date, "title":title})
366 values = (id, title, link, date, 0, link, 0)
367 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
370 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
372 rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
374 currentTime = time.time()
377 f = urllib2.urlopen(link)
378 #entry["content"] = f.read()
381 soup = BeautifulSoup(html)
385 filename = self.addImage(configdir, self.key, baseurl, img['src'])
387 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
388 contentLink = configdir+self.key+".d/"+id+".html"
389 file = open(contentLink, "w")
390 file.write(soup.prettify())
393 self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
395 return (currentTime, None, None)
397 def purgeReadArticles(self):
398 rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
401 self.removeArticle(row[0])
403 def removeArticle(self, id):
404 rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
407 count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
418 except AttributeError:
419 db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
422 db = property(_getdb)
424 # Lists all the feeds in a dictionary, and expose the data
425 def __init__(self, configdir):
426 self.configdir = configdir
428 self.tls = threading.local ()
431 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
433 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
434 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
435 self.addCategory("Default Category")
436 if isfile(self.configdir+"feeds.pickle"):
437 self.importOldFormatFeeds()
439 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
441 from string import find, upper
442 if find(upper(table[0]), "WIDGET")<0:
443 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
444 self.db.execute("UPDATE feeds SET widget=1;")
446 if find(upper(table[0]), "CATEGORY")<0:
447 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
448 self.addCategory("Default Category")
449 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
450 self.db.execute("UPDATE feeds SET category=1;")
455 def importOldFormatFeeds(self):
456 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
458 listing = rss.Listing(self.configdir)
460 for id in listing.getListOfFeeds():
463 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
464 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
467 feed = listing.getFeed(id)
468 new_feed = self.getFeed(id)
470 items = feed.getIds()[:]
473 if feed.isEntryRead(item):
477 date = timegm(feed.getDateTuple(item))
478 title = feed.getTitle(item)
479 newId = new_feed.generateUniqueId({"date":date, "title":title})
480 values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
481 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
484 images = feed.getImages(item)
486 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
490 self.updateUnread(id)
493 traceback.print_exc()
494 remove(self.configdir+"feeds.pickle")
def addArchivedArticle(self, key, index):
    """Copy the entry `index` of feed `key` into the archived-articles feed.

    key   -- id of the source feed.
    index -- id of the entry inside that feed.

    The "ArchivedArticles" feed is created on first use, and its unread
    counter is refreshed afterwards.
    """
    feed = self.getFeed(key)
    title = feed.getTitle(index)
    link = feed.getExternalLink(index)
    # Hand over the raw timestamp: the feed table stores `date` as a number
    # and the date getters feed it to time.localtime(), which rejects the
    # formatted string returned by getDate().
    date = feed.getDateStamp(index)
    count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
    if count == 0:
        self.addFeed("Archived Articles", "", id="ArchivedArticles")

    archFeed = self.getFeed("ArchivedArticles")
    archFeed.addArchivedArticle(title, link, date, self.configdir)
    self.updateUnread("ArchivedArticles")
510 def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
511 feed = self.getFeed(key)
512 (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
514 modified = time.struct_time(eval(modified))
517 (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, modified, expiryTime, proxy, imageCache)
521 modified=str(tuple(modified))
523 self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
525 self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
527 self.updateUnread(key)
def getFeed(self, key):
    """Return the feed object for `key`.

    The key "ArchivedArticles" yields an ArchivedArticles instance; every
    other key yields a plain Feed. Both are built from self.configdir.
    """
    if key != "ArchivedArticles":
        return Feed(self.configdir, key)
    return ArchivedArticles(self.configdir, key)
def editFeed(self, key, title, url, category=None):
    """Change the title, URL and optionally the category of feed `key`.

    category -- new category id; when omitted (or otherwise falsy) the
                feed keeps its current category.
    """
    # Only touch the category column when a category was actually given;
    # otherwise the default None would overwrite it with NULL.
    if category:
        self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
    else:
        self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
    # Commit explicitly: the UPDATE runs inside an implicit transaction.
    self.db.commit()
def getFeedUpdateTime(self, key):
    """Return the stored update time of feed `key`, formatted by time.ctime()."""
    cursor = self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,))
    stamp = cursor.fetchone()[0]
    return time.ctime(stamp)
def getFeedNumberOfUnreadItems(self, key):
    """Return the unread counter stored in the feeds table for feed `key`."""
    cursor = self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedTitle(self, key):
    """Return the stored title of feed `key`."""
    cursor = self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedUrl(self, key):
    """Return the stored URL of feed `key`."""
    cursor = self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedCategory(self, key):
    """Return the stored category of feed `key`."""
    cursor = self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getListOfFeeds(self, category=None):
    """Return the feed ids ordered by rank.

    category -- when given, only feeds of that category are listed;
                when omitted (or otherwise falsy) all feeds are listed.
    """
    # Run exactly one of the two queries, selected by the argument.
    if category:
        rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
    else:
        rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
    # Rows without a usable id cannot be looked up again, so drop them.
    return [row[0] for row in rows if row[0]]
def getListOfCategories(self):
    """Return the category ids ordered by rank."""
    rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
    # Rows without a usable id cannot be looked up again, so drop them.
    return [row[0] for row in rows if row[0]]
def getCategoryTitle(self, id):
    """Return the title of category `id`, or None when there is no such row."""
    row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
    if row is None:
        return None
    return row[0]
def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
    """Return the feed ids of one category in the requested order.

    order      -- "Most unread", "Least unread", "Most recent" or
                  "Least recent"; any other value (e.g. "Manual") sorts
                  by rank.
    onlyUnread -- when true, only feeds with unread>0 are listed.
    category   -- category id to list.
    """
    orderings = {
        "Most unread": "ORDER BY unread DESC",
        "Least unread": "ORDER BY unread",
        "Most recent": "ORDER BY updateTime DESC",
        "Least recent": "ORDER BY updateTime",
    }
    # order == "Manual" or invalid value...
    clause = orderings.get(order, "ORDER BY rank")

    # The ORDER BY clause comes from the fixed table above; the category is
    # bound as a parameter instead of being pasted into the SQL text.
    if onlyUnread:
        sql = "SELECT id FROM feeds WHERE unread>0 AND category=? " + clause
    else:
        sql = "SELECT id FROM feeds WHERE category=? " + clause
    rows = self.db.execute(sql, (category,))
    # Rows without a usable id cannot be looked up again, so drop them.
    return [row[0] for row in rows if row[0]]
606 def getFavicon(self, key):
607 filename = "%s%s.d/favicon.ico" % (self.configdir, key)
def updateUnread(self, key):
    """Refresh the unread counter stored in the feeds table for feed `key`.

    The count is taken from the feed object's getNumberOfUnreadItems().
    """
    feed = self.getFeed(key)
    self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
    # Commit explicitly: the UPDATE runs inside an implicit transaction.
    self.db.commit()
618 def addFeed(self, title, url, id=None, category=1):
621 count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
623 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
626 values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
627 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
629 # Ask for the feed object, it will create the necessary tables
def addCategory(self, title):
    """Append a new category named `title` after the existing ones.

    The new row gets the next free rank and the next free numeric id
    (both start at 1 on an empty table) and an unread count of 0.
    """
    rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
    if rank is None:
        # MAX() over an empty table is NULL.
        rank = 1
    # The id column is declared as text, so a plain MAX(id) would compare
    # lexicographically ('9' > '10'); cast to get the numeric maximum.
    new_id = self.db.execute("SELECT MAX(CAST(id AS INTEGER))+1 FROM categories;").fetchone()[0]
    if new_id is None:
        new_id = 1
    self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (new_id, title, rank))
    # Commit explicitly: the INSERT runs inside an implicit transaction.
    self.db.commit()
def removeFeed(self, key):
    """Delete feed `key` from the listing and remove its data directory.

    The ranks of the feeds after it are shifted down by one, the change is
    committed, and the directory "<configdir><key>.d/" is removed if it
    exists. Raises TypeError when `key` is not a known feed.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
    self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
    self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
    # Commit explicitly: the statements above run inside an implicit
    # transaction.
    self.db.commit()

    feed_dir = self.configdir+key+".d/"
    if isdir(feed_dir):
        rmtree(feed_dir)
def removeCategory(self, key):
    """Delete category `key` and move its feeds to category 1.

    Nothing happens when only one category is left. Otherwise the ranks of
    the categories after it are shifted down by one and the change is
    committed.
    """
    if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
        self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
        self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
        # Commit explicitly: the statements above run inside an implicit
        # transaction.
        self.db.commit()
662 #def saveConfig(self):
663 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
664 # file = open(self.configdir+"feeds.pickle", "w")
665 # pickle.dump(self.listOfFeeds, file)
def moveUp(self, key):
    """Swap feed `key` with the feed ranked directly above it.

    Nothing happens when the feed already has the lowest rank. Raises
    TypeError when `key` is not a known feed.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    min_rank = self.db.execute("SELECT MIN(rank) FROM feeds;").fetchone()[0]
    # Without this guard the first feed would be pushed below the rank range.
    if rank > min_rank:
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
        # Commit explicitly: the UPDATEs run inside an implicit transaction.
        self.db.commit()
def moveCategoryUp(self, key):
    """Swap category `key` with the category ranked directly above it.

    Nothing happens when the category already has the lowest rank. Raises
    TypeError when `key` is not a known category.
    """
    rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    min_rank = self.db.execute("SELECT MIN(rank) FROM categories;").fetchone()[0]
    # Without this guard the first category would be pushed below the rank
    # range.
    if rank > min_rank:
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
        # Commit explicitly: the UPDATEs run inside an implicit transaction.
        self.db.commit()
def moveDown(self, key):
    """Swap feed `key` with the feed ranked directly below it.

    Nothing happens when the feed already has the highest rank. Raises
    TypeError when `key` is not a known feed.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    # Without this guard the last feed would be pushed past the rank range.
    if rank < max_rank:
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
        # Commit explicitly: the UPDATEs run inside an implicit transaction.
        self.db.commit()
def moveCategoryDown(self, key):
    """Swap category `key` with the category ranked directly below it.

    Nothing happens when the category already has the highest rank. Raises
    TypeError when `key` is not a known category.
    """
    rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
    # Without this guard the last category would be pushed past the rank
    # range.
    if rank < max_rank:
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
        # Commit explicitly: the UPDATEs run inside an implicit transaction.
        self.db.commit()