1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
27 from os.path import isfile, isdir
28 from shutil import rmtree
29 from os import mkdir, remove, utime
34 from BeautifulSoup import BeautifulSoup
35 from urlparse import urljoin
36 from calendar import timegm
39 return md5.new(string).hexdigest()
42 def __init__(self, configdir, key):
44 self.configdir = configdir
45 self.dir = "%s/%s.d" %(self.configdir, self.key)
46 if not isdir(self.dir):
48 if not isfile("%s/%s.db" %(self.dir, self.key)):
49 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
50 self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
51 self.db.execute("CREATE TABLE images (id text, imagePath text);")
54 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
56 def addImage(self, configdir, key, baseurl, url):
57 filename = configdir+key+".d/"+getId(url)
58 if not isfile(filename):
60 f = urllib2.urlopen(urljoin(baseurl,url))
61 outf = open(filename, "w")
66 print "Could not download " + url
68 #open(filename,"a").close() # "Touch" the file
69 file = open(filename,"a")
74 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
75 # Expiry time is in hours
77 tmp=feedparser.parse(url, etag = etag, modified = modified)
79 tmp=feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
80 expiry = float(expiryTime) * 3600.
83 # Check if the parse was successful (number of entries > 0, else do nothing)
84 if len(tmp["entries"])>0:
85 currentTime = time.time()
86 # The etag and modified value should only be updated if the content was not null
92 modified = tmp["modified"]
96 f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
99 outf = open(self.dir+"/favicon.ico", "w")
105 #traceback.print_exc()
109 #reversedEntries = self.getEntries()
110 #reversedEntries.reverse()
114 tmp["entries"].reverse()
115 for entry in tmp["entries"]:
116 date = self.extractDate(entry)
120 entry["title"] = "No Title"
128 entry["author"] = None
129 if(not(entry.has_key("id"))):
131 tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
132 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
133 id = self.generateUniqueId(tmpEntry)
135 #articleTime = time.mktime(self.entries[id]["dateTuple"])
137 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
139 baseurl = tmpEntry["link"]
143 filename = self.addImage(configdir, self.key, baseurl, img['src'])
145 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
148 traceback.print_exc()
149 print "Error downloading image %s" % img
150 tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
151 file = open(tmpEntry["contentLink"], "w")
152 file.write(soup.prettify())
154 values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
155 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
158 self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
160 filename = configdir+self.key+".d/"+id+".html"
161 file = open(filename,"a")
162 utime(filename, None)
164 images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
166 file = open(image[0],"a")
167 utime(image[0], None)
174 rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
176 self.removeEntry(row[0])
178 from glob import glob
180 for file in glob(configdir+self.key+".d/*"):
184 # put the two dates into matching format
186 lastmodDate = stats[8]
188 expDate = time.time()-expiry*3
189 # check if image-last-modified-date is outdated
191 if expDate > lastmodDate:
195 #print 'Removing', file
197 remove(file) # commented out for testing
201 print 'Could not remove', file
203 rows = self.db.execute("SELECT MAX(date) FROM feed;")
206 return (updateTime, etag, modified)
def setEntryRead(self, id):
    """Mark the article identified by *id* as read.

    Sets the ``read`` column of the matching ``feed`` row to 1 and commits
    the change so that it is persisted and visible to other connections
    opened on the same database file.
    """
    self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,))
    # sqlite3 opens an implicit transaction for UPDATE statements; without
    # an explicit commit the change is discarded when the connection closes.
    self.db.commit()
def setEntryUnread(self, id):
    """Mark the article identified by *id* as unread.

    Sets the ``read`` column of the matching ``feed`` row to 0 and commits
    the change so that it is persisted and visible to other connections
    opened on the same database file.
    """
    self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,))
    # sqlite3 opens an implicit transaction for UPDATE statements; without
    # an explicit commit the change is discarded when the connection closes.
    self.db.commit()
def markAllAsRead(self):
    """Flag every unread article of this feed as read.

    All ``feed`` rows whose ``read`` column is 0 are set to 1 in a single
    statement, and the change is committed immediately.
    """
    self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
    # sqlite3 opens an implicit transaction for UPDATE statements; without
    # an explicit commit the change is discarded when the connection closes.
    self.db.commit()
def isEntryRead(self, id):
    """Tell whether the article *id* is flagged as read.

    Returns True when the stored ``read`` value equals 1 and False for any
    other value. A TypeError is raised when no row matches *id*, because
    fetchone() then yields None.
    """
    cursor = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    flag = row[0]
    return flag == 1
def getTitle(self, id):
    """Return the ``title`` column of the feed row whose id is *id*.

    A TypeError is raised when no row matches, because fetchone() then
    yields None.
    """
    cursor = self.db.execute("SELECT title FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getContentLink(self, id):
    """Return the ``contentLink`` column of the feed row whose id is *id*.

    A TypeError is raised when no row matches, because fetchone() then
    yields None.
    """
    query = "SELECT contentLink FROM feed WHERE id=?;"
    row = self.db.execute(query, (id,)).fetchone()
    return row[0]
def getExternalLink(self, id):
    """Return the ``link`` column of the feed row whose id is *id*.

    A TypeError is raised when no row matches, because fetchone() then
    yields None.
    """
    query = "SELECT link FROM feed WHERE id=?;"
    matching_row = self.db.execute(query, (id,)).fetchone()
    return matching_row[0]
def getDate(self, id):
    """Return the date of article *id* as a formatted local-time string.

    The stored ``date`` value is converted with time.localtime() and
    rendered as e.g. "Thu, 01 Jan 1970 00:00:00". A TypeError is raised
    when no row matches *id*.
    """
    layout = "%a, %d %b %Y %H:%M:%S"
    row = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,)).fetchone()
    local_tuple = time.localtime(row[0])
    return time.strftime(layout, local_tuple)
def getDateTuple(self, id):
    """Return the date of article *id* as a local-time struct_time.

    The stored ``date`` value is handed to time.localtime(). A TypeError is
    raised when no row matches *id*.
    """
    row = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,)).fetchone()
    stamp = row[0]
    return time.localtime(stamp)
def getDateStamp(self, id):
    """Return the raw ``date`` column of the feed row whose id is *id*.

    The value is returned exactly as stored, without any conversion. A
    TypeError is raised when no row matches *id*.
    """
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def generateUniqueId(self, entry):
    """Return the hashed identifier under which *entry* is stored.

    entry -- dict describing an article. When it has an "id" value that is
             not None, that value alone is hashed; otherwise the "date" and
             "title" values are concatenated and hashed.

    The chosen text is converted with str() and passed to getId(), whose
    result is returned.
    """
    # Some callers in this file (addArchivedArticle, importOldFormatFeeds)
    # pass dicts that contain only "date" and "title"; a plain entry["id"]
    # lookup would raise KeyError for them.
    entry_id = entry.get("id")
    if entry_id is not None:
        return getId(str(entry_id))
    return getId(str(entry["date"]) + str(entry["title"]))
250 def getIds(self, onlyUnread=False):
252 rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
254 rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
261 def getNextId(self, id):
263 index = ids.index(id)
264 return ids[(index+1)%len(ids)]
266 def getPreviousId(self, id):
268 index = ids.index(id)
269 return ids[(index-1)%len(ids)]
def getNumberOfUnreadItems(self):
    """Return how many rows of the ``feed`` table have ``read`` equal to 0."""
    cursor = self.db.execute("SELECT count(*) FROM feed WHERE read=0;")
    (unread_count,) = cursor.fetchone()
    return unread_count
def getNumberOfEntries(self):
    """Return the total number of rows in the ``feed`` table."""
    cursor = self.db.execute("SELECT count(*) FROM feed;")
    (entry_count,) = cursor.fetchone()
    return entry_count
277 def getArticle(self, entry):
278 #self.setEntryRead(id)
279 #entry = self.entries[id]
280 title = entry['title']
281 #content = entry.get('content', entry.get('summary_detail', {}))
282 content = entry["content"]
285 author = entry['author']
286 date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
288 #text = '''<div style="color: black; background-color: white;">'''
289 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
290 text += "<html><head><title>" + title + "</title>"
291 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
292 #text += '<style> body {-webkit-user-select: none;} </style>'
293 text += '</head><body background=\"white\"><div><a href=\"' + link + '\">' + title + "</a>"
295 text += "<BR /><small><i>Author: " + author + "</i></small>"
296 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
297 text += "<BR /><BR />"
299 text += "</body></html>"
def getContent(self, id):
    """Return the cached page text of article *id*.

    The path of the cached page is read from the ``contentLink`` column of
    the ``feed`` table and the file at that path is read in full.

    Returns the string "Content unavailable" when no row matches *id*, when
    the stored path is NULL, or when the file cannot be opened or read.
    """
    unavailable = "Content unavailable"
    row = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()
    if row is None or row[0] is None:
        return unavailable
    try:
        # Open the path fetched from the database rather than going through
        # self.entries, which does not appear to be populated anywhere in
        # this sqlite-backed class.
        page = open(row[0])
        try:
            return page.read()
        finally:
            # Always release the handle, even if read() fails.
            page.close()
    except (IOError, OSError):
        return unavailable
def extractDate(self, entry):
    """Return the timestamp of a parsed feed *entry* in seconds since the epoch.

    entry -- mapping that may hold "updated_parsed" and/or
             "published_parsed" time tuples (interpreted as UTC by
             calendar.timegm()).

    "updated_parsed" is preferred over "published_parsed". A field that is
    missing or None is skipped. When neither field is usable the current
    time is returned, so callers always receive a number they can store or
    pass to time.localtime().
    """
    # Membership via get() instead of the deprecated dict.has_key(), and a
    # None check because timegm(None) raises TypeError.
    for field in ("updated_parsed", "published_parsed"):
        parsed = entry.get(field)
        if parsed is not None:
            return timegm(parsed)
    return time.time()
320 def extractContent(self, entry):
322 if entry.has_key('summary'):
323 content = entry.get('summary', '')
324 if entry.has_key('content'):
325 if len(entry.content[0].value) > len(content):
326 content = entry.content[0].value
328 content = entry.get('description', '')
331 def removeEntry(self, id):
332 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
335 os.remove(contentLink)
337 print "File not found for deletion: %s" % contentLink
338 self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
339 self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
342 class ArchivedArticles(Feed):
343 def addArchivedArticle(self, title, link, date, configdir):
344 id = self.generateUniqueId({"date":date, "title":title})
345 values = (id, title, link, date, 0, link, 0)
346 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
349 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
351 rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
353 currentTime = time.time()
356 f = urllib2.urlopen(link)
357 #entry["content"] = f.read()
360 soup = BeautifulSoup(html)
364 filename = self.addImage(configdir, self.key, baseurl, img['src'])
366 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
367 contentLink = configdir+self.key+".d/"+id+".html"
368 file = open(contentLink, "w")
369 file.write(soup.prettify())
372 self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
374 return (currentTime, None, None)
376 def purgeReadArticles(self):
377 rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
380 self.removeArticle(row[0])
382 def removeArticle(self, id):
383 rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
386 count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
394 # Lists all the feeds in a dictionary, and expose the data
395 def __init__(self, configdir):
396 self.configdir = configdir
398 self.db = sqlite3.connect("%s/feeds.db" % self.configdir)
401 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
403 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
404 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
405 self.addCategory("Default Category")
406 if isfile(self.configdir+"feeds.pickle"):
407 self.importOldFormatFeeds()
409 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
411 from string import find, upper
412 if find(upper(table[0]), "WIDGET")<0:
413 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
414 self.db.execute("UPDATE feeds SET widget=1;")
416 if find(upper(table[0]), "CATEGORY")<0:
417 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
418 self.addCategory("Default Category")
419 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
420 self.db.execute("UPDATE feeds SET category=1;")
425 def importOldFormatFeeds(self):
426 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
428 listing = rss.Listing(self.configdir)
430 for id in listing.getListOfFeeds():
433 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
434 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
437 feed = listing.getFeed(id)
438 new_feed = self.getFeed(id)
440 items = feed.getIds()[:]
443 if feed.isEntryRead(item):
447 date = timegm(feed.getDateTuple(item))
448 title = feed.getTitle(item)
449 newId = new_feed.generateUniqueId({"date":date, "title":title})
450 values = (newId, title , feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
451 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
454 images = feed.getImages(item)
456 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
460 self.updateUnread(id)
463 traceback.print_exc()
464 remove(self.configdir+"feeds.pickle")
467 def addArchivedArticle(self, key, index):
468 feed = self.getFeed(key)
469 title = feed.getTitle(index)
470 link = feed.getExternalLink(index)
471 date = feed.getDate(index)
472 count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
474 self.addFeed("Archived Articles", "", id="ArchivedArticles")
476 archFeed = self.getFeed("ArchivedArticles")
477 archFeed.addArchivedArticle(title, link, date, self.configdir)
478 self.updateUnread("ArchivedArticles")
480 def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
481 feed = self.getFeed(key)
482 db = sqlite3.connect("%s/feeds.db" % self.configdir)
483 (url, etag, modified) = db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
484 (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, eval(modified), expiryTime, proxy, imageCache)
486 db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, str(modified), key) )
488 db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, str(modified), key) )
490 self.updateUnread(key, db=db)
def getFeed(self, key):
    """Create and return the feed object for *key*.

    The key "ArchivedArticles" yields an ArchivedArticles instance; every
    other key yields a plain Feed. Both are constructed with this object's
    configdir and the given key.
    """
    feed_class = ArchivedArticles if key == "ArchivedArticles" else Feed
    return feed_class(self.configdir, key)
497 def editFeed(self, key, title, url, category=None):
499 self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
501 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
def getFeedUpdateTime(self, key):
    """Return the ``updateTime`` of feed *key* formatted by time.ctime().

    A TypeError is raised when no row of the ``feeds`` table matches *key*.
    """
    cursor = self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    stamp = row[0]
    return time.ctime(stamp)
def getFeedNumberOfUnreadItems(self, key):
    """Return the ``unread`` column stored in the ``feeds`` table for *key*.

    A TypeError is raised when no row matches *key*.
    """
    cursor = self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedTitle(self, key):
    """Return the ``title`` column stored in the ``feeds`` table for *key*.

    A TypeError is raised when no row matches *key*.
    """
    query = "SELECT title FROM feeds WHERE id=?;"
    row = self.db.execute(query, (key,)).fetchone()
    return row[0]
def getFeedUrl(self, key):
    """Return the ``url`` column stored in the ``feeds`` table for *key*.

    A TypeError is raised when no row matches *key*.
    """
    query = "SELECT url FROM feeds WHERE id=?;"
    matching_row = self.db.execute(query, (key,)).fetchone()
    return matching_row[0]
def getFeedCategory(self, key):
    """Return the ``category`` column stored in the ``feeds`` table for *key*.

    A TypeError is raised when no row matches *key*.
    """
    cursor = self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
519 def getListOfFeeds(self, category=None):
521 rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
523 rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
530 def getListOfCategories(self):
531 rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
538 def getCategoryTitle(self, id):
539 row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
    """Return the ids of the feeds in *category*, sorted as requested.

    order      -- "Most unread", "Least unread", "Most recent" or
                  "Least recent"; "Manual" and any unrecognised value sort
                  by the stored rank.
    onlyUnread -- when true, feeds whose unread count is 0 are left out.
    category   -- value matched against the ``category`` column.

    Returns a list of feed ids.
    """
    order_clauses = {
        "Most unread": "ORDER BY unread DESC",
        "Least unread": "ORDER BY unread",
        "Most recent": "ORDER BY updateTime DESC",
        "Least recent": "ORDER BY updateTime",
    }
    # "Manual" and invalid values fall back to the user-defined rank.
    order_clause = order_clauses.get(order, "ORDER BY rank")

    # Build a single WHERE clause joined with AND; a second WHERE keyword or
    # a missing space before ORDER BY makes the statement invalid SQL.
    condition = "category=?"
    if onlyUnread:
        condition = "unread>0 AND " + condition
    sql = "SELECT id FROM feeds WHERE %s %s;" % (condition, order_clause)

    # The category is bound as a parameter instead of being interpolated
    # into the statement text.
    rows = self.db.execute(sql, (category,))
    return [row[0] for row in rows]
569 def getFavicon(self, key):
570 filename = "%s%s.d/favicon.ico" % (self.configdir, key)
576 def updateUnread(self, key, db=None):
579 feed = self.getFeed(key)
580 db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
583 def addFeed(self, title, url, id=None, category=1):
586 count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
588 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
591 values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
592 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
594 # Ask for the feed object, it will create the necessary tables
600 def addCategory(self, title):
601 rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
604 id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
607 self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
610 def removeFeed(self, key):
611 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
612 self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
613 self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
616 if isdir(self.configdir+key+".d/"):
617 rmtree(self.configdir+key+".d/")
619 def removeCategory(self, key):
620 if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
621 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
622 self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
623 self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
624 self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
627 #def saveConfig(self):
628 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
629 # file = open(self.configdir+"feeds.pickle", "w")
630 # pickle.dump(self.listOfFeeds, file)
633 def moveUp(self, key):
634 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
636 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
637 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
640 def moveCategoryUp(self, key):
641 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
643 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
644 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
647 def moveDown(self, key):
648 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
649 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
651 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
652 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
655 def moveCategoryDown(self, key):
656 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
657 max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
659 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
660 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )