From ba351fa3e084fa3bbaff0a7946a5402dccbdc6f9 Mon Sep 17 00:00:00 2001
From: Yves
Date: Sun, 7 Mar 2010 21:14:32 -0800
Subject: [PATCH] 0.5.1-0: added image caching for webkit Archived Articles

---
 Makefile         |    4 +-
 debian/changelog |   10 ++-
 debian/control   |    3 +-
 src/FeedingIt.py |    9 ++-
 src/rss.py       |  199 ++++++++++++++++++++++++++++++++++++++++++------------
 5 files changed, 174 insertions(+), 51 deletions(-)

diff --git a/Makefile b/Makefile
index e612da8..825a937 100644
--- a/Makefile
+++ b/Makefile
@@ -25,8 +25,8 @@ install:
 	install data/64px.png ${DESTDIR}/usr/share/icons/hicolor/64x64/apps/feedingit.png
 	install -d ${DESTDIR}/usr/share/dbus-1/services/
 	install src/feedingit.service ${DESTDIR}/usr/share/dbus-1/services/
-	install -d ${DESTDIR}/etc/osso-backup/applications
-	install src/feedingit.conf /etc/osso-backup/applications/
+	install -d ${DESTDIR}/etc/osso-backup/applications/
+	install src/feedingit.conf ${DESTDIR}/etc/osso-backup/applications/
 
 clean:
 	rm src/*pyo
diff --git a/debian/changelog b/debian/changelog
index 139bece..bea1f2f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,12 @@
-feedingit (0.5.0-0) unstable; urgency=low
+feedingit (0.5.1-0) unstable; urgency=low
 
-  * Refactored Feed and Listing object. Much smaller footprint, faster shutdown
+  * Added caching of images for Archived Articles
+
+ -- Yves  Sun, 07 Mar 2010 17:36:19 -0800
+
+feedingit (0.5.0-1) unstable; urgency=low
+
+  * Refactored Feed and Listing object. Much smaller memory footprint, faster shutdown
 
  -- Yves  Wed, 03 Mar 2010 10:43:19 -0800
 
diff --git a/debian/control b/debian/control
index 25e062e..af7030a 100644
--- a/debian/control
+++ b/debian/control
@@ -10,7 +10,8 @@ XSBC-Bugtracker: https://garage.maemo.org/tracker/?func=browse&group_id=1202&ati
 Package: feedingit
 Architecture: any
 Depends: ${shlibs:Depends}, ${misc:Depends}, python-gtkhtml2, python, python-hildon,
-  libgtkhtml2-0, python-dbus, python-osso, python-gconf
+  libgtkhtml2-0, python-dbus, python-osso, python-gconf, python-beautifulsoup
+Recommends: python2.5-webkit
 Description: RSS Reader
  Its main features are:
 
diff --git a/src/FeedingIt.py b/src/FeedingIt.py
index 817520d..8231faf 100644
--- a/src/FeedingIt.py
+++ b/src/FeedingIt.py
@@ -379,6 +379,7 @@ class DisplayArticle(hildon.StackableWindow):
             self.view = webkit.WebView()
             #self.view.set_editable(False)
         else:
+            import gtkhtml2
            self.view = gtkhtml2.View()
            self.document = gtkhtml2.Document()
            self.view.set_document(self.document)
@@ -389,10 +390,13 @@ class DisplayArticle(hildon.StackableWindow):
         #self.gestureId = self.pannable_article.connect('horizontal-movement', self.gesture)
 
         if self.config.getWebkitSupport():
-            self.view.load_string(self.text, "text/html", "utf-8", self.link)
+            if key=="ArchivedArticles":
+                self.view.open("file://" + self.link)
+            else:
+                self.view.load_html_string(self.text, self.link) # "text/html", "utf-8", self.link)
             self.view.set_zoom_level(float(config.getArtFontSize())/10.)
         else:
-            if not key == "1295627ef630df9d239abeb0ba631c3f":
+            if not key == "ArchivedArticles":
                 # Do not download images if the feed is "Archived Articles"
                 self.document.connect("request-url", self._signal_request_url)
 
@@ -642,6 +646,7 @@ class DisplayFeed(hildon.StackableWindow):
 
     def onDownloadsDone(self, *widget):
         self.vbox.destroy()
+        self.feed = self.listing.getFeed(self.key)
         self.displayFeed()
         #self.feed.updateFeed()
         # self.clear()
diff --git a/src/rss.py b/src/rss.py
index fdf14fe..4fea77e 100644
--- a/src/rss.py
+++ b/src/rss.py
@@ -32,23 +32,50 @@ import md5
 import feedparser
 import time
 import urllib2
+from BeautifulSoup import BeautifulSoup
+from urlparse import urlparse
 
 #CONFIGDIR="/home/user/.feedingit/"
 
 def getId(string):
     return md5.new(string).hexdigest()
 
-class Entry:
-    def __init__(self, title, content, date, link):
-        self.title = title
-        self.content = content
-        self.date = date
-        self.link = link
-
 # Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
 
+class ImageHandler:
+    def __init__(self, configdir):
+        self.configdir = configdir
+        self.images = {}
+
+    def addImage(self, key, baseurl, url):
+        filename = self.configdir+key+".d/"+getId(url)
+        if not isfile(filename):
+            try:
+                if url.startswith("http"):
+                    f = urllib2.urlopen(url)
+                else:
+                    f = urllib2.urlopen(baseurl+"/"+url)
+                outf = open(filename, "w")
+                outf.write(f.read())
+                f.close()
+                outf.close()
+            except:
+                print "Could not download" + url
+        if url in self.images:
+            self.images[url] += 1
+        else:
+            self.images[url] = 1
+        return "file://" + filename
+
+    def removeImage(self, key, url):
+        filename = self.configdir+key+".d/"+getId(url)
+        self.images[url] -= 1
+        if self.images[url] == 0:
+            os.remove(filename)
+            del self.images[url]
+
 class Feed:
-    def __init__(self, name, url):
+    def __init__(self, uniqueId, name, url, imageHandler):
         self.titles = []
         self.entries = {}
         self.ids = []
@@ -57,28 +84,30 @@
         self.url = url
         self.countUnread = 0
         self.updateTime = "Never"
+        self.uniqueId = uniqueId
+        self.imageHandler = imageHandler
 
     def editFeed(self, url):
         self.url = url
 
     def saveFeed(self, configdir):
-        if not isdir(configdir+getId(self.name)+".d"):
-            mkdir(configdir+getId(self.name)+".d")
-        file = open(configdir+getId(self.name)+".d/feed", "w")
+        if not isdir(configdir+self.uniqueId+".d"):
+            mkdir(configdir+self.uniqueId+".d")
+        file = open(configdir+self.uniqueId+".d/feed", "w")
         pickle.dump(self, file )
         file.close()
         self.saveUnread(configdir)
 
     def saveUnread(self, configdir):
-        if not isdir(configdir+getId(self.name)+".d"):
-            mkdir(configdir+getId(self.name)+".d")
-        file = open(configdir+getId(self.name)+".d/unread", "w")
+        if not isdir(configdir+self.uniqueId+".d"):
+            mkdir(configdir+self.uniqueId+".d")
+        file = open(configdir+self.uniqueId+".d/unread", "w")
         pickle.dump(self.readItems, file )
         file.close()
 
     def reloadUnread(self, configdir):
         try:
-            file = open(configdir+getId(self.name)+".d/unread", "r")
+            file = open(configdir+self.uniqueId+".d/unread", "r")
             self.readItems = pickle.load( file )
             file.close()
             self.countUnread = 0
@@ -110,9 +139,13 @@ class Feed:
             expiry = float(expiryTime) * 3600.
             articleTime = time.mktime(self.entries[entryId]["dateTuple"])
             if currentTime - articleTime < expiry:
-                if not id in tmpIds:
+                if not entryId in tmpIds:
                     tmpEntries[entryId] = self.entries[entryId]
                     tmpIds.append(entryId)
+            else:
+                if (not self.isEntryRead(entryId)) and (currentTime - articleTime < 2*expiry):
+                    tmpEntries[entryId] = self.entries[entryId]
+                    tmpIds.append(entryId)
 
         self.entries = tmpEntries
         self.ids = tmpIds
@@ -152,6 +185,7 @@ class Feed:
         else:
             date1= ""
             date = ""
+        #print date1, date
         return (date1, date)
 
     def setEntryRead(self, id):
@@ -171,10 +205,15 @@ class Feed:
         return self.entries[id]["title"]
 
     def getLink(self, id):
+        if self.entries[id].has_key("contentLink"):
+            return self.entries[id]["contentLink"]
         return self.entries[id]["link"]
 
     def getDate(self, id):
         return self.entries[id]["date"]
+
+    def getDateTuple(self, id):
+        return self.entries[id]["dateTuple"]
 
     def getUniqueId(self, index):
         return self.ids[index]
@@ -210,8 +249,25 @@ class Feed:
         return []
 
     def getContent(self, id):
+        if self.entries[id].has_key("contentLink"):
+            file = open(self.entries[id]["contentLink"])
+            content = file.read()
+            file.close()
+            return content
         return self.entries[id]["content"]
 
+    def removeEntry(self, id):
+        entry = self.entries[id]
+        for img in entry["images"]:
+            self.imageHandler.removeImage(self.uniqueId, img)
+        if entry.has_key("contentLink"):
+            os.remove(entry["contentLink"])
+        del self.entries[id]
+        self.ids.remove(id)
+        if self.readItems[id]==False:
+            self.countUnread = self.countUnread - 1
+        del self.readItems[id]
+
     def getArticle(self, id):
         self.setEntryRead(id)
         entry = self.entries[id]
@@ -226,48 +282,83 @@ class Feed:
 
         text = ''
         text += "" + title + ""
         text += '\n'
-        text += ''
-        text += '' + title + ""
+        #text += ''
+        text += '' + title + ""
         text += "Date: " + date + ""
         text += ""
         text += content
         text += ""
         return text
 
-class ArchivedArticles(Feed):    
+class ArchivedArticles(Feed):
     def addArchivedArticle(self, title, link, updated_parsed, configdir):
         entry = {}
         entry["title"] = title
         entry["link"] = link
-        entry["downloaded"] = False
         entry["summary"] = '' + title + ""
         entry["updated_parsed"] = updated_parsed
         entry["time"] = time.time()
-        self.entries.append(entry)
-        self.readItems[self.getUniqueId(len(self.entries)-1)] = False
+        #print entry
+        (dateTuple, date) = self.extractDate(entry)
+        tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
+                    "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[], "downloaded":False, "time":entry["time"] }
+        id = self.generateUniqueId(tmpEntry)
+        self.entries[id] = tmpEntry
+        self.ids.append(id)
+        self.readItems[id] = False
         self.countUnread = self.countUnread + 1
         self.saveFeed(configdir)
-        #print entry
+        self.saveUnread(configdir)
 
     def updateFeed(self, configdir, expiryTime=24):
-        index = 0
-        for entry in self.getEntries():
+        for id in self.getIds():
+            entry = self.entries[id]
             if not entry["downloaded"]:
-                try:
+                #try:
                     f = urllib2.urlopen(entry["link"])
-                    entry["summary"] = f.read()
+                    #entry["content"] = f.read()
+                    html = f.read()
                     f.close()
-                    if len(entry["summary"]) > 0:
+                    soup = BeautifulSoup(html)
+                    images = soup.body('img')
+                    baseurl = ''.join(urlparse(entry["link"])[:-1])
+                    for img in images:
+                        filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
+                        #filename = configdir+self.uniqueId+".d/"+getId(img['src'])
+                        #if not isfile(filename):
+                        #    try:
+                        #        if img['src'].startswith("http"):
+                        #            f = urllib2.urlopen(img['src'])
+                        #        else:
+                        #            f = urllib2.urlopen(baseurl+"/"+img['src'])
+                        #        #print baseurl+"/"+img['src']
+                        #        print filename
+                        #        outf = open(filename, "w")
+                        #        outf.write(f.read())
+                        #        f.close()
+                        #        outf.close()
+                        #    except:
+                        #        print "Could not download" + img['src']
+                        img['src']=filename
+                        entry["images"].append(filename)
+                    entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
+                    file = open(entry["contentLink"], "w")
+                    file.write(soup.prettify())
+                    file.close()
+                    if len(entry["content"]) > 0:
                         entry["downloaded"] = True
                         entry["time"] = time.time()
-                        self.setEntryUnread(index)
-                except:
-                    pass
+                        self.setEntryUnread(id)
+                #except:
+                #    pass
             currentTime = time.time()
             expiry = float(expiryTime) * 3600
             if currentTime - entry["time"] > expiry:
-                self.entries.remove(entry)
-            index += 1
+                if self.isEntryRead(id):
+                    self.removeEntry(id)
+                else:
+                    if currentTime - entry["time"] > 2*expiry:
+                        self.removeEntry(id)
         self.updateTime = time.asctime()
         self.saveFeed(configdir)
@@ -288,6 +379,12 @@ class Listing:
             file.close()
         else:
             self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
+        if isfile(self.configdir+"images.pickle"):
+            file = open(self.configdir+"images.pickle")
+            self.imageHandler = pickle.load(file)
+            file.close()
+        else:
+            self.imageHandler = ImageHandler(self.configdir)
         if self.listOfFeeds.has_key("font"):
             del self.listOfFeeds["font"]
         if self.listOfFeeds.has_key("feedingit-order"):
@@ -314,28 +411,39 @@ class Listing:
         #self.saveConfig()
 
     def addArchivedArticle(self, key, index):
-        title = self.getFeed(key).getTitle(index)
-        link = self.getFeed(key).getLink(index)
-        date = self.getFeed(key).getDate(index)
-        if not self.listOfFeeds.has_key(getId("Archived Articles")):
-            self.listOfFeeds[getId("Archived Articles")] = {"title":"Archived Articles", "url":""}
-            self.sortedKeys.append(getId("Archived Articles"))
-            self.feeds[getId("Archived Articles")] = ArchivedArticles("Archived Articles", "")
+        feed = self.getFeed(key)
+        title = feed.getTitle(index)
+        link = feed.getLink(index)
+        date = feed.getDateTuple(index)
+        if not self.listOfFeeds.has_key("ArchivedArticles"):
+            self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
+            self.sortedKeys.append("ArchivedArticles")
+            #self.feeds["Archived Articles"] = ArchivedArticles("Archived Articles", "")
             self.saveConfig()
-
-        self.getFeed(getId("Archived Articles")).addArchivedArticle(title, link, date, self.configdir)
+        archFeed = self.getFeed("ArchivedArticles")
+        archFeed.addArchivedArticle(title, link, date, self.configdir)
+        self.listOfFeeds[key]["unread"] = archFeed.getNumberOfUnreadItems()
 
     def loadFeed(self, key):
         if isfile(self.configdir+key+".d/feed"):
             file = open(self.configdir+key+".d/feed")
             feed = pickle.load(file)
             file.close()
+            try:
+                feed.uniqueId
+                feed.imageHandler
+            except AttributeError:
+                feed.uniqueId = getId(feed.name)
+                feed.imageHandler = self.imageHandler
             #feed.reloadUnread(self.configdir)
         else:
             #print key
             title = self.listOfFeeds[key]["title"]
             url = self.listOfFeeds[key]["url"]
-            feed = Feed(title, url)
+            if key == "ArchivedArticles":
+                feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
+            else:
+                feed = Feed(getId(title), title, url, self.imageHandler)
         return feed
 
     def updateFeeds(self, expiryTime=24):
@@ -405,7 +513,7 @@ class Listing:
         del self.listOfFeeds[key]
         self.sortedKeys.remove(key)
         #del self.feeds[key]
-        if isfile(self.configdir+key):
+        if isdir(self.configdir+key+".d/"):
             rmtree(self.configdir+key+".d/")
         self.saveConfig()
 
@@ -414,6 +522,9 @@ class Listing:
         file = open(self.configdir+"feeds.pickle", "w")
         pickle.dump(self.listOfFeeds, file)
         file.close()
+        file = open(self.configdir+"images.pickle", "w")
+        pickle.dump(self.imageHandler, file)
+        file.close()
 
     def moveUp(self, key):
         index = self.sortedKeys.index(key)
-- 
1.7.9.5
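
Editor's note, not part of the commit: the sketch below restates the caching scheme the rss.py changes introduce for Archived Articles (fetch the page, download every <img> into the feed's config directory under an md5-derived filename, point the src at a file:// URI, and save the rewritten page for webkit to open). It uses the same Python 2 libraries the patch already depends on (urllib2, BeautifulSoup, md5); the function name and the cachedir parameter are illustrative and do not exist in FeedingIt.

import md5
import os
import urllib2
from BeautifulSoup import BeautifulSoup

def cache_article_images(html, baseurl, cachedir):
    # Parse the downloaded article and localise every referenced image,
    # mirroring what ImageHandler.addImage / ArchivedArticles.updateFeed do above.
    soup = BeautifulSoup(html)
    for img in soup.body('img'):
        src = img['src']
        # Relative URLs are resolved against the article's base URL.
        url = src if src.startswith("http") else baseurl + "/" + src
        filename = os.path.join(cachedir, md5.new(src).hexdigest())
        if not os.path.isfile(filename):
            try:
                data = urllib2.urlopen(url).read()
                outf = open(filename, "w")
                outf.write(data)
                outf.close()
            except:
                continue  # keep the remote src if the download fails
        img['src'] = "file://" + filename
    # The caller writes the result to <id>.html in the feed directory and the
    # article window displays it with webkit's view.open("file://" + path).
    return soup.prettify()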