From 4413bfee9e19087a7baecc1307615bf118879f27 Mon Sep 17 00:00:00 2001 From: Yves Date: Sun, 21 Mar 2010 22:12:54 -0700 Subject: [PATCH] 0.5.3 - Image caching, first try --- debian/changelog | 8 ++- src/FeedingIt.py | 179 +++++++++++++++++++++++++++--------------------------- src/config.py | 32 ++++------ src/rss.py | 158 ++++++++++++++++++++++++++--------------------- 4 files changed, 200 insertions(+), 177 deletions(-) diff --git a/debian/changelog b/debian/changelog index b0fce5f..f6b9233 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,10 @@ -feedingit (0.5.2-3) unstable; urgency=low +feedingit (0.5.3-0) unstable; urgency=low + + * Enabled image caching for all articles + + -- Yves Wed, 21 Mar 2010 21:39:19 -0800 + +feedingit (0.5.2-4) unstable; urgency=low * Changed theme colour used diff --git a/src/FeedingIt.py b/src/FeedingIt.py index 2259fd1..761ee54 100644 --- a/src/FeedingIt.py +++ b/src/FeedingIt.py @@ -28,12 +28,12 @@ import feedparser import pango import hildon #import gtkhtml2 -try: - import webkit - has_webkit=True -except: - import gtkhtml2 - has_webkit=False +#try: +import webkit +# has_webkit=True +#except: +# import gtkhtml2 +# has_webkit=False import time import dbus import pickle @@ -56,7 +56,7 @@ timeout = 5 socket.setdefaulttimeout(timeout) color_style = gtk.rc_get_style_by_paths(gtk.settings_get_default() , 'GtkButton', 'osso-logical-colors', gtk.Button) -fg_colog = color_style.lookup_color('ActiveTextColor') +fg_color = color_style.lookup_color('ActiveTextColor') del color_style CONFIGDIR="/home/user/.feedingit/" @@ -120,42 +120,42 @@ class AddWidgetWizard(hildon.WizardDialog): else: return True -class GetImage(threading.Thread): - def __init__(self, url, stream): - threading.Thread.__init__(self) - self.url = url - self.stream = stream - - def run(self): - f = urllib2.urlopen(self.url) - data = f.read() - f.close() - self.stream.write(data) - self.stream.close() - -class ImageDownloader(): - def __init__(self): - self.images = [] - self.downloading = False - - def queueImage(self, url, stream): - self.images.append((url, stream)) - if not self.downloading: - self.downloading = True - gobject.timeout_add(50, self.checkQueue) - - def checkQueue(self): - for i in range(4-threading.activeCount()): - if len(self.images) > 0: - (url, stream) = self.images.pop() - GetImage(url, stream).start() - if len(self.images)>0: - gobject.timeout_add(200, self.checkQueue) - else: - self.downloading=False - - def stopAll(self): - self.images = [] +#class GetImage(threading.Thread): +# def __init__(self, url, stream): +# threading.Thread.__init__(self) +# self.url = url +# self.stream = stream +# +# def run(self): +# f = urllib2.urlopen(self.url) +# data = f.read() +# f.close() +# self.stream.write(data) +# self.stream.close() +# +#class ImageDownloader(): +# def __init__(self): +# self.images = [] +# self.downloading = False +# +# def queueImage(self, url, stream): +# self.images.append((url, stream)) +# if not self.downloading: +# self.downloading = True +# gobject.timeout_add(50, self.checkQueue) +# +# def checkQueue(self): +# for i in range(4-threading.activeCount()): +# if len(self.images) > 0: +# (url, stream) = self.images.pop() +# GetImage(url, stream).start() +# if len(self.images)>0: +# gobject.timeout_add(200, self.checkQueue) +# else: +# self.downloading=False +# +# def stopAll(self): +# self.images = [] class Download(threading.Thread): @@ -168,9 +168,9 @@ class Download(threading.Thread): def run (self): (use_proxy, proxy) = self.config.getProxy() if use_proxy: - self.listing.updateFeed(self.key, self.config.getExpiry(), proxy=proxy) + self.listing.updateFeed(self.key, self.config.getExpiry(), proxy=proxy, imageCache=self.config.getImageCache() ) else: - self.listing.updateFeed(self.key, self.config.getExpiry()) + self.listing.updateFeed(self.key, self.config.getExpiry(), imageCache=self.config.getImageCache() ) class DownloadBar(gtk.ProgressBar): @@ -370,48 +370,48 @@ class SortList(gtk.Dialog): class DisplayArticle(hildon.StackableWindow): - def __init__(self, title, text, link, index, key, listing, config): + def __init__(self, feed, id, key, config): hildon.StackableWindow.__init__(self) - self.imageDownloader = ImageDownloader() - self.listing=listing + #self.imageDownloader = ImageDownloader() + self.feed = feed + #self.listing=listing self.key = key - self.index = index - self.text = text - self.link = link - self.set_title(title) + self.id = id + self.set_title(feed.getTitle(id)) self.config = config - self.images = [] # Init the article display - if self.config.getWebkitSupport(): - self.view = webkit.WebView() + #if self.config.getWebkitSupport(): + self.view = webkit.WebView() #self.view.set_editable(False) - else: - import gtkhtml2 - self.view = gtkhtml2.View() - self.document = gtkhtml2.Document() - self.view.set_document(self.document) - self.document.connect("link_clicked", self._signal_link_clicked) + #else: + # import gtkhtml2 + # self.view = gtkhtml2.View() + # self.document = gtkhtml2.Document() + # self.view.set_document(self.document) + # self.document.connect("link_clicked", self._signal_link_clicked) self.pannable_article = hildon.PannableArea() self.pannable_article.add(self.view) #self.pannable_article.set_property("mov-mode", hildon.MOVEMENT_MODE_BOTH) #self.gestureId = self.pannable_article.connect('horizontal-movement', self.gesture) - if self.config.getWebkitSupport(): - if key=="ArchivedArticles": - self.view.open("file://" + self.link) - else: - self.view.load_html_string(self.text, self.link) # "text/html", "utf-8", self.link) - self.view.set_zoom_level(float(config.getArtFontSize())/10.) - else: - if not key == "ArchivedArticles": + #if self.config.getWebkitSupport(): + contentLink = self.feed.getContentLink(self.id) + self.feed.setEntryRead(self.id) + #if key=="ArchivedArticles": + self.view.open("file://" + contentLink) + #else: + #self.view.load_html_string(self.text, contentLink) # "text/html", "utf-8", self.link) + self.view.set_zoom_level(float(config.getArtFontSize())/10.) + #else: + # if not key == "ArchivedArticles": # Do not download images if the feed is "Archived Articles" - self.document.connect("request-url", self._signal_request_url) + # self.document.connect("request-url", self._signal_request_url) - self.document.clear() - self.document.open_stream("text/html") - self.document.write_stream(self.text) - self.document.close_stream() + # self.document.clear() + # self.document.open_stream("text/html") + # self.document.write_stream(self.text) + # self.document.close_stream() menu = hildon.AppMenu() # Create a button and add it to the menu @@ -422,7 +422,7 @@ class DisplayArticle(hildon.StackableWindow): button = hildon.GtkButton(gtk.HILDON_SIZE_AUTO) button.set_label("Open in Browser") - button.connect("clicked", self._signal_link_clicked, self.link) + button.connect("clicked", self._signal_link_clicked, self.feed.getExternalLink(self.id)) menu.append(button) button = hildon.GtkButton(gtk.HILDON_SIZE_AUTO) @@ -456,9 +456,9 @@ class DisplayArticle(hildon.StackableWindow): if (abs(y) < 30): if (x > 15): - self.emit("article-previous", self.index) + self.emit("article-previous", self.id) elif (x<-15): - self.emit("article-next", self.index) + self.emit("article-next", self.id) #print x, y #print "Released" @@ -472,8 +472,8 @@ class DisplayArticle(hildon.StackableWindow): def destroyWindow(self, *args): self.disconnect(self.destroyId) - self.emit("article-closed", self.index) - self.imageDownloader.stopAll() + self.emit("article-closed", self.id) + #self.imageDownloader.stopAll() self.destroy() def horiz_scrolling_button(self, *widget): @@ -482,7 +482,7 @@ class DisplayArticle(hildon.StackableWindow): def archive_button(self, *widget): # Call the listing.addArchivedArticle - self.listing.addArchivedArticle(self.key, self.index) + self.listing.addArchivedArticle(self.key, self.id) #def reloadArticle(self, *widget): # if threading.activeCount() > 1: @@ -502,9 +502,9 @@ class DisplayArticle(hildon.StackableWindow): iface = dbus.Interface(proxy, 'com.nokia.osso_browser') iface.load_url(link) - def _signal_request_url(self, object, url, stream): + #def _signal_request_url(self, object, url, stream): #print url - self.imageDownloader.queueImage(url, stream) + # self.imageDownloader.queueImage(url, stream) #imageThread = GetImage(url) #imageThread.start() #self.images.append((stream, imageThread)) @@ -588,7 +588,8 @@ class DisplayFeed(hildon.StackableWindow): self.remove(self.pannableFeed) def button_clicked(self, button, index, previous=False, next=False): - newDisp = DisplayArticle(self.feedTitle, self.feed.getArticle(index), self.feed.getLink(index), index, self.key, self.listing, self.config) + #newDisp = DisplayArticle(self.feedTitle, self.feed.getArticle(index), self.feed.getLink(index), index, self.key, self.listing, self.config) + newDisp = DisplayArticle(self.feed, index, self.key, self.config) stack = hildon.WindowStack.get_default() if previous: tmp = stack.peek() @@ -687,15 +688,15 @@ class FeedingIt: self.mainVbox.pack_start(self.pannableListing) self.window.add(self.mainVbox) self.window.show_all() - self.config = Config(self.window, CONFIGDIR+"config.ini", has_webkit) + self.config = Config(self.window, CONFIGDIR+"config.ini") gobject.idle_add(self.createWindow) def createWindow(self): self.listing = Listing(CONFIGDIR) self.downloadDialog = False - self.orientation = FremantleRotation("FeedingIt", main_window=self.window) - self.orientation.set_mode(self.config.getOrientation()) + #self.orientation = FremantleRotation("FeedingIt", main_window=self.window) + #self.orientation.set_mode(self.config.getOrientation()) menu = hildon.AppMenu() # Create a button and add it to the menu @@ -732,8 +733,8 @@ class FeedingIt: self.window.set_app_menu(menu) menu.show_all() - #self.feedWindow = hildon.StackableWindow() - #self.articleWindow = hildon.StackableWindow() + self.feedWindow = hildon.StackableWindow() + self.articleWindow = hildon.StackableWindow() self.displayListing() self.autoupdate = False @@ -745,6 +746,8 @@ class FeedingIt: feed = self.listing.getFeed(key) for id in feed.getIds(): feed.setEntryRead(id) + feed.saveUnread(CONFIGDIR) + self.listing.updateUnread(key, feed.getNumberOfUnreadItems()) self.refreshList() def button_export_clicked(self, button): diff --git a/src/config.py b/src/config.py index cb7aaa0..987b551 100644 --- a/src/config.py +++ b/src/config.py @@ -36,10 +36,9 @@ titles = {"updateInterval":"Auto-update Interval", "expiry":"Expiry For Articles subtitles = {"updateInterval":"Update every %s hours", "expiry":"Delete articles after %s hours", "fontSize":"%s pixels", "orientation":"%s", "artFontSize":"%s pixels"} class Config(): - def __init__(self, parent, configFilename, has_webkit): + def __init__(self, parent, configFilename): self.configFilename = configFilename self.parent = parent - self.has_webkit = has_webkit # Load config self.loadConfig() @@ -51,10 +50,7 @@ class Config(): vbox = gtk.VBox(False, 10) self.buttons = {} - if self.has_webkit: - settings = ["fontSize", "artFontSize", "expiry", "orientation", "updateInterval",] - else: - settings = ["fontSize", "expiry", "orientation", "updateInterval",] + settings = ["fontSize", "expiry", "orientation", "updateInterval",] for setting in settings: picker = hildon.PickerButton(gtk.HILDON_SIZE_FINGER_HEIGHT, hildon.BUTTON_ARRANGEMENT_VERTICAL) selector = self.create_selector(ranges[setting], setting) @@ -72,12 +68,11 @@ class Config(): button.connect("toggled", self.button_toggled, "autoupdate") vbox.pack_start(button, expand=False) - if self.has_webkit: - button = hildon.CheckButton(gtk.HILDON_SIZE_FINGER_HEIGHT) - button.set_label("Webkit Articles Enabled") - button.set_active(self.config["webkit"]) - button.connect("toggled", self.button_toggled, "webkit") - vbox.pack_start(button, expand=False) + button = hildon.CheckButton(gtk.HILDON_SIZE_FINGER_HEIGHT) + button.set_label("Image Caching Enabled") + button.set_active(self.config["imageCache"]) + button.connect("toggled", self.button_toggled, "imageCache") + vbox.pack_start(button, expand=False) panArea.add_with_viewport(vbox) @@ -121,7 +116,7 @@ class Config(): self.config["autoupdate"] = configParser.getboolean(section, "autoupdate") self.config["updateInterval"] = configParser.getfloat(section, "updateInterval") self.config["orientation"] = configParser.get(section, "orientation") - self.config["webkit"] = configParser.getboolean(section, "webkit") + self.config["imageCache"] = configParser.getboolean(section, "imageCache") except: self.config["fontSize"] = 17 self.config["artFontSize"] = 14 @@ -129,7 +124,7 @@ class Config(): self.config["autoupdate"] = False self.config["updateInterval"] = 4 self.config["orientation"] = "Automatic" - self.config["webkit"] = self.has_webkit + self.config["imageCache"] = False def saveConfig(self): configParser = ConfigParser.RawConfigParser() @@ -140,7 +135,7 @@ class Config(): configParser.set(section, 'autoupdate', str(self.config["autoupdate"])) configParser.set(section, 'updateInterval', str(self.config["updateInterval"])) configParser.set(section, 'orientation', str(self.config["orientation"])) - configParser.set(section, 'webkit', str(self.config["webkit"])) + configParser.set(section, 'imageCache', str(self.config["imageCache"])) # Writing our configuration file file = open(self.configFilename, 'wb') @@ -177,11 +172,8 @@ class Config(): return "sans bold %s" % self.config["fontSize"] def getOrientation(self): return ranges["orientation"].index(self.config["orientation"]) - def getWebkitSupport(self): - if self.has_webkit: - return self.config["webkit"] - else: - return False + def getImageCache(self): + return self.config["imageCache"] def getProxy(self): if gconf.client_get_default().get_bool('/system/http_proxy/use_http_proxy'): port = gconf.client_get_default().get_int('/system/http_proxy/port') diff --git a/src/rss.py b/src/rss.py index 0ceb236..53c3892 100644 --- a/src/rss.py +++ b/src/rss.py @@ -26,7 +26,7 @@ from os.path import isfile from os.path import isdir from shutil import rmtree -from os import mkdir +from os import mkdir, remove import pickle import md5 import feedparser @@ -61,45 +61,24 @@ class ImageHandler: outf.close() except: print "Could not download" + url - if url in self.images: - self.images[url] += 1 + if filename in self.images: + self.images[filename] += 1 else: - self.images[url] = 1 - return "file://" + filename + self.images[filename] = 1 + return filename - def removeImage(self, key, url): - filename = self.configdir+key+".d/"+getId(url) - self.images[url] -= 1 - if self.images[url] == 0: - os.remove(filename) - del self.images[url] - -class UnreadTracker: - def __init__(self): - self.readItems = {} - self.countUnread - - def setEntryUnread(self, id): - if self.readItems.has_key(id): - if self.readItems[id]==True: - self.countUnread = self.countUnread + 1 - self.readItems[id] = False - else: - self.readItems[id] = False - self.countUnread = self.countUnread + 1 - - def setEntryRead(self, id): - if self.readItems[id]==False: - self.countUnread = self.countUnread - 1 - self.readItems[id] = True - - def isRead(self, id): - return self.readItems[id] - - def removeEntry(self, id): - if self.readItems[id]==False: - self.countUnread = self.countUnread - 1 - del self.readItems[id] + def removeImage(self, key, filename): + #filename = self.configdir+key+".d/"+getId(url) + try: + self.images[filename] -= 1 + except: + self.images[filename] = 0 #Delete image + try: + if self.images[filename] == 0: + remove(filename) #os.remove + del self.images[filename] + except: + print "Could not remove image %s" % filename class Feed: def __init__(self, uniqueId, name, url, imageHandler): @@ -145,7 +124,7 @@ class Feed: pass return self.countUnread - def updateFeed(self, configdir, expiryTime=24, proxy=None): + def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False): # Expiry time is in hours if proxy == None: tmp=feedparser.parse(self.url) @@ -155,6 +134,8 @@ class Feed: if len(tmp["entries"])>0: #reversedEntries = self.getEntries() #reversedEntries.reverse() + if not isdir(configdir+self.uniqueId+".d"): + mkdir(configdir+self.uniqueId+".d") tmpEntries = {} tmpIds = [] for entry in tmp["entries"]: @@ -162,20 +143,41 @@ class Feed: tmpEntry = {"title":entry["title"], "content":self.extractContent(entry), "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] } id = self.generateUniqueId(tmpEntry) - tmpEntries[id] = tmpEntry - tmpIds.append(id) - for entryId in self.getIds(): - currentTime = time.time() - expiry = float(expiryTime) * 3600. - articleTime = time.mktime(self.entries[entryId]["dateTuple"]) - if currentTime - articleTime < expiry: - if not entryId in tmpIds: + if not id in self.ids: + + soup = BeautifulSoup(tmpEntry["content"]) + images = soup('img') + baseurl = ''.join(urlparse(tmpEntry["link"])[:-1]) + if imageCache: + for img in images: + filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src']) + img['src']=filename + tmpEntry["images"].append(filename) + tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html" + file = open(tmpEntry["contentLink"], "w") + file.write(soup.prettify()) + file.close() + tmpEntries[id] = tmpEntry + tmpIds.append(id) + + for entryId in self.getIds()[:]: + currentTime = time.time() + expiry = float(expiryTime) * 3600. + try: + articleTime = time.mktime(self.entries[entryId]["dateTuple"]) + if currentTime - articleTime < expiry: tmpEntries[entryId] = self.entries[entryId] tmpIds.append(entryId) - else: - if (not self.isEntryRead(entryId)) and (currentTime - articleTime < 2*expiry): - tmpEntries[entryId] = self.entries[entryId] - tmpIds.append(entryId) + else: + if (not self.isEntryRead(entryId)) and (currentTime - articleTime < 2*expiry): + tmpEntries[entryId] = self.entries[entryId] + tmpIds.append(entryId) + else: + self.removeEntry(id) + except: + self.removeEntry(id) + print "Error purging old articles %s" % id + self.entries = tmpEntries self.ids = tmpIds @@ -234,11 +236,14 @@ class Feed: def getTitle(self, id): return self.entries[id]["title"] - def getLink(self, id): + def getContentLink(self, id): if self.entries[id].has_key("contentLink"): return self.entries[id]["contentLink"] return self.entries[id]["link"] + def getExternalLink(self, id): + return self.entries[id]["link"] + def getDate(self, id): return self.entries[id]["date"] @@ -287,16 +292,33 @@ class Feed: return self.entries[id]["content"] def removeEntry(self, id): - entry = self.entries[id] - for img in entry["images"]: - self.imageHandler.removeImage(self.uniqueId, img) - if entry.has_key["contentLink"]: - os.remove(entry["contentLink"]) - self.entries.remove(id) - self.ids.remove(id) - if self.readItems[id]==False: - self.countUnread = self.countUnread - 1 - self.readItems.remove(id) + #try: + if self.entries.has_key(id): + entry = self.entries[id] + if entry.has_key("images"): + for img in entry["images"]: + self.imageHandler.removeImage(self.uniqueId, img) + + if entry.has_key("contentLink"): + try: + remove(entry["contentLink"]) #os.remove + except: + print "File not found for deletion: %s" % entry["contentLink"] + del self.entries[id] + else: + print "Entries has no %s key" % id + if id in self.ids: + self.ids.remove(id) + else: + print "Ids has no %s key" % id + if self.readItems.has_key(id): + if self.readItems[id]==False: + self.countUnread = self.countUnread - 1 + del self.readItems[id] + else: + print "ReadItems has no %s key" % id + #except: + # print "Error removing entry %s" %id def getArticle(self, id): self.setEntryRead(id) @@ -340,7 +362,7 @@ class ArchivedArticles(Feed): self.saveFeed(configdir) self.saveUnread(configdir) - def updateFeed(self, configdir, expiryTime=24, proxy=None): + def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False): for id in self.getIds(): entry = self.entries[id] if not entry["downloaded"]: @@ -350,7 +372,7 @@ class ArchivedArticles(Feed): html = f.read() f.close() soup = BeautifulSoup(html) - images = soup.body('img') + images = soup('img') baseurl = ''.join(urlparse(entry["link"])[:-1]) for img in images: filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src']) @@ -476,16 +498,16 @@ class Listing: feed = Feed(getId(title), title, url, self.imageHandler) return feed - def updateFeeds(self, expiryTime=24, proxy=None): + def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False): for key in self.getListOfFeeds(): feed = self.loadFeed(key) - feed.updateFeed(self.configdir, expiryTime, proxy) + feed.updateFeed(self.configdir, expiryTime, proxy, imageCache) self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems() self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime() - def updateFeed(self, key, expiryTime=24, proxy=None): + def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False): feed = self.getFeed(key) - feed.updateFeed(self.configdir, expiryTime, proxy) + feed.updateFeed(self.configdir, expiryTime, proxy, imageCache) self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems() self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime() -- 1.7.9.5