major revamps due to unreliable comics
author Marcus Wikström <mece@mariehamn.(none)>
Wed, 3 Feb 2010 16:32:07 +0000 (18:32 +0200)
committer Marcus Wikström <mece@mariehamn.(none)>
Wed, 3 Feb 2010 16:32:07 +0000 (18:32 +0200)
src/usr/lib/hildon-desktop/comicwidget.py

index 4b6239d..033ad47 100644 (file)
@@ -26,7 +26,12 @@ comiccache = "/home/user/MyDocs/.comics/"
 comics = {"xkcd":{"name":"xkcd","link":"http://xkcd.org/","start":666,"dbfile":dbdir + "comicdb.xkcd.csv"},
                "sinfest":{"name":"Sinfest","link":"http://sinfest.com/","start":3400,"dbfile":dbdir + "comicdb.sinfest.csv"},
                "phd":{"name":"PHD Comics","link":"http://www.phdcomics.com/","start":1240,"dbfile":dbdir + "comicdb.phd.csv"},
-               "dilbert":{"name":"Dilbert","link":"http://dilbert.com.com/","start":"2009-01-01","dbfile":dbdir + "comicdb.dilbert.csv"}}
+               "dilbert":{"name":"Dilbert","link":"http://dilbert.com/","start":"2009-01-01","dbfile":dbdir + "comicdb.dilbert.csv"},
+               "cyanide":{"name":"Cyanide&amp;Happiness","link":"http://explosm.com/","start":"1920","dbfile":dbdir + "comicdb.cyanide.csv"},
+               }
+
+previous = False
+next = False
 
 # handling of the comics
 class ComicDb():
@@ -45,6 +50,8 @@ class ComicDb():
                for row in dbr:
                        self.db.insert(0,row)
                dbf.close()
+               if len(self.db) == 0:
+                       self.refresh()
                self.currentcomic = 0
 
 
@@ -74,6 +81,7 @@ class ComicDb():
                                dbf = open(self.dbfile, 'w')
                                dbf.write('comic,id,link,url,filename,title\n')
                                dbf.close()
+                               
                        if os.path.isfile(self.dbfile) == True:
                                dbf = open(self.dbfile, 'r')
                                return dbf
@@ -148,6 +156,7 @@ class ComicDb():
 
        def refresh(self):
                if len(self.db) < 1:
+                       self.currentcomic = -1
                        self.fetch_latest_std(self.comic, self.start)
                elif self.currentcomic == 0 or self.currentcomic < 0:
                        self.fetch_latest_std(self.comic, self.db[0]['id'])
@@ -181,39 +190,95 @@ class ComicDb():
 
        def fetch_latest_std(self, comic, latest):
                print "fetching new after " + str(comic) + " " + str(latest)
+               next = False
                dateid = False
-               if len(str(latest)) == 10:
-                       # date id.
-                       dateid = True
-                       dt = string.split(latest, "-")
-                       d = datetime.date(int(dt[0]),int(dt[1]),int(dt[2]))
-                       newer = d + datetime.timedelta( 1 )
-                       comicid = newer.isoformat()
-               else:
-                       comicid = int(latest) + 1
+               if comic == 'cyanide':
+                       next = self.get_next_id(comic, latest)
+                       if not next:
+                               return
+                       else:
+                               comicid = next
+               else: 
+                       if len(str(latest)) == 10:
+                               # date id.
+                               dateid = True
+                               dt = string.split(latest, "-")
+                               d = datetime.date(int(dt[0]),int(dt[1]),int(dt[2]))
+                               newer = d + datetime.timedelta( 1 )
+                               comicid = newer.isoformat()
+                       else:
+                               comicid = int(latest) + 1
 
-               lasturl = self.db[0]['url']
+               if len(self.db) > 0:
+                       lasturl = self.db[0]['url']
+               else:
+                       lasturl = "http"
                while True:
                        irow = self.get_irow(comic, comicid)
                        if irow:
                                print "got irow: " + str(irow)
-                               if irow[3] == lasturl:
-                                       print "Looping the same, break break break!"
-                                       break
-                               lasturl = irow[3]
-                               print "inserting..."
-                               self.insert_row(irow)
+                               if str(irow[0]) == 'skip':
+                                       print "skipping this one..."
+                                       next = int(irow[1])
+                               else:
+                                       if irow[3] == lasturl:
+                                               print "Looping the same, break break break!"
+                                               break
+                                       lasturl = irow[3]
+                                       print "inserting..."
+                                       self.insert_row([irow[0],irow[1],irow[2],irow[3],irow[4],irow[5]])
+                                       if len(irow) > 6:
+                                               next = irow[6]
+                                               if not next:
+                                                       break
                                if dateid:
                                        dt = string.split(comicid, "-")
                                        d = datetime.date(int(dt[0]),int(dt[1]),int(dt[2]))
                                        newer = d + datetime.timedelta( 1 )
                                        comicid = newer.isoformat()
+                               elif next:
+                                       comicid = next
                                else:
                                        comicid += 1
                        else:
                                break
 
 
+
+
+       def get_next_id(self, comic, number):
+               if comic == 'cyanide':
+                       link = "http://www.explosm.net/comics/" + str(number) + "/"
+                       print "link: " + link
+                       try:
+                               f = urllib2.urlopen(link)
+                               hcode = f.code
+                       except:
+                               hcode = 404
+                       print "got hcode = " + str(hcode) + "\n"
+                       if (hcode != 200):
+                               return False
+                       else:
+                               print "Cyanide & Happiness is unreliable, so we need to track next and prev"
+                               s = f.read()
+                               f.close()
+                               # title:
+                               splt = string.split(s, 'Previous</a> | <a href="/comics/', 1)
+                               if len(splt) < 2:
+                                       print "no 'next' found"
+                                       return False
+                               splt = string.split(splt[1], '/">Next >', 1)
+                               if len(splt) < 2:
+                                       print "no 'next' found"
+                                       return False
+                               else:
+                                       return splt[0]                          
+
+
+
+
+
+
        def get_irow(self, comic, number):
                dateid = False
                if len(str(number)) == 10:
@@ -230,6 +295,8 @@ class ComicDb():
                        link = "http://dilbert.com/" + str(number) + "/"
                elif comic == 'phd':
                        link = "http://www.phdcomics.com/comics/archive.php?comicid=" + str(number)
+               elif comic == 'cyanide':
+                       link = "http://www.explosm.net/comics/" + str(number) + "/"
                else:
                        return False
 
@@ -297,7 +364,46 @@ class ComicDb():
                                        print "Fake 404! Break break break!"
                                        return False
                                
-
+                       elif comic == 'cyanide':
+                               s = f.read()
+                               f.close()
+                               # title:
+                               splt = string.split(s, ' First</a> | <a href="/comics/', 1)
+                               if len(splt) < 2:
+                                       print "first comic?"
+                                       prev = False
+                               else:
+                                       splt = string.split(splt[1], '/">< ', 1)
+                                       prev = splt[0];
+                               splt = string.split(s, 'Previous</a> | <a href="/comics/', 1)
+                               if len(splt) < 2:
+                                       print "last comic?"
+                                       next = False
+                               else:
+                                       splt = string.split(splt[1], '/">Next ></a>', 1)
+                                       if len(splt[0]) > 10:
+                                               next = False
+                                       else:
+                                               next = splt[0]
+
+                               splt = string.split(s, '</tr><tr><td colspan=2>', 1)
+                               if len(splt) < 2:
+                                       print "no comic?"
+                                       return False
+                               splt = string.split(splt[1], ' <b>by <a href="', 1)
+                               title = splt[0];
+                               splt = string.split(splt[1], 'a daily webcomic" src="http://www.explosm', 1)
+                               if len(splt) < 2:
+                                       print "a video? Try skipping"
+                                       return ['skip',next,prev]
+                                       
+                               splt = string.split(splt[1], '"></div><br />', 1)
+                               url = "http://www.explosm" + splt[0]
+                               splt2 = string.rsplit(url, "/", 1)
+                               filename = splt2[1]
+                               irow = [comic,number,link,url,filename,title,next,prev]
+                               return irow
+       
                        splt2 = string.rsplit(url, "/", 1)
                        filename = splt2[1]
                        irow = [comic,number,link,url,filename,title]