major revamps due to unreliable comics

author Marcus Wikström <mece@mariehamn.(none)>

Wed, 3 Feb 2010 16:32:07 +0000 (18:32 +0200)

committer Marcus Wikström <mece@mariehamn.(none)>

Wed, 3 Feb 2010 16:32:07 +0000 (18:32 +0200)
author Marcus Wikström <mece@mariehamn.(none)>
Wed, 3 Feb 2010 16:32:07 +0000 (18:32 +0200)
committer Marcus Wikström <mece@mariehamn.(none)>
Wed, 3 Feb 2010 16:32:07 +0000 (18:32 +0200)
diff --git a/src/usr/lib/hildon-desktop/comicwidget.py b/src/usr/lib/hildon-desktop/comicwidget.py

index 4b6239d..033ad47 100644 (file)
--- a/src/usr/lib/hildon-desktop/comicwidget.py
+++ b/src/usr/lib/hildon-desktop/comicwidget.py
@@ -26,7 +26,12 @@ comiccache = "/home/user/MyDocs/.comics/"
  comics = {"xkcd":{"name":"xkcd","link":"http://xkcd.org/","start":666,"dbfile":dbdir + "comicdb.xkcd.csv"},
                 "sinfest":{"name":"Sinfest","link":"http://sinfest.com/","start":3400,"dbfile":dbdir + "comicdb.sinfest.csv"},
                 "phd":{"name":"PHD Comics","link":"http://www.phdcomics.com/","start":1240,"dbfile":dbdir + "comicdb.phd.csv"},
-               "dilbert":{"name":"Dilbert","link":"http://dilbert.com.com/","start":"2009-01-01","dbfile":dbdir + "comicdb.dilbert.csv"}}
+               "dilbert":{"name":"Dilbert","link":"http://dilbert.com/","start":"2009-01-01","dbfile":dbdir + "comicdb.dilbert.csv"},
+               "cyanide":{"name":"Cyanide&amp;Happiness","link":"http://explosm.com/","start":"1920","dbfile":dbdir + "comicdb.cyanide.csv"},
+               }
+
+previous = False
+next = False
  
  # handling of the comics
  class ComicDb():
@@ -45,6 +50,8 @@ class ComicDb():
                 for row in dbr:
                         self.db.insert(0,row)
                 dbf.close()
+               if len(self.db) == 0:
+                       self.refresh()
                 self.currentcomic = 0
  
  
@@ -74,6 +81,7 @@ class ComicDb():
                                 dbf = open(self.dbfile, 'w')
                                 dbf.write('comic,id,link,url,filename,title\n')
                                 dbf.close()
+                               
                         if os.path.isfile(self.dbfile) == True:
                                 dbf = open(self.dbfile, 'r')
                                 return dbf
@@ -148,6 +156,7 @@ class ComicDb():
  
         def refresh(self):
                 if len(self.db) < 1:
+                       self.currentcomic = -1
                         self.fetch_latest_std(self.comic, self.start)
                 elif self.currentcomic == 0 or self.currentcomic < 0:
                         self.fetch_latest_std(self.comic, self.db[0]['id'])
@@ -181,39 +190,95 @@ class ComicDb():
  
         def fetch_latest_std(self, comic, latest):
                 print "fetching new after " + str(comic) + " " + str(latest)
+               next = False
                 dateid = False
-               if len(str(latest)) == 10:
-                       # date id.
-                       dateid = True
-                       dt = string.split(latest, "-")
-                       d = datetime.date(int(dt[0]),int(dt[1]),int(dt[2]))
-                       newer = d + datetime.timedelta( 1 )
-                       comicid = newer.isoformat()
-               else:
-                       comicid = int(latest) + 1
+               if comic == 'cyanide':
+                       next = self.get_next_id(comic, latest)
+                       if not next:
+                               return
+                       else:
+                               comicid = next
+               else: 
+                       if len(str(latest)) == 10:
+                               # date id.
+                               dateid = True
+                               dt = string.split(latest, "-")
+                               d = datetime.date(int(dt[0]),int(dt[1]),int(dt[2]))
+                               newer = d + datetime.timedelta( 1 )
+                               comicid = newer.isoformat()
+                       else:
+                               comicid = int(latest) + 1
  
-               lasturl = self.db[0]['url']
+               if len(self.db) > 0:
+                       lasturl = self.db[0]['url']
+               else:
+                       lasturl = "http"
                 while True:
                         irow = self.get_irow(comic, comicid)
                         if irow:
                                 print "got irow: " + str(irow)
-                               if irow[3] == lasturl:
-                                       print "Looping the same, break break break!"
-                                       break
-                               lasturl = irow[3]
-                               print "inserting..."
-                               self.insert_row(irow)
+                               if str(irow[0]) == 'skip':
+                                       print "skipping this one..."
+                                       next = int(irow[1])
+                               else:
+                                       if irow[3] == lasturl:
+                                               print "Looping the same, break break break!"
+                                               break
+                                       lasturl = irow[3]
+                                       print "inserting..."
+                                       self.insert_row([irow[0],irow[1],irow[2],irow[3],irow[4],irow[5]])
+                                       if len(irow) > 6:
+                                               next = irow[6]
+                                               if not next:
+                                                       break
                                 if dateid:
                                         dt = string.split(comicid, "-")
                                         d = datetime.date(int(dt[0]),int(dt[1]),int(dt[2]))
                                         newer = d + datetime.timedelta( 1 )
                                         comicid = newer.isoformat()
+                               elif next:
+                                       comicid = next
                                 else:
                                         comicid += 1
                         else:
                                 break
  
  
+
+
+       def get_next_id(self, comic, number):
+               if comic == 'cyanide':
+                       link = "http://www.explosm.net/comics/" + str(number) + "/"
+                       print "link: " + link
+                       try:
+                               f = urllib2.urlopen(link)
+                               hcode = f.code
+                       except:
+                               hcode = 404
+                       print "got hcode = " + str(hcode) + "\n"
+                       if (hcode != 200):
+                               return False
+                       else:
+                               print "Cyanide & Happiness is unreliable, so we need to track next and prev"
+                               s = f.read()
+                               f.close()
+                               # next comic id (the link between "Previous" and "Next"):
+                               splt = string.split(s, 'Previous</a> | <a href="/comics/', 1)
+                               if len(splt) < 2:
+                                       print "no 'next' found"
+                                       return False
+                               splt = string.split(splt[1], '/">Next >', 1)
+                               if len(splt) < 2:
+                                       print "no 'next' found"
+                                       return False
+                               else:
+                                       return splt[0]                          
+
+
+
+
+
+
         def get_irow(self, comic, number):
                 dateid = False
                 if len(str(number)) == 10:
@@ -230,6 +295,8 @@ class ComicDb():
                         link = "http://dilbert.com/" + str(number) + "/"
                 elif comic == 'phd':
                         link = "http://www.phdcomics.com/comics/archive.php?comicid=" + str(number)
+               elif comic == 'cyanide':
+                       link = "http://www.explosm.net/comics/" + str(number) + "/"
                 else:
                         return False
  
@@ -297,7 +364,46 @@ class ComicDb():
                                         print "Fake 404! Break break break!"
                                         return False
                                 
-
+                       elif comic == 'cyanide':
+                               s = f.read()
+                               f.close()
+                               # prev/next ids first, then title and image url:
+                               splt = string.split(s, ' First</a> | <a href="/comics/', 1)
+                               if len(splt) < 2:
+                                       print "first comic?"
+                                       prev = False
+                               else:
+                                       splt = string.split(splt[1], '/">< ', 1)
+                                       prev = splt[0];
+                               splt = string.split(s, 'Previous</a> | <a href="/comics/', 1)
+                               if len(splt) < 2:
+                                       print "last comic?"
+                                       next = False
+                               else:
+                                       splt = string.split(splt[1], '/">Next ></a>', 1)
+                                       if len(splt[0]) > 10:
+                                               next = False
+                                       else:
+                                               next = splt[0]
+
+                               splt = string.split(s, '</tr><tr><td colspan=2>', 1)
+                               if len(splt) < 2:
+                                       print "no comic?"
+                                       return False
+                               splt = string.split(splt[1], ' <b>by <a href="', 1)
+                               title = splt[0];
+                               splt = string.split(splt[1], 'a daily webcomic" src="http://www.explosm', 1)
+                               if len(splt) < 2:
+                                       print "a video? Try skipping"
+                                       return ['skip',next,prev]
+                                       
+                               splt = string.split(splt[1], '"></div><br />', 1)
+                               url = "http://www.explosm" + splt[0]
+                               splt2 = string.rsplit(url, "/", 1)
+                               filename = splt2[1]
+                               irow = [comic,number,link,url,filename,title,next,prev]
+                               return irow
+       
                         splt2 = string.rsplit(url, "/", 1)
                         filename = splt2[1]
                         irow = [comic,number,link,url,filename,title]
author	Marcus Wikström <mece@mariehamn.(none)>
	Wed, 3 Feb 2010 16:32:07 +0000 (18:32 +0200)
committer	Marcus Wikström <mece@mariehamn.(none)>
	Wed, 3 Feb 2010 16:32:07 +0000 (18:32 +0200)