Tried out an optimization that didn't seem to work.

[doneit] / src / rtm_api.py
diff --git a/src/rtm_api.py b/src/rtm_api.py

index d513fc8..b748b38 100644 (file)
--- a/src/rtm_api.py
+++ b/src/rtm_api.py
@@ -8,7 +8,9 @@ Python library for Remember The Milk API
  import weakref
  import warnings
  import urllib
-from md5 import md5
+import urllib2
+import hashlib
+import time
  
  _use_simplejson = False
  try:
@@ -32,6 +34,10 @@ class RTMAPIError(RTMError):
         pass
  
  
+class RTMParseError(RTMError):
+       pass
+
+
  class AuthStateMachine(object):
         """If the state is in those setup for the machine, then return
         the datum sent.  Along the way, it is an automatic call if the
@@ -76,14 +82,71 @@ class RTMapi(object):
         def _sign(self, params):
                 "Sign the parameters with MD5 hash"
                 pairs = ''.join(['%s%s' % (k, v) for (k, v) in sortedItems(params)])
-               return md5(self._secret+pairs).hexdigest()
+               return hashlib.md5(self._secret+pairs).hexdigest()
  
         @staticmethod
         def open_url(url, queryArgs=None):
                 if queryArgs:
                         url += '?' + urllib.urlencode(queryArgs)
                 warnings.warn("Performing download of %s" % url, stacklevel=5)
-               return urllib.urlopen(url)
+               return urllib2.urlopen(url)
+
+       @staticmethod
+       def read_by_length(connection, timeout):
+               # It appears that urllib uses the non-blocking variant of file objects,
+               # which means a single read() might not return the complete response.
+               # So keep reading, sleeping a second between attempts, until the number
+               # of bytes read matches Content-Length or the timeout runs out.
+               contentLengthField = "Content-Length"
+               assert contentLengthField in connection.info(), "Connection didn't provide content length info"
+               specifiedLength = int(connection.info()["Content-Length"])
+
+               actuallyRead = 0
+               chunks = []
+               chunk = connection.read()
+               while 0 < timeout:
+                       actuallyRead += len(chunk)
+                       if actuallyRead == specifiedLength:
+                               break
+                       chunks.append(chunk)
+                       time.sleep(1)
+                       timeout -= 1
+                       chunk = connection.read()
+               chunks.append(chunk)
+               json = "".join(chunks)
+
+               if "Content-Length" in connection.info():
+                       assert len(json) == int(connection.info()["Content-Length"]), "The packet header promised %s of data but only was able to read %s of data" % (
+                               connection.info()["Content-Length"],
+                               len(json),
+                       )
+
+               return json
+
+       @staticmethod
+       def read_by_guess(connection, timeout):
+               # It appears that urllib uses the non-blocking variant of file objects,
+               # which means a single read() might not return the complete response.
+               # So keep reading, sleeping a second between attempts, until a read
+               # comes back empty or the timeout runs out.
+
+               chunks = []
+               chunk = connection.read()
+               while chunk and 0 < timeout:
+                       chunks.append(chunk)
+                       time.sleep(1)
+                       timeout -= 1
+                       chunk = connection.read()
+               chunks.append(chunk)
+               json = "".join(chunks)
+
+               if "Content-Length" in connection.info():
+                       assert len(json) == int(connection.info()["Content-Length"]), "The packet header promised %s of data but only was able to read %s of data" % (
+                               connection.info()["Content-Length"],
+                               len(json),
+                       )
+
+               return json
  
         def get(self, **params):
                 "Get the XML response for the passed `params`."
@@ -91,12 +154,11 @@ class RTMapi(object):
                 params['format'] = 'json'
                 params['api_sig'] = self._sign(params)
  
-               json = self.open_url(SERVICE_URL, params).read()
+               connection = self.open_url(SERVICE_URL, params)
+               json = self.read_by_guess(connection, 5)
+               # json = self.read_by_length(connection, 5)
  
-               if _use_simplejson:
-                       data = DottedDict('ROOT', simplejson.loads(json))
-               else:
-                       data = DottedDict('ROOT', safer_eval(json))
+               data = DottedDict('ROOT', parse_json(json))
                 rsp = data.rsp
  
                 if rsp.stat == 'fail':
@@ -207,7 +269,21 @@ class DottedDict(object):
  
  
  def safer_eval(string):
-       return eval(string, {}, {})
+       try:
+               return eval(string, {}, {})
+       except SyntaxError, e:
+               print "="*60
+               print string
+               print "="*60
+               newE = RTMParseError("Error parseing json")
+               newE.error = e
+               raise newE
+
+
+if _use_simplejson:
+       parse_json = simplejson.loads
+else:
+       parse_json = safer_eval
  
  
  API = {