2 # -*- coding: utf-8 -*-
3 # This library is free software, distributed under the terms of
4 # the GNU Lesser General Public License Version 2.
5 # See the COPYING file included in this archive
7 # The docstrings in this module contain epytext markup; API documentation
8 # may be created by processing this file with epydoc: http://epydoc.sf.net
10 Original by Francois Aucamp
11 Modified by Nick Leppänen Larsson for use in Maemo5/Fremantle on the Nokia N900.
13 @author: Francois Aucamp C{<faucamp@csir.co.za>}
14 @author: Nick Leppänen Larsson <frals@frals.se>
15 @license: GNU Lesser General Public License, version 2
16 @note: This is part of the PyMMS library
18 WSP Data Unit structure encoding and decoding classes
20 Throughout the classes defined in this module, the following "primitive data
21 type" terminology applies, as specified in [5], section 8.1.1::
25 octet 8 bits of opaque data
26 uint8 8-bit unsigned integer
27 uint16 16-bit unsigned integer
28 uint32 32-bit unsigned integer
29 uintvar variable length unsigned integer
31 The Encoder and Decoder classes provided in this module firstly provide
32 public methods for decoding and encoding each of these data primitives (where
35 Next, they provide methods encapsulating the basic WSP Header encoding rules
36 as defined in section 8.4.2.1 of [5].
38 Finally, the classes defined here provide methods for decoding/parsing
39 specific WSP header fields.
41 @note: References used in the code and this document:
42 5. Wap Forum/Open Mobile Alliance, "WAP-230 Wireless Session Protocol Specification"
43 U{http://www.openmobilealliance.org/tech/affiliates/LicenseAgreement.asp?DocName=/wap/wap-230-wsp-20010705-a.pdf}
47 from iterator import PreviewIterator
53 class WSPEncodingAssignments:
54 """ Static class containing the constant values defined in [5] for
55 well-known content types, parameter names, etc.
57 It also defines some functions for combining assigned number-tables for
58 specific WSP encoding versions, where appropriate.
60 This is used by both the Encoder and Decoder classes during well-known
61 assigned number lookups (usually these functions have the string
62 C{WellKnown} in their names).
64 - Assigned parameters are stored in a dictionary, C{wkParameters},
65 containing all assigned values for WSP encoding versions 1.1 - 1.4,
67 C{{<int>assigned number: (<str>name, <str>expected value type)}}
68 An "encoding versioned" version of this dictionary can be retrieved
69 by calling the C{wellKnownParameters()} function with an appropriate
70 WSP encoding version as parameter.
71 - Assigned content types are stored in a list, C{wkContentTypes}, in
72 order; thus, their index in the list is equal to their assigned
76 wspPDUTypes = {0x01: 'Connect',
82 0x07: 'ConfirmedPush',
88 # Well-known parameter assignments ([5], table 38)
89 wkParameters = {0x00: ('Q', 'QValue'),
90 0x01: ('Charset', 'WellKnownCharset'),
91 0x02: ('Level', 'VersionValue'),
92 0x03: ('Type', 'IntegerValue'),
93 0x05: ('Name', 'TextString'),
94 0x06: ('Filename', 'TextString'),
95 0x07: ('Differences', 'Field-name'),
96 0x08: ('Padding', 'ShortInteger'),
97 0x09: ('Type', 'ConstrainedEncoding'), # encoding version 1.2
98 0x0a: ('Start', 'TextString'),
99 0x0b: ('Start-info', 'TextString'),
100 0x0c: ('Comment', 'TextString'), # encoding version 1.3
101 0x0d: ('Domain', 'TextString'),
102 0x0e: ('Max-Age', 'DeltaSecondsValue'),
103 0x0f: ('Path', 'TextString'),
104 0x10: ('Secure', 'NoValue'),
105 0x11: ('SEC', 'ShortInteger'), # encoding version 1.4
106 0x12: ('MAC', 'TextValue'),
107 0x13: ('Creation-date', 'DateValue'),
108 0x14: ('Modification-date', 'DateValue'),
109 0x15: ('Read-date', 'DateValue'),
110 0x16: ('Size', 'IntegerValue'),
111 0x17: ('Name', 'TextValue'),
112 0x18: ('Filename', 'TextValue'),
113 0x19: ('Start', 'TextValue'),
114 0x1a: ('Start-info', 'TextValue'),
115 0x1b: ('Comment', 'TextValue'),
116 0x1c: ('Domain', 'TextValue'),
117 0x1d: ('Path', 'TextValue'),
118 0x40: ('Content-ID', 'QuotedString')}
120 # Content type assignments ([5], table 40)
121 wkContentTypes = ['*/*', 'text/*', 'text/html', 'text/plain',
122 'text/x-hdml', 'text/x-ttml', 'text/x-vCalendar',
123 'text/x-vCard', 'text/vnd.wap.wml',
124 'text/vnd.wap.wmlscript', 'text/vnd.wap.wta-event',
125 'multipart/*', 'multipart/mixed', 'multipart/form-data',
'multipart/byteranges', 'multipart/alternative',
127 'application/*', 'application/java-vm',
128 'application/x-www-form-urlencoded',
129 'application/x-hdmlc', 'application/vnd.wap.wmlc',
130 'application/vnd.wap.wmlscriptc',
131 'application/vnd.wap.wta-eventc',
132 'application/vnd.wap.uaprof',
133 'application/vnd.wap.wtls-ca-certificate',
134 'application/vnd.wap.wtls-user-certificate',
135 'application/x-x509-ca-cert',
136 'application/x-x509-user-cert',
137 'image/*', 'image/gif', 'image/jpeg', 'image/tiff',
138 'image/png', 'image/vnd.wap.wbmp',
139 'application/vnd.wap.multipart.*',
140 'application/vnd.wap.multipart.mixed',
141 'application/vnd.wap.multipart.form-data',
142 'application/vnd.wap.multipart.byteranges',
143 'application/vnd.wap.multipart.alternative',
144 'application/xml', 'text/xml',
145 'application/vnd.wap.wbxml',
146 'application/x-x968-cross-cert',
147 'application/x-x968-ca-cert',
148 'application/x-x968-user-cert',
150 'application/vnd.wap.sic',
152 'application/vnd.wap.slc',
154 'application/vnd.wap.coc',
155 'application/vnd.wap.multipart.related',
156 'application/vnd.wap.sia',
157 'text/vnd.wap.connectivity-xml',
158 'application/vnd.wap.connectivity-wbxml',
159 'application/pkcs7-mime',
160 'application/vnd.wap.hashed-certificate',
161 'application/vnd.wap.signed-certificate',
162 'application/vnd.wap.cert-response',
163 'application/xhtml+xml',
164 'application/wml+xml',
166 'application/vnd.wap.mms-message',
167 'application/vnd.wap.rollover-certificate',
168 'application/vnd.wap.locc+wbxml',
169 'application/vnd.wap.loc+xml',
170 'application/vnd.syncml.dm+wbxml',
171 'application/vnd.syncml.dm+xml',
172 'application/vnd.syncml.notification',
173 'application/vnd.wap.xhtml+xml',
174 'application/vnd.wv.csp.cir',
175 'application/vnd.oma.dd+xml',
176 'application/vnd.oma.drm.message',
177 'application/vnd.oma.drm.content',
178 'application/vnd.oma.drm.rights+xml',
179 'application/vnd.oma.drm.rights+wbxml']
182 # Well-known character sets (table 42 of [5])
183 # Format {<assigned_number> : <charset>}
184 # Note that the assigned number is the same as the IANA MIBEnum value
185 # "gsm-default-alphabet" is not included, as it is not assigned any value in [5]
186 # Also note, this is by no means a complete list
187 wkCharSets = {0x07EA: 'big5',
188 0x03E8: 'iso-10646-ucs-2',
202 # Header Field Name assignments ([5], table 39)
203 hdrFieldNames = ['Accept', 'Accept-Charset', 'Accept-Encoding',
204 'Accept-Language', 'Accept-Ranges', 'Age',
205 'Allow', 'Authorization', 'Cache-Control',
206 'Connection', 'Content-Base', 'Content-Encoding',
207 'Content-Language', 'Content-Length',
208 'Content-Location', 'Content-MD5', 'Content-Range',
209 'Content-Type', 'Date', 'Etag', 'Expires', 'From',
210 'Host', 'If-Modified-Since', 'If-Match',
211 'If-None-Match', 'If-Range', 'If-Unmodified-Since',
212 'Location', 'Last-Modified', 'Max-Forwards', 'Pragma',
213 'Proxy-Authenticate', 'Proxy-Authorization', 'Public',
214 'Range', 'Referer', 'Retry-After', 'Server',
215 'Transfer-Encoding', 'Upgrade', 'User-Agent',
216 'Vary', 'Via', 'Warning', 'WWW-Authenticate',
217 'Content-Disposition',
218 # encoding version 1.2
219 'X-Wap-Application-Id', 'X-Wap-Content-URI',
220 'X-Wap-Initiator-URI', 'Accept-Application',
221 'Bearer-Indication', 'Push-Flag', 'Profile',
222 'Profile-Diff', 'Profile-Warning',
223 # encoding version 1.3
224 'Expect', 'TE', 'Trailer', 'Accept-Charset',
225 'Accept-Encoding', 'Cache-Control',
226 'Content-Range', 'X-Wap-Tod', 'Content-ID',
227 'Set-Cookie', 'Cookie', 'Encoding-Version',
228 # encoding version 1.4
229 'Profile-Warning', 'Content-Disposition',
230 'X-WAP-Security', 'Cache-Control']
232 #TODO: combine this dict with the hdrFieldNames table (same as well known parameter assignments)
233 # Temporary fix to allow different types of header field values to be dynamically decoded
234 hdrFieldEncodings = {'Accept': 'AcceptValue',
235 'Pragma': 'PragmaValue',
236 'Content-ID': 'QuotedString'}
def wellKnownParameters(encodingVersion = '1.2'):
    """ Builds the table of well-known parameter assignments that are valid
    for the specified WSP encoding version.

    @param encodingVersion: The WSP encoding version to use. This defaults
                            to "1.2", but may be "1.1", "1.2", "1.3" or
                            "1.4" (see table 38 in [5] for details).
    @type encodingVersion: str

    @raise ValueError: The specified encoding version is invalid.

    @return: A dictionary containing the well-known parameters with
             assigned numbers for the specified encoding version (and
             lower). Entries in this dict follow the format:
             C{{<int:assigned_number> : (<str:param_name>, <str:expected_type>)}}
    @rtype: dict
    """
    if encodingVersion not in ('1.1', '1.2', '1.3', '1.4'):
        raise ValueError('encodingVersion must be "1.1", "1.2", "1.3" or "1.4"')
    version = int(encodingVersion.split('.')[1])
    # Work on a copy; the class-level master table must stay untouched
    wkVersionedParameters = dict(WSPEncodingAssignments.wkParameters)
    # (minor version that introduced the numbers, the assigned numbers);
    # anything introduced by a version newer than the requested one is removed
    introducedBy = ((4, range(0x11, 0x1e)),
                    (3, range(0x0c, 0x11)),
                    (2, range(0x09, 0x0c)))
    for minorVersion, assignedNumbers in introducedBy:
        if version < minorVersion:
            for assignedNumber in assignedNumbers:
                del wkVersionedParameters[assignedNumber]
    return wkVersionedParameters
def headerFieldNames(encodingVersion = '1.2'):
    """ Returns the list of assigned WSP header field names.

    The encoding version is validated, but the returned list is currently
    B{not} trimmed to that version: the trimming logic is disabled (see the
    TODO in the body), so every call returns a copy of the complete table.

    @param encodingVersion: The WSP encoding version to use. This defaults
                            to "1.2", but may be "1.1", "1.2", "1.3" or
                            "1.4" (see table 39 in [5] for details).
    @type encodingVersion: str

    @raise ValueError: The specified encoding version is invalid.

    @return: A list containing the WSP header field names; the index of
             each name is its assigned number.
    @rtype: list
    """
    if encodingVersion not in ('1.1', '1.2', '1.3', '1.4'):
        raise ValueError('encodingVersion must be "1.1", "1.2", "1.3" or "1.4"')
    # Hand out a copy so callers cannot modify the class-level table
    versionedHdrFieldNames = list(WSPEncodingAssignments.hdrFieldNames)
    ### TODO: uncomment and fix
    # version = int(encodingVersion.split('.')[1])
    # if version == 3:
    #     versionedHdrFieldNames = versionedHdrFieldNames[:0x44]
    # elif version == 2:
    #     versionedHdrFieldNames = versionedHdrFieldNames[:0x38]
    # elif version == 1:
    #     versionedHdrFieldNames = versionedHdrFieldNames[:0x2f]
    return versionedHdrFieldNames
class DecodeError(Exception):
    """ Raised when a byte sequence cannot be decoded; most probably because
    it contains a byte that is invalid for the data type being decoded. """
class EncodeError(Exception):
    """ Raised when a value cannot be encoded; most probably because the
    value provided for encoding is invalid. """
312 """ A WSP Data unit decoder """
def decodeUint8(byteIter):
    """ Decodes an 8-bit unsigned integer from the byte returned by
    C{byteIter.next()}.

    @note: this function moves the iterator passed as C{byteIter} one
           byte forward.

    @param byteIter: an iterator over a sequence of bytes
    @type byteIter: iter

    @return: the decoded 8-bit unsigned integer
    @rtype: int
    """
    octet = byteIter.next()
    # Keep only the low 8 bits so the result is always unsigned
    return octet & 0xff
def decodeUintvar(byteIter):
    """ Decodes the variable-length unsigned integer starting at the
    byte returned by C{byteIter.next()}.

    Every octet contributes its 7 least significant bits to the result
    (most significant group first). An octet with its top bit set announces
    that another octet follows; the first octet with the top bit clear ends
    the sequence.

    @note: this function consumes the whole uintvar sequence; afterwards
           the iterator's C{next()} returns the first byte B{after} it.

    @param byteIter: an iterator over a sequence of bytes
    @type byteIter: iter

    @return: the decoded unsigned integer
    @rtype: int
    """
    uintvar = 0
    while True:
        octet = byteIter.next()
        uintvar = (uintvar << 7) | (octet & 0x7f)
        # Continuation bit clear: this was the final octet
        if (octet >> 7) != 0x01:
            return uintvar
def decodeShortInteger(byteIter):
    """ Decodes the short-integer value starting at the byte returned by
    C{byteIter.next()}.

    The encoding for a short integer is specified in [5], section 8.4.2.1:
    C{Short-integer = OCTET
    Integers in range 0-127 shall be encoded as a one octet value with
    the most significant bit set to one (1xxx xxxx) and with the value
    in the remaining least significant bits.}

    @param byteIter: a previewable iterator over a sequence of bytes
                     (must provide C{preview()} and C{resetPreview()})

    @raise DecodeError: Not a valid short-integer; the most significant
                        bit is not set. The byte is only previewed in this
                        case, and the preview is reset before raising.

    @return: The decoded short integer
    @rtype: int
    """
    # Peek first, so an invalid byte is left unconsumed
    if not byteIter.preview() & 0x80:
        byteIter.resetPreview()
        raise DecodeError('Not a valid short-integer: most significant bit not set')
    return byteIter.next() & 0x7f
def decodeShortIntegerFromByte(byte):
    """ Decodes the short-integer value contained in the specified byte
    value.

    @param byte: the byte value to decode
    @type byte: int

    @raise DecodeError: Not a valid short-integer; the most significant
                        bit is not set.

    @return: The decoded short integer (the 7 least significant bits)
    @rtype: int
    """
    if not byte & 0x80:
        raise DecodeError('Not a valid short-integer: most significant bit not set')
    return byte & 0x7f
def decodeLongInteger(byteIter):
    """ Decodes the long integer value starting at the byte returned by
    C{byteIter.next()}.

    The encoding for a long integer is specified in [5], section 8.4.2.1,
    and follows the form::

     Long-integer = [Short-length] [Multi-octet-integer]
                     1 byte         <Short-length> bytes

    The Short-length indicates the length of the Multi-octet-integer,
    which is stored most significant octet first.

    @param byteIter: an iterator over a sequence of bytes
    @type byteIter: iter

    @raise DecodeError: The first byte is not a valid short-length, so no
                        long-integer starts here. C{decodeShortLength()}
                        only previews the byte in that case.

    @note: On success the whole sequence is consumed; the iterator's
           C{next()} then returns the first byte B{after} it.

    @return: The decoded long integer
    @rtype: int
    """
    try:
        shortLength = Decoder.decodeShortLength(byteIter)
    except DecodeError:
        raise DecodeError('Not a valid long-integer: short-length byte is invalid')
    longInt = 0
    # Decode the Multi-octet-integer, shifting in one octet at a time
    for _ in range(shortLength):
        longInt = (longInt << 8) | byteIter.next()
    return longInt
def decodeTextString(byteIter):
    """ Decodes the null-terminated, binary-encoded string value starting
    at the byte returned by C{byteIter.next()}.

    This follows the basic encoding rules specified in [5], section
    8.4.2.1. A leading Quote octet (127) is dropped if present.

    @note: this function consumes the string including its terminating
           null octet; afterwards the iterator's C{next()} returns the
           first byte B{after} the encoded string sequence.

    @param byteIter: an iterator over a sequence of bytes
    @type byteIter: iter

    @return: The decoded text string (without the terminator)
    @rtype: str
    """
    byte = byteIter.next()
    # Remove Quote character (octet 127), if present
    if byte == 127:
        byte = byteIter.next()
    # Collect the characters and join once, rather than growing a string
    characters = []
    while byte != 0x00:
        characters.append(chr(byte))
        byte = byteIter.next()
    return ''.join(characters)
def decodeQuotedString(byteIter):
    """ From [5], section 8.4.2.1:
    Quoted-string = <Octet 34> *TEXT End-of-string
    The TEXT encodes an RFC2616 Quoted-string with the enclosing
    quotation-marks <"> removed

    @param byteIter: a previewable iterator over a sequence of bytes
                     (must provide C{preview()} and C{resetPreview()})

    @raise DecodeError: The sequence does not start with octet 34. The
                        byte is only previewed in this case, and the
                        preview is reset before raising.

    @return: The decoded text string, without the leading quote octet
    @rtype: str
    """
    # Look for the quote character without consuming it
    if byteIter.preview() != 34:
        byteIter.resetPreview()
        raise DecodeError('Invalid quoted string; must start with <octect 34>')
    # Skip the opening quote
    byteIter.next()
    # CHECK: should the quotation chars be pre- and appended before returning?
    # *technically* we should not check for quote characters. oh well.
    return Decoder.decodeTextString(byteIter)
def decodeTokenText(byteIter):
    """ From [5], section 8.4.2.1:
    Token-text = Token End-of-string

    Characters are read until a control octet (value 31 or lower, which
    includes the End-of-string null) or a separator is met; that
    terminating octet is consumed as well.

    @param byteIter: a previewable iterator over a sequence of bytes
                     (must provide C{preview()} and C{resetPreview()})

    @raise DecodeError: invalid token (the first octet is a control octet
                        or a separator). The byte is only previewed in
                        this case, and the preview is reset before raising.

    @return: The token string
    @rtype: str
    """
    # NOTE(review): 11 is presumably meant to be HT (9), and <"> (34) is not
    # listed; both are left as they are so decoding behaviour is unchanged.
    separators = (11, 32, 40, 41, 44, 47, 58, 59, 60, 61, 62, 63, 64, 91,
                  92, 93, 123, 125)
    byte = byteIter.preview()
    if byte <= 31 or byte in separators:
        byteIter.resetPreview()
        raise DecodeError('Invalid token')
    characters = []
    byte = byteIter.next()
    while byte > 31 and byte not in separators:
        characters.append(chr(byte))
        byte = byteIter.next()
    return ''.join(characters)
def decodeExtensionMedia(byteIter):
    """ From [5], section 8.4.2.1:
    Extension-media = *TEXT End-of-string
    This encoding is used for media values, which have no well-known
    binary encoding.

    @param byteIter: a previewable iterator over a sequence of bytes
                     (must provide C{preview()} and C{resetPreview()})

    @raise DecodeError: The TEXT started with an invalid character (a
                        control octet below 32, or 127). The byte is only
                        previewed in this case, and the preview is reset
                        before raising.

    @return: The decoded media type value (terminating null consumed)
    @rtype: str
    """
    byte = byteIter.preview()
    if byte < 32 or byte == 127:
        byteIter.resetPreview()
        raise DecodeError('Invalid Extension-media: TEXT starts with invalid character: %d' % byte)
    characters = []
    byte = byteIter.next()
    while byte != 0x00:
        characters.append(chr(byte))
        byte = byteIter.next()
    return ''.join(characters)
def decodeConstrainedEncoding(byteIter):
    """ Constrained-encoding = Extension-Media --or-- Short-integer
    This encoding is used for token values, which have no well-known
    binary encoding, or when the assigned number of the well-known
    encoding is small enough to fit into Short-integer.

    @param byteIter: a previewable iterator over a sequence of bytes

    @raise DecodeError: The sequence is neither a Short-integer nor an
                        Extension-media.

    @return: The decoded constrained-encoding token value: an C{int} when
             it was a Short-integer, otherwise a C{str}
    @rtype: int or str
    """
    try:
        # First try and see if this is just a short-integer
        return Decoder.decodeShortInteger(byteIter)
    except DecodeError:
        pass
    try:
        # Ok, it should be Extension-Media then
        return Decoder.decodeExtensionMedia(byteIter)
    except DecodeError:
        # Give up
        raise DecodeError('Not a valid Constrained-encoding sequence')
def decodeShortLength(byteIter):
    """ From [5], section 8.4.2.2:
    Short-length = <Any octet 0-30>

    @param byteIter: a previewable iterator over a sequence of bytes
                     (must provide C{preview()} and C{resetPreview()})

    @raise DecodeError: The byte is not a valid short-length value;
                        it is not in octet range 0-30. The byte is only
                        previewed in this case, and the preview is reset
                        before raising.

    @note: If this function returns successfully, the iterator passed as
           C{byteIter} is moved one byte forward.

    @return: The decoded short-length
    @rtype: int
    """
    # Make sure it's a valid short-length before consuming anything
    if byteIter.preview() > 30:
        byteIter.resetPreview()
        raise DecodeError('Not a valid short-length; should be in octet range 0-30')
    return byteIter.next()
def decodeValueLength(byteIter):
    """ Decodes the value length indicator starting at the byte returned
    by C{byteIter.next()}.

    "Value length" is used to indicate the length of a value to follow, as
    used in the C{Content-Type} header in the MMS body, for example.

    The encoding for a value length indicator is specified in [5],
    section 8.4.2.2, and follows the form::

     Value-length = [Short-length]  --or--  [Length-quote] [Length]
                     1 byte                  1 byte         x bytes
                    <Any octet 0-30>        <Octet 31>     Uintvar-integer

    @param byteIter: a previewable iterator over a sequence of bytes

    @raise DecodeError: The ValueLength could not be decoded: the first
                        byte is neither a short-length nor the
                        length-quote. The byte is only previewed in this
                        case, and the preview is reset before raising.

    @return: The decoded value length indicator
    @rtype: int
    """
    # Check for short-length
    try:
        return Decoder.decodeShortLength(byteIter)
    except DecodeError:
        pass
    #CHECK: this strictness MAY cause issues, but it is correct
    if byteIter.preview() != 31:
        byteIter.resetPreview()
        raise DecodeError('Invalid Value-length: not short-length, and no length-quote present')
    byteIter.next() # skip past the length-quote
    return Decoder.decodeUintvar(byteIter)
def decodeIntegerValue(byteIter):
    """ From [5], section 8.4.2.3:
    Integer-Value = Short-integer | Long-integer

    @param byteIter: a previewable iterator over a sequence of bytes

    @raise DecodeError: The sequence of bytes starting at
                        C{byteIter.next()} is neither a valid
                        short-integer nor a valid long-integer.

    @note: If successful, the whole integer value sequence is consumed;
           the iterator's C{next()} then returns the first byte B{after}
           it.

    @return: The decoded integer value
    @rtype: int
    """
    # First try and see if it's a short-integer
    try:
        return Decoder.decodeShortInteger(byteIter)
    except DecodeError:
        pass
    try:
        return Decoder.decodeLongInteger(byteIter)
    except DecodeError:
        raise DecodeError('Not a valid integer value')
def decodeContentTypeValue(byteIter):
    """ Decodes an encoded content type value.

    From [5], section 8.4.2.24:
    C{Content-type-value = Constrained-media | Content-general-form}

    The short form of the Content-type-value MUST only be used when the
    well-known media is in the range of 0-127 or a text string. In all
    other cases the general form MUST be used.

    @param byteIter: a previewable iterator over a sequence of bytes

    @return: The media type (content type), and a dictionary of
             parameters to this content type (which is empty if there
             are no parameters, or if the short form was used). This
             parameter dictionary is in the format:
             C{{<str:parameter_name>: <str/int/float:parameter_value>}}.
             The final returned tuple is in the format:
             (<str:media_type>, <dict:parameter_dict>)
    @rtype: tuple
    """
    # First try to decode it as Constrained-media (which has no parameters)
    try:
        contentType = Decoder.decodeConstrainedMedia(byteIter)
        parameters = {}
    except DecodeError:
        # Try the general form
        contentType, parameters = Decoder.decodeContentGeneralForm(byteIter)
    return (contentType, parameters)
def decodeWellKnownMedia(byteIter):
    """ From [5], section 8.4.2.7:
    Well-known-media = Integer-value
    It is encoded using values from the "Content Type Assignments" table
    (C{WSPEncodingAssignments.wkContentTypes}).

    @param byteIter: a previewable iterator over a sequence of bytes

    @raise DecodeError: The integer value representing the well-known
                        media type cannot be decoded, or it is not an
                        index into the table of assigned content types.
                        Note that in the second case the integer has
                        already been read from C{byteIter}.

    @note: If successful, the integer value sequence is consumed; the
           iterator's C{next()} then returns the first byte B{after} it.

    @return: the decoded MIME content type name
    @rtype: str
    """
    try:
        wkContentTypeValue = Decoder.decodeIntegerValue(byteIter)
    except DecodeError:
        raise DecodeError('Invalid well-known media: could not read integer value representing it')

    contentTypes = WSPEncodingAssignments.wkContentTypes
    # The assigned number doubles as the index into the table
    if not 0 <= wkContentTypeValue < len(contentTypes):
        raise DecodeError('Invalid well-known media: could not find content type in table of assigned values')
    return contentTypes[wkContentTypeValue]
def decodeMediaType(byteIter):
    """ From [5], section 8.2.4.24:
    Media-type = (Well-known-media | Extension-Media) *(Parameter)

    Only the media part is decoded here; parameters are left for the
    caller.

    @param byteIter: a previewable iterator over a sequence of bytes

    @note: Used by C{decodeContentGeneralForm()}

    @raise DecodeError: The sequence is neither a well-known media value
                        nor an Extension-media.

    @return: The decoded media type
    @rtype: str
    """
    try:
        return Decoder.decodeWellKnownMedia(byteIter)
    except DecodeError:
        return Decoder.decodeExtensionMedia(byteIter)
767 def decodeConstrainedMedia(byteIter):
768 """ From [5], section 8.4.2.7:
769 Constrained-media = Constrained-encoding
770 It is encoded using values from the "Content Type Assignments" table.
772 @raise DecodeError: Invalid constrained media sequence
774 @return: The decoded media type
777 constrainedMedia = ''
779 constrainedMediaValue = Decoder.decodeConstrainedEncoding(byteIter)
780 except DecodeError, msg:
781 #byte = byteIter.next()
782 raise DecodeError, 'Invalid Constrained-media: %s' % msg
783 if type(constrainedMediaValue) == int:
784 if constrainedMediaValue in range(len(WSPEncodingAssignments.wkContentTypes)):
785 constrainedMedia = WSPEncodingAssignments.wkContentTypes[constrainedMediaValue]
787 raise DecodeError, 'Invalid constrained media: could not find well-known content type'
789 constrainedMedia = constrainedMediaValue
790 return constrainedMedia
def decodeContentGeneralForm(byteIter):
    """ From [5], section 8.4.2.24:
    Content-general-form = Value-length Media-type

    @note: Used by C{decodeContentTypeValue()} to decode Content-type
           fields and their parameters.

    @param byteIter: a previewable iterator over a sequence of bytes

    @raise DecodeError: The Value-length or the media type could not be
                        decoded.

    @return: The media type (content type), and a dictionary of
             parameters to this content type (which is empty if there
             are no parameters). This parameter dictionary is in the
             format:
             C{{<str:parameter_name>: <str/int/float:parameter_value>}}.
             The final returned tuple is in the format:
             (<str:media_type>, <dict:parameter_dict>)
    @rtype: tuple
    """
    # This is the length of the (encoded) media-type and all parameters
    valueLength = Decoder.decodeValueLength(byteIter)
    #CHECK: a more lenient variant (based on real-world tests; the specs
    # don't mention it) would fall back to Decoder.decodeIntegerValue() here

    # Copy exactly <valueLength> octets into a private buffer, so parameter
    # decoding stops at the end of this field
    ctFieldBytes = array.array('B')
    for _ in range(valueLength):
        ctFieldBytes.append(byteIter.next())
    ctIter = PreviewIterator(ctFieldBytes)
    # Now, decode all the bytes read
    mediaType = Decoder.decodeMediaType(ctIter)
    # Decode the included parameters (if any) until the buffer runs dry
    parameters = {}
    try:
        while True:
            parameter, value = Decoder.decodeParameter(ctIter)
            parameters[parameter] = value
    except StopIteration:
        pass
    return (mediaType, parameters)
def decodeParameter(byteIter):
    """ From [5], section 8.4.2.4:
    Parameter = Typed-parameter | Untyped-parameter

    A typed parameter is tried first; if that raises C{DecodeError} the
    sequence is decoded as an untyped parameter instead.

    @param byteIter: a previewable iterator over a sequence of bytes

    @return: The name of the parameter, and its value, in the format:
             (<parameter name>, <parameter value>)
    @rtype: tuple
    """
    try:
        parameter, value = Decoder.decodeTypedParameter(byteIter)
    except DecodeError:
        parameter, value = Decoder.decodeUntypedParameter(byteIter)
    return (parameter, value)
852 def decodeTypedParameter(byteIter):
853 """ From [5], section 8.4.2.4:
854 C{Typed-parameter = Well-known-parameter-token Typed-value}
855 The actual expected type of the value is implied by the well-known
858 @note: This is used in decoding parameters; see C{decodeParameter}
860 @return: The name of the parameter, and its value, in the format:
861 (<parameter name>, <parameter value>)
864 parameterToken, expectedValueType = Decoder.decodeWellKnownParameter(byteIter)
867 # Split the iterator; sometimes the exec call seems to mess up with itertools if this not done here
868 # (to replicate: trace the program from here to decodeShortInteger(); the itertools.tee command there
869 # doesn't copy the iterator as it should - it creates pointers to the same memory)
870 #byteIter, execIter = itertools.tee(byteIter)
871 exec 'typedValue = Decoder.decode%s(byteIter)' % expectedValueType
872 except DecodeError, msg:
873 raise DecodeError, 'Could not decode Typed-parameter: %s' % msg
875 print 'A fatal error occurred, probably due to an unimplemented decoding operation'
877 return (parameterToken, typedValue)
def decodeUntypedParameter(byteIter):
    """ From [5], section 8.4.2.4:
    C{Untyped-parameter = Token-text Untyped-value}
    The type of the value is unknown, but it shall be encoded as an
    integer, if that is possible.

    @note: This is used in decoding parameters; see C{decodeParameter}

    @param byteIter: a previewable iterator over a sequence of bytes

    @return: The name of the parameter, and its value, in the format:
             (<parameter name>, <parameter value>)
    @rtype: tuple
    """
    # The name comes first, immediately followed by its value
    name = Decoder.decodeTokenText(byteIter)
    return (name, Decoder.decodeUntypedValue(byteIter))
def decodeUntypedValue(byteIter):
    """ From [5], section 8.4.2.4:
    Untyped-value = Integer-value | Text-value

    An integer is tried first; if that raises C{DecodeError} the value is
    decoded as a Text-value.

    @note: This is used in decoding parameter values; see
           C{decodeUntypedParameter}

    @param byteIter: a previewable iterator over a sequence of bytes

    @return: The decoded untyped-value
    @rtype: int or str
    """
    try:
        return Decoder.decodeIntegerValue(byteIter)
    except DecodeError:
        return Decoder.decodeTextValue(byteIter)
def decodeWellKnownParameter(byteIter, encodingVersion='1.2'):
    """ Decodes the name and expected value type of a parameter of (for
    example) a "Content-Type" header entry, taking into account the WSP
    short form (assigned numbers) of well-known parameter names, as
    specified in section 8.4.2.4 and table 38 of [5].

    From [5], section 8.4.2.4:
    Well-known-parameter-token = Integer-value
    The code values used for parameters are specified in [5], table 38

    @param byteIter: a previewable iterator over a sequence of bytes
    @param encodingVersion: The WSP encoding version to use. This defaults
                            to "1.2", but may be "1.1", "1.2", "1.3" or
                            "1.4" (see table 38 in [5] for details).
    @type encodingVersion: str

    @raise ValueError: The specified encoding version is invalid.

    @raise DecodeError: The integer value representing the well-known
                        parameter name cannot be decoded, or it is not in
                        the table of assigned numbers for the encoding
                        version. Note that in the second case the integer
                        has already been read from C{byteIter}.

    @return: the decoded parameter name, and its expected value type, in
             the format (<parameter name>, <expected type>)
    @rtype: tuple
    """
    try:
        wkParameterValue = Decoder.decodeIntegerValue(byteIter)
    except DecodeError:
        raise DecodeError('Invalid well-known parameter token: could not read integer value representing it')

    wkParameters = WSPEncodingAssignments.wellKnownParameters(encodingVersion)
    if wkParameterValue not in wkParameters:
        # If this is reached, the parameter isn't a WSP well-known one
        raise DecodeError('Invalid well-known parameter token: could not find in table of assigned numbers (encoding version %s)' % encodingVersion)
    decodedParameterName, expectedValue = wkParameters[wkParameterValue]
    return (decodedParameterName, expectedValue)
961 #TODO: somehow this should be more dynamic; we need to know what type is EXPECTED (hence the TYPED value)
def decodeTypedValue(byteIter):
    """ From [5], section 8.4.2.4:
    Typed-value = Compact-value | Text-value
    In addition to the expected type, there may be no value.
    If the value cannot be encoded using the expected type, it shall be
    encoded as text.

    @note: This is used in decoding parameters, see C{decodeParameter()}

    @param byteIter: a previewable iterator over a sequence of bytes

    @raise DecodeError: The value is neither a Compact-value nor a
                        Text-value.

    @return: The decoded Parameter Typed-value
    """
    try:
        return Decoder.decodeCompactValue(byteIter)
    except DecodeError:
        pass
    try:
        return Decoder.decodeTextValue(byteIter)
    except DecodeError:
        raise DecodeError('Could not decode the Parameter Typed-value')
985 #TODO: somehow this should be more dynamic; we need to know what type is EXPECTED
def decodeCompactValue(byteIter):
    """ From [5], section 8.4.2.4:
    Compact-value = Integer-value | Date-value | Delta-seconds-value
    | Q-value | Version-value | Uri-value

    @note: This is used in decoding parameters, see C{decodeTypedValue()}

    @param byteIter: a previewable iterator over a sequence of bytes

    @raise DecodeError: Failed to decode the Parameter Compact-value as
                        either an integer value or a Uri-value.

    @return: The decoded Compact-value
    """
    try:
        # First, see if it's an integer value
        # This solves the checks for: Integer-value, Date-value,
        # Delta-seconds-value, Q-value, Version-value
        return Decoder.decodeIntegerValue(byteIter)
    except DecodeError:
        pass
    try:
        # Try parsing it as a Uri-value
        return Decoder.decodeUriValue(byteIter)
    except DecodeError:
        raise DecodeError('Could not decode Parameter Compact-value')
1010 #TODO: the string output from this should be in the MMS format..?
def decodeDateValue(byteIter):
    """ Decodes a Date-value.

    From [5], section 8.4.2.3:
    Date-value = Long-integer
    The encoding of dates shall be done in number of seconds from
    1970-01-01, 00:00:00 GMT.

    @param byteIter: Iterator over the bytes to decode

    @raise DecodeError: This method uses C{decodeLongInteger}, and thus
                        raises this under the same conditions.

    @return: The date, in a format such as: C{Tue Nov 27 16:12:21 2007}
             (C{time.ctime} renders the timestamp in local time)
    """
    secondsSinceEpoch = Decoder.decodeLongInteger(byteIter)
    return time.ctime(secondsSinceEpoch)
def decodeDeltaSecondsValue(byteIter):
    """ Decodes a Delta-seconds-value.

    From [5], section 8.4.2.3:
    Delta-seconds-value = Integer-value

    @param byteIter: Iterator over the bytes to decode

    @raise DecodeError: This method uses C{decodeIntegerValue}, and thus
                        raises this under the same conditions.

    @return: the decoded delta-seconds-value
    """
    return Decoder.decodeIntegerValue(byteIter)
def decodeQValue(byteIter):
    """ Decodes a quality factor (Q-value).

    From [5], section 8.4.2.1:
    The encoding is the same as in Uintvar-integer, but with restricted
    size. When quality factor 0 and quality factors with one or two
    decimal digits are encoded, they shall be multiplied by 100 and
    incremented by one, so that they encode as a one-octet value in
    range 1-100, ie, 0.1 is encoded as 11 (0x0B) and 0.99 encoded as
    100 (0x64). Three decimal quality factors shall be multiplied with
    1000 and incremented by 100, and the result shall be encoded as a
    one-octet or two-octet uintvar, eg, 0.333 shall be encoded as 0x83 0x31.
    Quality factor 1 is the default value and shall never be sent.

    @param byteIter: Iterator over the bytes to decode

    @return: The decoded quality factor (Q-value), as a float
    """
    qValueInt = Decoder.decodeUintvar(byteIter)
    #TODO: limit the amount of decimal points
    if qValueInt > 100:
        # Three-decimal form: (q * 1000) + 100
        return float(qValueInt - 100) / 1000.0
    # One/two-decimal form: (q * 100) + 1
    return float(qValueInt - 1) / 100.0
def decodeVersionValue(byteIter):
    """ Decodes the version-value. From [5], section 8.4.2.3:
    Version-value = Short-integer | Text-string

    @param byteIter: Iterator over the bytes to decode

    @return: the decoded version value, usually in the format:
             "<major_version>.<minor_version>"
    """
    try:
        byteValue = Decoder.decodeShortInteger(byteIter)
    except DecodeError:
        # Not a Short-integer; the version was sent as a Text-string
        return Decoder.decodeTextString(byteIter)
    # Bits 4-6 hold the major version, bits 0-3 the minor version
    major = (byteValue & 0x70) >> 4
    minor = byteValue & 0x0f
    # NOTE(review): [5] 8.4.2.3 appears to reserve minor value 15 for
    # "major version only"; that case is reported here as "<major>.15".
    # Confirm whether it should be rendered as "<major>" instead.
    return '%d.%d' % (major, minor)
def decodeUriValue(byteIter):
    """ Stub for Uri-value decoding; this is a wrapper to
    C{decodeTextString}.

    @param byteIter: Iterator over the bytes to decode

    @return: The decoded Uri-value, as returned by C{decodeTextString}
    """
    return Decoder.decodeTextString(byteIter)
def decodeTextValue(byteIter):
    """ Stub for Parameter Text-value decoding.

    From [5], section 8.4.2.3:
    Text-value = No-value | Token-text | Quoted-string

    This is used when decoding parameter values; see C{decodeTypedValue()}

    @param byteIter: Iterator over the bytes to decode

    @return: The decoded Parameter Text-value; an empty string if the
             value is neither Token-text nor a Quoted-string (No-value)
    """
    try:
        return Decoder.decodeTokenText(byteIter)
    except DecodeError:
        pass
    try:
        return Decoder.decodeQuotedString(byteIter)
    except DecodeError:
        # Ok, so it's a "No-value"
        return ''
def decodeNoValue(byteIter):
    """ Basically verifies that the next byte in C{byteIter} is 0x00.

    @note: If successful, this function will move C{byteIter} one byte
           forward.

    @param byteIter: Iterator over the bytes to decode; must support
                     C{preview()}, C{resetPreview()} and C{next()}

    @raise DecodeError: If 0x00 is not found; C{byteIter} is not modified

    @return: No-value, which is 0x00
    """
    # Peek at the next byte without consuming it, so that a mismatch leaves
    # the iterator untouched (the previous code tried to tuple-unpack the
    # integer returned by next(), which can never work).
    byte = byteIter.preview()
    byteIter.resetPreview()
    if byte != 0x00:
        raise DecodeError('Expected No-value')
    byteIter.next()
    return 0x00
def decodeAcceptValue(byteIter):
    """ Decodes an Accept-value.

    From [5], section 8.4.2.7:
    Accept-value = Constrained-media | Accept-general-form
    Accept-general-form = Value-length Media-range [Accept-parameters]
    Media-range = (Well-known-media | Extension-Media) *(Parameter)
    Accept-parameters = Q-token Q-value *(Accept-extension)
    Accept-extension = Parameter
    Q-token = <Octet 128>

    @note: most of these things are currently decoded, but discarded (e.g
           accept-parameters); we only return the media type

    @param byteIter: Iterator over the bytes to decode

    @raise DecodeError: The decoding failed. C{byteIter} will not be
                        modified in this case.
    @return: the decoded Accept-value (media/content type)
    """
    # Try to use Constrained-media encoding
    try:
        return Decoder.decodeConstrainedMedia(byteIter)
    except DecodeError:
        pass
    # ...now try Accept-general-form
    # The Value-length is consumed but not used
    Decoder.decodeValueLength(byteIter)
    try:
        media = Decoder.decodeWellKnownMedia(byteIter)
    except DecodeError:
        media = Decoder.decodeExtensionMedia(byteIter)
    # Check for the Q-Token (to see if there are Accept-parameters)
    if byteIter.preview() == 128:
        byteIter.next()
        # The Q-value and Accept-extension are consumed, then discarded
        Decoder.decodeQValue(byteIter)
        try:
            Decoder.decodeParameter(byteIter)
        except DecodeError:
            # No Accept-extension present
            pass
    byteIter.resetPreview()
    return media
def decodePragmaValue(byteIter):
    """ Decodes a Pragma-value.

    Defined in [5], section 8.4.2.38:

    Pragma-value = No-cache | (Value-length Parameter)

    From [5], section 8.4.2.15:

    No-cache = <Octet 128>

    @param byteIter: Iterator over the bytes to decode

    @raise DecodeError: The decoding failed. C{byteIter} will not be
                        modified in this case.
    @return: the decoded Pragma-value, in the format:
             (<parameter name>, <parameter value>)
    """
    if byteIter.preview() == 0x80:  # No-cache
        byteIter.next()
        #TODO: Not sure if this parameter name (or even usage) is correct
        return 'Cache-control', 'No-cache'
    byteIter.resetPreview()
    # The Value-length is consumed but not used
    Decoder.decodeValueLength(byteIter)
    parameterName, parameterValue = Decoder.decodeParameter(byteIter)
    return parameterName, parameterValue
def decodeWellKnownCharset(byteIter):
    """ Decodes a Well-known-charset value.

    From [5], section 8.4.2.8:
    C{Well-known-charset = Any-charset | Integer-value}
    It is encoded using values from "Character Set Assignments" table.
    C{Any-charset = <Octet 128>}
    Equivalent to the special RFC2616 charset value "*"

    @param byteIter: Iterator over the bytes to decode; must support
                     C{preview()}, C{resetPreview()} and C{next()}

    @return: C{'*'} for Any-charset; otherwise the charset name from
             C{WSPEncodingAssignments.wkCharSets}, or the assigned number
             as a string if it is not in that table
    """
    # Look for the Any-charset value
    byte = byteIter.preview()
    byteIter.resetPreview()
    if byte == 0x80:
        byteIter.next()
        # (a misspelt variable name previously meant '*' was never returned)
        return '*'
    charSetValue = Decoder.decodeIntegerValue(byteIter)
    if charSetValue in WSPEncodingAssignments.wkCharSets:
        return WSPEncodingAssignments.wkCharSets[charSetValue]
    # This charset is not in our table... so just use the value (at least
    # for now)
    return str(charSetValue)
def decodeWellKnownHeader(byteIter):
    """ Decodes a Well-known-header.

    From [5], section 8.4.2.6:
    C{Well-known-header = Well-known-field-name Wap-value}
    C{Well-known-field-name = Short-integer}
    C{Wap-value = <many different headers value, most not implemented>}

    @todo: Currently, "Wap-value" is decoded as a Text-string in most cases

    @param byteIter: Iterator over the bytes to decode

    @raise DecodeError: The field-name value is not an assigned header
                        number, or the Wap-value could not be decoded

    @return: The header name, and its value, in the format:
             (<str:header_name>, <str:header_value>)
    """
    hdrFieldValue = Decoder.decodeShortInteger(byteIter)
    hdrFields = WSPEncodingAssignments.headerFieldNames()
    #TODO: *technically* this can fail, but then we have already read a byte... should fix?
    if not (0 <= hdrFieldValue < len(hdrFields)):
        raise DecodeError('Invalid Header Field value: %d' % hdrFieldValue)
    decodedHeaderFieldName = hdrFields[hdrFieldValue]
    #TODO: make this flow better, and implement it in decodeApplicationHeader also
    # Currently we decode most headers as TextStrings, except where we have
    # a specific decoding algorithm implemented
    if decodedHeaderFieldName not in WSPEncodingAssignments.hdrFieldEncodings:
        decodedValue = Decoder.decodeTextString(byteIter)
        return (decodedHeaderFieldName, decodedValue)
    wapValueType = WSPEncodingAssignments.hdrFieldEncodings[decodedHeaderFieldName]
    try:
        # Dispatch to Decoder.decode<WapValueType>() by name
        decodedValue = getattr(Decoder, 'decode%s' % wapValueType)(byteIter)
    except DecodeError as msg:
        raise DecodeError('Could not decode Wap-value: %s' % msg)
    except Exception:
        print('An error occurred, probably due to an unimplemented decoding operation. Tried to decode header: %s' % decodedHeaderFieldName)
        raise
    return (decodedHeaderFieldName, decodedValue)
def decodeApplicationHeader(byteIter):
    """ Decodes an Application-header.

    From [5], section 8.4.2.6:
    C{Application-header = Token-text Application-specific-value}

    From [4], section 7.1:
    C{Application-header = Token-text Application-specific-value}
    C{Application-specific-value = Text-string}

    @note: This is used when decoding generic WSP headers;
           see C{decodeHeader()}.
    @note: The value is first tried as a Well-known-header, and only then
           decoded as a Text-string as described in [4].

    @param byteIter: Iterator over the bytes to decode

    @return: The application-header, and its value, in the format:
             (<str:application_header>, <str:application_specific_value>)
    """
    try:
        appHeader = Decoder.decodeTokenText(byteIter)
    except DecodeError:
        #FNA: added for brute-forcing
        appHeader = Decoder.decodeTextString(byteIter)
    # NOTE(review): the exception type of this second handler was not
    # recoverable from the listing; DecodeError is assumed here to match
    # the neighbouring decoders - confirm against the upstream file.
    try:
        appSpecificValue = Decoder.decodeWellKnownHeader(byteIter)
    except DecodeError:
        appSpecificValue = Decoder.decodeTextString(byteIter)
    return (appHeader, appSpecificValue)
def decodeHeader(byteIter):
    """ Decodes a WSP header entry

    From [5], section 8.4.2.6:
    C{Header = Message-header | Shift-sequence}
    C{Message-header = Well-known-header | Application-header}
    C{Well-known-header = Well-known-field-name Wap-value}
    C{Application-header = Token-text Application-specific-value}

    @note: "Shift-sequence" encoding has not been implemented
    @note: Currently, almost all header values are treated as text-strings

    @param byteIter: Iterator over the bytes to decode

    @return: The decoded headername, and its value, in the format:
             (<str:header_name>, <str:header_value>)
    """
    # First try decoding the header as a well-known-header
    try:
        header, value = Decoder.decodeWellKnownHeader(byteIter)
    except DecodeError:
        # ...now try Application-header encoding
        header, value = Decoder.decodeApplicationHeader(byteIter)
    return (header, value)
1320 """ A WSP Data unit encoder """
1323 #def encodeUint8(uint):
1324 # """ Encodes an 8-bit unsigned integer
1326 # @param uint: The integer to encode
1327 # @type byteIteror: int
1329 # @return: the encoded Uint8, as a sequence of bytes
1332 # # Make the byte unsigned
1333 # return [uint & 0xff]
def encodeUintvar(uint):
    """ Variable Length Unsigned Integer encoding algorithm

    This binary-encodes the given unsigned integer number as specified
    in section 8.1.2 of [5]. Each encoded byte consists of a "continue
    bit" (most significant bit) followed by 7 bits of actual data.

    The uint is split into 7-bit segments, and the "continue bit" of each
    used octet is set to '1' to indicate more is to follow; the last used
    octet's "continue bit" is set to 0.

    @param uint: The unsigned integer to encode
    @type uint: int

    @return: the binary-encoded Uintvar, as a list of byte values
    @rtype: list
    """
    # Since this is the lowest entry, we do not set the continue bit to 1
    uintVar = [uint & 0x7f]
    uint = uint >> 7
    # ...but for the remaining octets, we have to
    while uint > 0:
        uintVar.insert(0, 0x80 | (uint & 0x7f))
        uint = uint >> 7
    return uintVar
def encodeTextString(string):
    """ Encodes a "Text-string" value.

    This follows the basic encoding rules specified in [5], section
    8.4.2.1

    @param string: The text string to encode; an integer is converted to
                   its decimal string form first
    @type string: str or int

    @return: the null-terminated, binary-encoded version of the
             specified Text-string, as a list of byte values
    @rtype: list
    """
    if isinstance(string, int):
        string = str(string)
    encodedString = [ord(char) for char in string]
    # End-of-string terminator
    encodedString.append(0x00)
    return encodedString
def encodeQuotedString(string):
    """ Encodes a "Quoted-string" value.

    This follows the basic encoding rules specified in [5], section
    8.4.2.1

    @param string: The text string to encode; an integer is converted to
                   its decimal string form first
    @type string: str or int

    @return: the binary-encoded Quoted-string, as a list of byte values:
             a leading double-quote octet, the characters of the string,
             and a terminating 0x00 (no closing quote is emitted)
    @rtype: list
    """
    if isinstance(string, int):
        string = str(string)
    encodedString = [ord('"')]
    encodedString.extend(ord(char) for char in string)
    # End-of-string terminator
    encodedString.append(0x00)
    return encodedString
def encodeShortInteger(integer):
    """ Encodes the specified short-integer value

    The encoding for a short integer is specified in [5], section 8.4.2.1:
    C{Short-integer = OCTET}
    Integers in range 0-127 shall be encoded as a one octet value with
    the most significant bit set to one (1xxx xxxx) and with the value
    in the remaining least significant bits.

    @param integer: The short-integer value to encode
    @type integer: int

    @raise EncodeError: Not a valid short-integer; the integer must be in
                        range 0-127

    @return: The encoded short integer, as a list of byte values
    @rtype: list
    """
    if integer < 0 or integer > 127:
        raise EncodeError('Short-integer value must be in range 0-127: %d' % integer)
    # Make sure the most significant bit is set
    return [0x80 | integer]
def encodeLongInteger(integer):
    """ Encodes a Long-integer value

    The encoding for a long integer is specified in [5], section 8.4.2.1;
    for a description of this encoding scheme, see
    C{wsp.Decoder.decodeLongInteger()}.

    From [5], section 8.4.2.2:
    Long-integer = Short-length Multi-octet-integer
    Short-length = <Any octet 0-30>

    The Multi-octet-integer is emitted most significant octet first
    (big-endian), using at least one octet.

    @param integer: The integer value to encode
    @type integer: int

    @raise EncodeError: <integer> is not of type "int", is negative, or
                        needs more than 30 octets

    @return: The encoded Long-integer, as a sequence of byte values
    @rtype: list
    """
    if not isinstance(integer, int):
        raise EncodeError('<integer> must be of type "int"')
    if integer < 0:
        raise EncodeError('Cannot encode Long-integer value: value must not be negative')
    # Encode the Multi-octet-integer, most significant octet first.
    # (Octets used to be emitted least-significant first, and 0 produced
    # an octet-less value.)
    octets = []
    longInt = integer
    while True:
        octets.insert(0, 0xff & longInt)
        longInt = longInt >> 8
        if longInt == 0:
            break
    # Now add the Short-length value, and make sure it's ok
    shortLength = len(octets)
    if shortLength > 30:
        raise EncodeError('Cannot encode Long-integer value: Short-length is too long; should be in octet range 0-30')
    return [shortLength] + octets
def encodeVersionValue(version):
    """ Encodes the version-value. From [5], section 8.4.2.3:
    Version-value = Short-integer | Text-string

    Example: An MMS version of "1.0" consists of a major version of 1 and a
    minor version of 0, and would be encoded as 0x90. However, a version
    of "1.2.4" would be encoded as the Text-string "1.2.4".

    @param version: The version number to encode, e.g. "1.0"
    @type version: str

    @raise TypeError: The specified version value was not of type C{str}

    @return: the encoded version value, as a list of byte values
    @rtype: list
    """
    if not isinstance(version, str):
        raise TypeError('Parameter must be of type "str"')
    parts = version.split('.')
    # First try short-integer encoding
    try:
        if len(parts) > 2:
            # More than <major>.<minor>: only a Text-string can hold it.
            # (This case used to fall through and return an empty list.)
            raise ValueError('Version has more than two components')
        majorVersion = int(parts[0])
        if majorVersion < 1 or majorVersion > 7:
            raise ValueError('Major version must be in range 1-7')
        if len(parts) == 2:
            minorVersion = int(parts[1])
            if minorVersion < 0 or minorVersion > 14:
                raise ValueError('Minor version must be in range 0-14')
        else:
            # NOTE(review): 15 marks "no minor version" per [5] 8.4.2.3;
            # this line was reconstructed - confirm against upstream.
            minorVersion = 15
        # Major version goes in bits 4-6, minor version in bits 0-3
        return Encoder.encodeShortInteger((majorVersion << 4) | minorVersion)
    except ValueError:
        # The value couldn't be encoded as a short-integer; use a
        # text-string instead
        return Encoder.encodeTextString(version)
def encodeMediaType(contentType):
    """ Encodes the specified MIME content type ("Media-type" value)

    From [5], section 8.2.4.24:
    Media-type = (Well-known-media | Extension-Media) *(Parameter)

    "Well-known-media" takes into account the WSP short form of well-known
    content types, as specified in section 8.4.2.24 and table 40 of [5].

    @param contentType: The MIME content type to encode
    @type contentType: str

    @return: The binary-encoded content type, as a list of (integer) byte
             values
    @rtype: list
    """
    wkContentTypes = WSPEncodingAssignments.wkContentTypes
    if contentType in wkContentTypes:
        # Short-integer encoding of the content type's assigned number
        return list(Encoder.encodeShortInteger(wkContentTypes.index(contentType)))
    # Not a well-known type: send it as text
    return list(Encoder.encodeTextString(contentType))
def encodeParameter(parameterName, parameterValue, encodingVersion='1.4'):
    """ Binary-encodes a parameter of (for example) a "Content-Type"
    header entry, taking into account the WSP short form of well-known
    parameter names, as specified in section 8.4.2.4 of [5].

    From [5], section 8.4.2.4:
    C{Parameter = Typed-parameter | Untyped-parameter}
    C{Typed-parameter = Well-known-parameter-token Typed-value}
    C{Untyped-parameter = Token-text Untyped-value}
    C{Untyped-value = Integer-value | Text-value}

    @param parameterName: The name of the parameter to encode
    @type parameterName: str
    @param parameterValue: The value of the parameter
    @type parameterValue: str or int

    @param encodingVersion: The WSP encoding version to use. This defaults
                            to "1.4", but may be "1.1", "1.2", "1.3" or
                            "1.4" (see table 38 in [5] for details).
    @type encodingVersion: str

    @raise ValueError: The specified encoding version is invalid.
    @raise EncodeError: The parameter value could not be encoded

    @return: The binary-encoded parameter (name and value), as a list of
             (integer) byte values
    @rtype: list
    """
    wkParameters = WSPEncodingAssignments.wellKnownParameters(encodingVersion)
    encodedParameter = []
    # Try to encode the parameter using a "Typed-parameter" value.
    # NOTE(review): if a name is assigned more than one number, the one
    # picked depends on the iteration order of the table.
    for assignedNumber in wkParameters.keys():
        if wkParameters[assignedNumber][0] != parameterName:
            continue
        # Ok, it's a Typed-parameter; encode the parameter name
        encodedParameter.extend(Encoder.encodeShortInteger(assignedNumber))
        # ...and now the value, using Encoder.encode<expectedType>()
        expectedType = wkParameters[assignedNumber][1]
        encoderName = 'encode%s' % expectedType
        try:
            if parameterName == 'Type':
                # TODO: remove this ugly hack
                # NOTE(review): the fallback handler's exception type was
                # not recoverable from the listing; a catch-all is assumed.
                try:
                    encodedParameter.extend(getattr(Encoder, encoderName)(parameterValue))
                except Exception:
                    encodedParameter.extend(Encoder.encodeConstrainedEncoding(parameterValue))
            else:
                encodedParameter.extend(getattr(Encoder, encoderName)(parameterValue))
        except EncodeError as msg:
            raise EncodeError('Error encoding parameter value: %s' % msg)
        except Exception:
            print('A fatal error occurred, probably due to an unimplemented encoding operation')
            raise
        # NOTE(review): stopping at the first matching assigned number is
        # reconstructed behaviour - confirm against upstream.
        break
    # See if the "Typed-parameter" encoding worked
    if len(encodedParameter) == 0:
        # ...it didn't. Use "Untyped-parameter" encoding
        encodedParameter.extend(Encoder.encodeTokenText(parameterName))
        # First try to encode the untyped-value as an integer
        try:
            value = Encoder.encodeIntegerValue(parameterValue)
        except EncodeError:
            value = Encoder.encodeTextString(parameterValue)
        encodedParameter.extend(value)
    return encodedParameter
def encodeWellKnownCharset(value):
    """ Encodes a character set name as its assigned number.

    The name is looked up in C{WSPEncodingAssignments.wkCharSets} and the
    matching assigned number is encoded as a Long-integer.

    @param value: The character set name to encode
    @type value: str

    @return: The encoded charset, as a list of byte values, or C{None}
             if the name is not in the table of well-known charsets
    """
    wkCharsets = WSPEncodingAssignments.wkCharSets
    for assignedNumber in wkCharsets.keys():
        if wkCharsets[assignedNumber] == value:
            return Encoder.encodeLongInteger(assignedNumber)
    # Unknown charset: callers get None (kept from the original, which
    # fell off the end of the function here)
    return None
1633 #TODO: check up on the encoding/decoding of Token-text, in particular, how does this differ from text-string? does it have 0x00 at the end?
def encodeTokenText(text):
    """ Encodes a Token-text value.

    From [5], section 8.4.2.1:
    Token-text = Token End-of-string

    @param text: The token to encode
    @type text: str

    @raise EncodeError: Specified text cannot be encoded as a token

    @return: The encoded token string, as a list of byte values
    @rtype: list
    """
    # Character codes that may not appear in a token.
    # NOTE(review): the tail of this tuple (92, 93, 123, 125) was restored
    # from the RFC 2616 separator set - confirm against upstream. The list
    # holds 11 where RFC 2616 names HT (9), and omits the double quote (34).
    separators = (11, 32, 40, 41, 44, 47, 58, 59, 60, 61, 62, 63, 64, 91,
                  92, 93, 123, 125)
    # Sanity check
    for char in separators:
        if chr(char) in text:
            raise EncodeError('Char "%s" in text string; cannot encode as Token-text' % chr(char))
    return Encoder.encodeTextString(text)
def encodeIntegerValue(integer):
    """ Encodes an integer value

    From [5], section 8.4.2.3:
    Integer-Value = Short-integer | Long-integer

    This function will first try to encode the specified integer value
    into a short-integer, and failing that, will encode into a
    long-integer.

    @param integer: The integer to encode
    @type integer: int

    @raise EncodeError: The <integer> parameter is not of type C{int}

    @return: The encoded integer value, as a list of byte values
    @rtype: list
    """
    if not isinstance(integer, int):
        raise EncodeError('<integer> must be of type "int"')
    # First try and see if it's a short-integer
    try:
        return Encoder.encodeShortInteger(integer)
    except EncodeError:
        return Encoder.encodeLongInteger(integer)
def encodeTextValue(text):
    """ Stub for encoding Text-values; this is equivalent to
    C{encodeTextString}

    @param text: The text value to encode

    @return: The encoded text value, as returned by C{encodeTextString}
    """
    return Encoder.encodeTextString(text)
def encodeNoValue(value=None):
    """ Encodes a No-value, which is 0x00

    @note: This function mainly exists for use by automatically-selected
           encoding routines (see C{encodeParameter()} for an example).

    @param value: This value is ignored; it is present so that this
                  method complies with the format of the other C{encode}
                  methods.

    @return: A list containing a single "No-value", which is 0x00
    @rtype: list
    """
    return [0x00]
def encodeHeader(headerFieldName, headerValue):
    """ Encodes a WSP header entry, and its value

    From [5], section 8.4.2.6:
    C{Header = Message-header | Shift-sequence}
    C{Message-header = Well-known-header | Application-header}
    C{Well-known-header = Well-known-field-name Wap-value}
    C{Application-header = Token-text Application-specific-value}

    @note: "Shift-sequence" encoding has not been implemented
    @note: Currently, almost all header values are encoded as text-strings

    @param headerFieldName: The name of the header field to encode
    @param headerValue: The value of the header field

    @raise EncodeError: The header name or value could not be encoded

    @return: The encoded header, and its value, as a sequence of byte
             values
    @rtype: list
    """
    encodedHeader = []
    # First try encoding the header name as a "well-known-header"...
    wkHdrFields = WSPEncodingAssignments.headerFieldNames()
    if headerFieldName in wkHdrFields:
        encodedHeader.extend(Encoder.encodeShortInteger(wkHdrFields.index(headerFieldName)))
    else:
        # ...otherwise, encode it as an "application header"
        encodedHeader.extend(Encoder.encodeTokenText(headerFieldName))
    #TODO: make this flow better (see also Decoder.decodeHeader)
    # most header values are encoded as TextStrings, except where we have a
    # specific Wap-value encoding implementation
    if headerFieldName not in WSPEncodingAssignments.hdrFieldEncodings:
        encodedHeader.extend(Encoder.encodeTextString(headerValue))
        return encodedHeader
    wapValueType = WSPEncodingAssignments.hdrFieldEncodings[headerFieldName]
    try:
        # Dispatch to Encoder.encode<WapValueType>() by name
        encodedHeader.extend(getattr(Encoder, 'encode%s' % wapValueType)(headerValue))
    except EncodeError as msg:
        raise EncodeError('Error encoding Wap-value: %s' % msg)
    except Exception:
        print('A fatal error occurred, probably due to an unimplemented encoding operation')
        raise
    return encodedHeader
def encodeContentTypeValue(mediaType, parameters):
    """ Encodes a content type, and its parameters

    From [5], section 8.4.2.24:
    C{Content-type-value = Constrained-media | Content-general-form}

    The short form of the Content-type-value MUST only be used when the
    well-known media is in the range of 0-127 or a text string. In all
    other cases the general form MUST be used.

    @param mediaType: The media (content) type to encode
    @param parameters: The content type's parameters, keyed by parameter
                       name

    @return: The encoded Content-type-value (including parameters, if
             any), as a sequence of bytes
    @rtype: list
    """
    # Constrained-media encoding is only possible without parameters
    if len(parameters) == 0:
        try:
            return Encoder.encodeConstrainedMedia(mediaType)
        except EncodeError:
            pass
    # Use the general form
    return Encoder.encodeContentGeneralForm(mediaType, parameters)
def encodeConstrainedMedia(mediaType):
    """ Encodes a Constrained-media value.

    From [5], section 8.4.2.7:
    Constrained-media = Constrained-encoding
    It is encoded using values from the "Content Type Assignments" table.

    @param mediaType: The media type to encode
    @type mediaType: str

    @raise EncodeError: Media value is unsuitable for Constrained-encoding

    @return: The encoded media type, as a sequence of bytes
    @rtype: list
    """
    wkContentTypes = WSPEncodingAssignments.wkContentTypes
    # See if this value is in the table of well-known content types
    if mediaType in wkContentTypes:
        # Use the assigned number instead of the name
        mediaValue = wkContentTypes.index(mediaType)
    else:
        mediaValue = mediaType
    return Encoder.encodeConstrainedEncoding(mediaValue)
def encodeConstrainedEncoding(value):
    """ Constrained-encoding = Extension-Media --or-- Short-integer
    This encoding is used for token values, which have no well-known
    binary encoding, or when the assigned number of the well-known
    encoding is small enough to fit into Short-integer.

    @param value: The value to encode
    @type value: int or str

    @raise EncodeError: <value> cannot be encoded as a
                        Constrained-encoding sequence

    @return: The encoded constrained-encoding token value, as a sequence
             of bytes
    @rtype: list
    """
    if isinstance(value, int):
        # Integers must fit into a short-integer
        return Encoder.encodeShortInteger(value)
    # Ok, it should be Extension-Media then
    try:
        return Encoder.encodeExtensionMedia(value)
    except EncodeError:
        # Give up
        raise EncodeError('Cannot encode %s as a Constrained-encoding sequence' % str(value))
def encodeExtensionMedia(mediaValue):
    """ Encodes an Extension-media value.

    From [5], section 8.4.2.1:
    Extension-media = *TEXT End-of-string
    This encoding is used for media values, which have no well-known
    binary encoding

    @param mediaValue: The media value (string) to encode
    @type mediaValue: str

    @raise EncodeError: The value cannot be encoded as TEXT; probably it
                        starts with/contains an invalid character

    @return: The encoded media type value, as a sequence of bytes
    @rtype: list
    """
    if not isinstance(mediaValue, str):
        try:
            mediaValue = str(mediaValue)
        except Exception:
            raise EncodeError('Invalid Extension-media: Cannot convert value to text string')
    # An empty value is legal (*TEXT may be empty), so only inspect the
    # first character when there is one; indexing used to raise IndexError
    if mediaValue:
        firstOctet = ord(mediaValue[0])
        if firstOctet < 32 or firstOctet == 127:
            raise EncodeError('Invalid Extension-media: TEXT starts with invalid character: %s' % firstOctet)
    return Encoder.encodeTextString(mediaValue)
def encodeContentGeneralForm(mediaType, parameters):
    """ Encodes a Content-general-form value.

    From [5], section 8.4.2.24:
    Content-general-form = Value-length Media-type

    @note: Used in encoding Content-type fields and their parameters;
           see C{encodeContentTypeValue()}

    @param mediaType: The media (content) type to encode
    @param parameters: The content type's parameters, keyed by parameter
                       name

    @return: The encoded Content-general-form, as a sequence of bytes
    @rtype: list
    """
    # Encode the actual content type
    encodedMediaType = Encoder.encodeMediaType(mediaType)
    # Encode all parameters
    encodedParameters = []
    for paramName in parameters:
        encodedParameters.extend(Encoder.encodeParameter(paramName, parameters[paramName]))
    # The Value-length covers the media type plus all parameters
    valueLength = len(encodedMediaType) + len(encodedParameters)
    encodedContentGeneralForm = []
    encodedContentGeneralForm.extend(Encoder.encodeValueLength(valueLength))
    encodedContentGeneralForm.extend(encodedMediaType)
    encodedContentGeneralForm.extend(encodedParameters)
    return encodedContentGeneralForm
def encodeValueLength(length):
    """ Encodes the specified length value as a value length indicator

    "Value length" is used to indicate the length of a value to follow, as
    used in the C{Content-Type} header in the MMS body, for example.

    The encoding for a value length indicator is specified in [5],
    section 8.4.2.2, and follows the form::

     Value-length = [Short-length]  --or--  [Length-quote] [Length]
                         ^^^^^^                  ^^^^^^      ^^^^^^
                         1 byte                  1 byte      x bytes
                    <Any octet 0-30>          <Octet 31>   Uintvar-integer

    @param length: The length value to encode
    @type length: int

    @raise EncodeError: The ValueLength could not be encoded.

    @return: The encoded value length indicator, as a sequence of bytes
    @rtype: list
    """
    # Try and encode it as a short-length
    try:
        return Encoder.encodeShortLength(length)
    except EncodeError:
        pass
    # Encode it with a Length-quote and Uintvar
    encodedValueLength = [31]  # Length-quote
    encodedValueLength.extend(Encoder.encodeUintvar(length))
    return encodedValueLength
def encodeShortLength(length):
    """ Encodes a Short-length value.

    From [5], section 8.4.2.2:
    Short-length = <Any octet 0-30>

    @param length: The length value to encode
    @type length: int

    @raise EncodeError: The specified <length> cannot be encoded as a
                        short-length value; it is not in octet range 0-30.

    @return: The encoded short-length, as a sequence of bytes
    @rtype: list
    """
    if length < 0 or length > 30:
        raise EncodeError('Cannot encode short-length; length should be in range 0-30')
    return [length]
def encodeAcceptValue(acceptValue):
    """ Encodes an Accept-value.

    From [5], section 8.4.2.7:
    Accept-value = Constrained-media | Accept-general-form
    Accept-general-form = Value-length Media-range [Accept-parameters]
    Media-range = (Well-known-media | Extension-Media) *(Parameter)
    Accept-parameters = Q-token Q-value *(Accept-extension)
    Accept-extension = Parameter
    Q-token = <Octet 128>

    @note: This implementation does not currently support encoding of
           "Accept-parameters".

    @param acceptValue: The Accept-value to encode (media/content type)
    @type acceptValue: str

    @raise EncodeError: The encoding failed.

    @return: The encoded Accept-value, as a sequence of bytes
    @rtype: list
    """
    # Try to use Constrained-media encoding
    try:
        return Encoder.encodeConstrainedMedia(acceptValue)
    except EncodeError:
        pass
    # ...now try Accept-general-form
    try:
        encodedMediaRange = Encoder.encodeMediaType(acceptValue)
    except EncodeError as msg:
        raise EncodeError('Cannot encode Accept-value: %s' % msg)
    # The Value-length prefix is followed by the media range itself
    encodedAcceptValue = Encoder.encodeValueLength(len(encodedMediaRange))
    encodedAcceptValue.extend(encodedMediaRange)
    return encodedAcceptValue