added module for decoding GSM-encoded strings
author pycage <martin.grimme@gmail.com>
Wed, 6 Jan 2010 20:35:05 +0000 (20:35 +0000)
committer pycage <martin.grimme@gmail.com>
Wed, 6 Jan 2010 20:35:05 +0000 (20:35 +0000)
git-svn-id: file:///svnroot/ussd-widget/trunk@4 d197f4d6-dc93-42ad-8354-0da1f58e353f

ussd-common/src/usr/lib/python2.5/gsmdecode.py [new file with mode: 0644]

diff --git a/ussd-common/src/usr/lib/python2.5/gsmdecode.py b/ussd-common/src/usr/lib/python2.5/gsmdecode.py
new file mode 100644 (file)
index 0000000..4fc1392
--- /dev/null
@@ -0,0 +1,236 @@
+# Language codes. decode() hands the low nibble of the data coding scheme to
+# _decode_language() when the high nibble is 0x0; these constants name the
+# values that nibble can take.
+LANG_DE = 0x00  # German
+LANG_EN = 0x01  # English
+LANG_IT = 0x02  # Italian
+LANG_FR = 0x03  # French
+LANG_ES = 0x04  # Spanish
+LANG_NL = 0x05  # Dutch
+LANG_SE = 0x06  # Swedish
+LANG_DA = 0x07  # Danish
+LANG_PO = 0x08  # Portuguese
+LANG_FI = 0x09  # Finnish
+LANG_NO = 0x0a  # Norwegian
+LANG_GR = 0x0b  # Greek
+LANG_TR = 0x0c  # Turkish
+# 0x0d and 0x0e have no constant in this module
+LANG_UNSPECIFIED = 0x0f
+
+
+# Lookup table for the 7 bit GSM default alphabet: the list index is the GSM
+# character code (0x00 - 0x7f), the entry is the matching unicode character.
+# All non-ASCII characters are written as \u escapes so that the module does
+# not depend on the encoding of the source file.
+GSM_DEFAULT_ALPHABET = [
+    # 0x00 - 0x0f
+    u"@",
+    u"\u00a3",
+    u"$",
+    u"\u00a5",
+    u"\u00e8",
+    u"\u00e9",
+    u"\u00f9",
+    u"\u00ec",
+    u"\u00f2",
+    u"\u00c7",
+    u"\n",
+    u"\u00d8",
+    u"\u00f8",
+    u"\r",
+    u"\u00c5",
+    u"\u00e5",
+
+    # 0x10 - 0x1f
+    u"\u0394",
+    u"_",
+    u"\u03a6",
+    u"\u0393",
+    u"\u039b",
+    u"\u03a9",
+    u"\u03a0",
+    u"\u03a8",
+    u"\u03a3",
+    u"\u0398",
+    u"\u039e",
+    # 0x1b is the escape to the extension table in GSM 03.38; extension
+    # characters are not supported here, so it is rendered as a space
+    u" ",
+    u"\u00c6",
+    u"\u00e6",
+    u"\u00df",
+    u"\u00c9",
+
+    # 0x20 - 0x2f
+    u" ",
+    u"!",
+    u"\"",
+    u"#",
+    u"\u00a4",
+    u"%",
+    u"&",
+    u"'",
+    u"(",
+    u")",
+    u"*",
+    u"+",
+    u",",
+    u"-",
+    u".",
+    u"/",
+
+    # 0x30 - 0x3f
+    u"0",
+    u"1",
+    u"2",
+    u"3",
+    u"4",
+    u"5",
+    u"6",
+    u"7",
+    u"8",
+    u"9",
+    u":",
+    u";",
+    u"<",
+    u"=",
+    u">",
+    u"?",
+
+    # 0x40 - 0x4f
+    u"\u00a1",
+    u"A",
+    u"B",
+    u"C",
+    u"D",
+    u"E",
+    u"F",
+    u"G",
+    u"H",
+    u"I",
+    u"J",
+    u"K",
+    u"L",
+    u"M",
+    u"N",
+    u"O",
+
+    # 0x50 - 0x5f
+    u"P",
+    u"Q",
+    u"R",
+    u"S",
+    u"T",
+    u"U",
+    u"V",
+    u"W",
+    u"X",
+    u"Y",
+    u"Z",
+    u"\u00c4",
+    u"\u00d6",
+    u"\u00d1",
+    u"\u00dc",
+    u"\u00a7",  # section sign
+
+    # 0x60 - 0x6f
+    u"\u00bf",
+    u"a",
+    u"b",
+    u"c",
+    u"d",
+    u"e",
+    u"f",
+    u"g",
+    u"h",
+    u"i",
+    u"j",
+    u"k",
+    u"l",
+    u"m",
+    u"n",
+    u"o",
+
+    # 0x70 - 0x7f
+    u"p",
+    u"q",
+    u"r",
+    u"s",
+    u"t",
+    u"u",
+    u"v",
+    u"w",
+    u"x",
+    u"y",
+    u"z",
+    u"\u00e4",
+    u"\u00f6",
+    u"\u00f1",
+    u"\u00fc",
+    u"\u00e0",
+]
+
+
+def decode(s, n):
+    """
+    Decodes the given string using the given cell broadcast data coding scheme.
+
+    Only the language group (high nibble 0x0) and the general data coding
+    group (high nibble 0x4 - 0x7) are decoded. Strings of every other coding
+    group are returned unchanged.
+
+    @param s: string to decode
+    @param n: GSM cell broadcast data coding scheme
+    @return: UTF-8 string
+    """
+
+    # separate into nibbles
+    hbits = (n & 0xf0) >> 4
+    lbits = (n & 0x0f)
+
+    if (hbits == 0x0):
+        # language
+        return _decode_language(s, lbits)
+
+    elif (0x4 <= hbits <= 0x7):
+        # general data coding indication
+        return _decode_general_data_coding(s, hbits, lbits)
+
+    # reserved language (0x1 - 0x3), reserved coding group (0x8 - 0xe) and
+    # data coding / message handling (0xf) are passed through undecoded
+    return s
+
+
+def _decode_language(s, lang):
+    """
+    Decodes a string of the language coding group.
+
+    @param s: string to decode
+    @param lang: language code (one of the LANG_* values); currently ignored
+    @return: UTF-8 string
+    """
+
+    # every language is decoded with the default alphabet for now
+    decoded = _decode_default_alphabet(s)
+    return decoded
+
+
+def _decode_default_alphabet(s):
+    """
+    Decodes a string whose characters are GSM default alphabet codes.
+
+    @param s: string with one GSM character code (0x00 - 0x7f) per character
+    @return: UTF-8 string
+    @raise IndexError: if a character code lies outside the alphabet table
+    """
+
+    # TODO: we really might have to do 7 bit character unpacking here
+
+    # ought to be all in the 7 bit GSM character map
+    chars = [ GSM_DEFAULT_ALPHABET[ord(c)] for c in s ]
+    # the table holds unicode characters, so join with a unicode separator
+    # to get a unicode string even for empty input
+    u_str = u"".join(chars)
+    return u_str.encode("utf-8")
+
+
+def _decode_hex(s):
+    """
+    Decodes a string of hexadecimal digit pairs into the bytes they encode.
+
+    @param s: hex-encoded string
+    @return: decoded byte string
+    """
+
+    raw = s.decode("hex")
+    return raw
+
+
+def _decode_usc2(s):
+    """
+    Decodes a hex-encoded, big-endian 16 bit string.
+
+    @param s: hex-encoded string
+    @return: UTF-8 string
+    """
+
+    # hex digits -> raw bytes -> unicode -> UTF-8
+    raw = s.decode("hex")
+    text = raw.decode("utf-16-be")
+    return text.encode("utf-8")
+
+
+def _decode_general_data_coding(s, h, l):
+    """
+    Decodes a string of the general data coding group.
+
+    @param s: string to decode
+    @param h: high nibble of the data coding scheme; bit 0x2 flags compressed
+              text, which is not handled here (the flag is ignored)
+    @param l: low nibble of the data coding scheme; bits 0xc select the
+              alphabet
+    @return: decoded string, or s unchanged for the reserved alphabet
+    """
+
+    # only bits 2 and 3 of the low nibble select the alphabet; the two low
+    # bits must not take part in the selection
+    alphabet = (l & 0xc) >> 2
+
+    if (alphabet == 0x0):
+        # default alphabet
+        return _decode_default_alphabet(s)
+
+    elif (alphabet == 0x1):
+        # 8 bit
+        # actually, encoding is user-defined, but let's assume hex'd ASCII
+        # for now
+        return _decode_hex(s)
+
+    elif (alphabet == 0x2):
+        # USC2 (16 bit, BE)
+        return _decode_usc2(s)
+
+    # reserved (alphabet == 0x3)
+    return s
+