+#!/usr/bin/python
########################################################################
##
## Copyright (C) 2009 MiM
##
## Contact: Handspring <xhealer@gmail.com>
##
-## AUTHOR:
+## AUTHOR: Alsor Zhou <alsor.zhou@gmail.com>
##
## This file is part of MiM Pinyin.
##
## along with Sigil. If not, see <http://www.gnu.org/licenses/>.
##
########################################################################
+
+import getopt
+import sys
+import os.path
+import zlib
+
+# Debug switch read by PRINT(): output is produced only while this is on.
+DEBUG = True
+
+# Global error definitions.
+# ERR_PARAMS is passed to sys.exit() by main() on bad command-line parameters.
+# ERR_UNKOWN is not referenced by the code visible in this file; the
+# misspelled name is kept because other code may refer to it.
+ERR_PARAMS = 2
+ERR_UNKOWN = 255
+
+def PRINT(v):
+ '''Print wrapper with debug function supported
+
+ Never use this function in production (always output) code '''
+ if DEBUG == True:
+ print v
+
+def license():
+ '''
+Copyright (C) 2009 MiM
+
+ Contact: Handspring <xhealer@gmail.com>
+
+ AUTHOR:
+
+This file is part of MiM Pinyin.
+
+This is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Sigil. If not, see <http://www.gnu.org/licenses/>.
+ '''
+ print license.__doc__
+
+def usage():
+ '''converter.py [options] SRC [options...DEST]
+ -s SRC : specify dictionary source
+ -t DEST : save converted binary map into DEST
+ -c SRC : syntax check SRC, without converstion
+ -d DEST : generate dummy dictionary bin map
+
+ GNU long option style
+ --source SRC : same with '-s SRC'
+ --target DEST: same with '-t DEST'
+ --check SRC : same with '-c SRC'
+ --dummy DEST: same with '-d DEST'
+ '''
+ print usage.__doc__
+
+def version():
+ '''MiM pinyin dictionary converter version 0.0.1 Handspring <xhealer@gmail.com>'''
+ print version.__doc__
+
+# Target file segmentation layout.
+# NOTE(review): the *_offset values look like positions of each section inside
+# the target file; their unit is not stated here -- confirm before relying on it.
+# tgt_delimitor terminates header fields in generate_header(); the misspelled
+# name is kept because that function refers to it.
+tgt_delimitor = "\n"
+tgt_file_start_index = 0
+tgt_header_start_offset = 0
+tgt_st1_offset = 100
+tgt_st2_offset = 200
+tgt_table_a_offset = 300
+tgt_table_b_offset = 400
+
+tgt_global_position_ind = 0
+# Header definition
+# Example header fields:
+# Fn:dictionary.bin\n
+# Ver:0.2\n
+# Authors:Jackson\n
+# ActiveChunkTableFlag:A\n
+# ChunkTableAOffset:300\n
+# ChunkTableBOffset:400\n
+# ChunkSize:65535\n
+# CRC32:\n
+# Separator placed between a header key and its value.
+tgt_header_delemitor_str = ":"
+# Header keys, matching the example above.
+tgt_header_fn_str = "Fn"
+tgt_header_version_str = "Ver"
+tgt_header_author_str = "Authors"
+tgt_header_actf_str = "ActiveChunkTableFlag"
+tgt_header_ctao_str = "ChunkTableAOffset"
+tgt_header_ctbo_str = "ChunkTableBOffset"
+tgt_header_chunk_size_str= "ChunkSize"
+tgt_header_crc_str = "CRC32"
+
+# Chunk Table offset (field positions and widths inside one chunk table)
+tgt_ctable_flag_offset = 0
+tgt_ctable_flag_fld_siz = 2 # bytes
+tgt_ctable_chk_base_offset = 2
+tgt_ctable_chk_base_fld_size = 2 # bytes, 65535 maximum
+tgt_ctable_chk_acroyn_fld_size = 2
+tgt_ctable_chk_offset_fld_size = 2
+tgt_ctable_chk_size_fld_size = 2
+
+# Internal function definition
+def generate_header(fn, ver, authors, actf, ctao, ctbo, csize):
+ '''Generate target file header.
+ @param fn: file name
+ @param ver: dictionary version
+ @param authors: dictionary authors
+ @param actf: active chunk table flag (A/B)
+ @param ctao: chunk table A offset
+ @param ctbo: chunk table B offset
+ @param csize: chunk size (fixed)
+
+ @return header: header string with crc32 computed
+ '''
+ crc32 = None
+ header = None
+ header += tgt_header_fn_str + tgt_header_delemitor_str + fn + tgt_delimitor
+ header += tgt_header_version_str + tgt_header_delemitor_str + ver + tgt_delimitor
+ header += tgt_header_version_str + tgt_header_delemitor_str + authors + tgt_delimitor
+ crc32 = crc32(header); # FIXME: should we crc the timestamp?
+ header += tgt_header_version_str + tgt_header_delemitor_str + actf + tgt_delimitor
+ header += tgt_header_version_str + tgt_header_delemitor_str + ctao + tgt_delimitor
+ header += tgt_header_version_str + tgt_header_delemitor_str + ctbo + tgt_delimitor
+ header += tgt_header_version_str + tgt_header_delemitor_str + csize + tgt_delimitor
+ header += tgt_header_version_str + tgt_header_delemitor_str + crc32
+
+ PRINT(generate_header.__doc__)
+ return header
+
+def generate_st1():
+ '''Generate static table 1.
+ '''
+ # Stub: no table is produced yet. The docstring doubles as a debug trace,
+ # handed to PRINT(), which only prints while debugging is enabled.
+ PRINT(generate_st1.__doc__)
+
+def generate_st2():
+ '''Generate static table 2.
+ '''
+ # Stub: no table is produced yet. The docstring doubles as a debug trace,
+ # handed to PRINT(), which only prints while debugging is enabled.
+ PRINT(generate_st2.__doc__)
+
+def generate_ctable_a():
+ '''Chunk Table A generation.
+ Example chunk table:
+ 0------------2------------4--------6--------8------10-------12-------14----16
+ | Table flag | Chunk Base | Acroyn | Offset | Size | Acroyn | Offset | Size |
+ '''
+ # Stub: no chunk table is built yet. The docstring (including the layout
+ # sketch) is handed to PRINT() as a debug trace.
+ PRINT(generate_ctable_a.__doc__)
+
+def generate_ctable_b():
+ '''Chunk Table B generation.
+ Example chunk table:
+ 0------------2------------4--------6--------8------10-------12-------14----16
+ | Table flag | Chunk Base | Acroyn | Offset | Size | Acroyn | Offset | Size |
+ '''
+ # Stub: no chunk table is built yet. The docstring (including the layout
+ # sketch) is handed to PRINT() as a debug trace.
+ PRINT(generate_ctable_b.__doc__)
+
+def generate_dictionary():
+ '''
+ Normally, target data file have 2 dictionary map for data integrity. The
+ active map is used for memory holding, and reflash the inactivie map under
+ some mechanism.
+ '''
+ # Stub: nothing is generated yet. The docstring is handed to PRINT() as a
+ # debug trace. Takes no parameters and returns None.
+ PRINT(generate_dictionary.__doc__)
+
+def gen_dummy_dict_binmap():
+ '''Generate dummy dictionary bin map.
+
+ '''
+ # Builds a header from fixed sample values. The returned header string is
+ # discarded, so nothing is written anywhere by this function.
+ generate_header("dictionary.bin", "0.2", "Jackson", "A", 300, 400, 65535);
+ # Debug trace: echoes the docstring while debugging is enabled.
+ PRINT(gen_dummy_dict_binmap.__doc__)
+
+def convert(src, dest):
+ '''Convertion from original text format dictionary to binary map.
+
+ @param src : text format dictionary
+ @param dest: binary map dictionary
+
+ @return None
+ '''
+ # Stub: src and dest are not read yet. The docstring is handed to PRINT()
+ # as a debug trace.
+ PRINT(convert.__doc__)
+
+def check(src):
+ '''Check syntax format of orignal text format dictionary
+
+ @param src : text format dictionary
+
+ @return True without syntax error, False else.
+ '''
+ # Stub: src is not read yet. NOTE(review): the docstring promises a
+ # True/False result, but the body has no return statement, so callers
+ # currently receive None.
+ PRINT(check.__doc__)
+
+def main(argv):
+ '''Main business logic
+
+ @param argv : sys.argv[1:]
+ @return error code if any
+ '''
+
+ # handle parameter parse
+ valid_args = "hvVt:c:s:d"
+ valid_long_args = ["help", "version", "source", "target", "check", "dummy"]
+ src = None
+ dest = None
+
+ try:
+ opts, args = getopt.getopt(argv, valid_args, valid_long_args)
+ except getopt.GetoptError, err:
+ print str(err)
+ license()
+ usage()
+ sys.exit(ERR_PARAMS)
+ output = None
+ verbose = False
+ for o, a in opts:
+ if o in ("-s", "--source"):
+ if a == None:
+ assert False, "No dictionary source specified"
+ usage()
+ sys.exit(ERR_PARAMS)
+ # no dest specified, use same filename as src to store file
+ if dest == None:
+ basename = os.path.basename(src)
+ dest = os.path.splitext(basename)[0]
+ dest = os.path.join(dest, ".bin")
+ src = a
+ convert(src, dest)
+ elif o in ("-t", "--target"):
+ dest = a
+ elif o in ("-d", "--dummy"):
+ gen_dummy_dict_binmap()
+ elif o in ("-c", "--check"):
+ check(a)
+ elif o == "-v":
+ verbose = True
+ elif o in ("-h", "--help"):
+ usage()
+ sys.exit()
+ elif o in ("-V", "--version"):
+ version()
+ else:
+ usage()
+
+if __name__ == "__main__":
+ main(sys.argv[1:])