2 * http://stardict.sourceforge.net
3 * Copyright (C) 2003-2003 Hu Zheng <huzheng_001@163.com>
4 * This file is a modified version of dictd-1.9.7's data.c
7 * Created: Tue Jul 16 12:45:41 1996 by faith@dict.org
8 * Revised: Sat Mar 30 10:46:06 2002 by faith@dict.org
9 * Copyright 1996, 1997, 1998, 2000, 2002 Rickard E. Faith (faith@dict.org)
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 3 of the License, or
15 * (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
27 //#define HAVE_MMAP // It will be defined in config.h; configure.in can do this with AC_FUNC_MMAP.
47 #include "dictziplib.hpp"
/* Size in bytes of the scratch buffer that read_header() declares
 * (char buffer[BUFFERSIZE]) and fills with fread() when feeding crc32(). */
51 #define BUFFERSIZE 10240
54 * Output buffer must be greater than or
55 * equal to 110% of input buffer size, plus
58 #define OUT_BUFFER_SIZE 0xffffL
/* 89% of (OUT_BUFFER_SIZE - 12).  In read(), OUT_BUFFER_SIZE sizes the
 * buffer that holds one compressed chunk and IN_BUFFER_SIZE sizes the
 * buffer that receives the inflated data. */
60 #define IN_BUFFER_SIZE ((unsigned long)((double)(OUT_BUFFER_SIZE - 12) * 0.89))
62 /* For gzip-compatible header, as defined in RFC 1952 */
64 /* Magic for GZIP (rfc1952) */
65 #define GZ_MAGIC1 0x1f /* First magic byte */
66 #define GZ_MAGIC2 0x8b /* Second magic byte */
68 /* FLaGs (bitmapped), from rfc1952 */
69 #define GZ_FTEXT 0x01 /* Set for ASCII text */
70 #define GZ_FHCRC 0x02 /* Header CRC16 */
71 #define GZ_FEXTRA 0x04 /* Optional field (random access index) */
72 #define GZ_FNAME 0x08 /* Original name */
73 #define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
/* Values of the eXtra FLags byte (see GZ_XFL below) */
74 #define GZ_MAX 2 /* Maximum compression */
75 #define GZ_FAST 4 /* Fastest compression */
77 /* These are from rfc1952 */
78 #define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
79 #define GZ_OS_AMIGA 1 /* Amiga */
80 #define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
81 #define GZ_OS_UNIX 3 /* Unix */
82 #define GZ_OS_VMCMS 4 /* VM/CMS */
83 #define GZ_OS_ATARI 5 /* Atari TOS */
84 #define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */
85 #define GZ_OS_MAC 7 /* Macintosh */
86 #define GZ_OS_Z 8 /* Z-System */
87 #define GZ_OS_CPM 9 /* CP/M */
88 #define GZ_OS_TOPS20 10 /* TOPS-20 */
89 #define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */
90 #define GZ_OS_QDOS 12 /* QDOS */
91 #define GZ_OS_ACORN 13 /* Acorn RISCOS */
92 #define GZ_OS_UNKNOWN 255 /* unknown */
/* Subfield id bytes that read_header() compares against si1/si2 to
 * recognise the random access (chunk index) extra subfield. */
94 #define GZ_RND_S1 'R' /* First magic for random access format */
95 #define GZ_RND_S2 'A' /* Second magic for random access format */
/* Byte offsets of the header fields, counted from the start of the file.
 * Offsets from GZ_SI1 onwards apply when the random access subfield is
 * the first thing in the extra field.  read_header() reads the fields
 * sequentially and uses GZ_XLEN to seed headerLength. */
97 #define GZ_ID1 0 /* GZ_MAGIC1 */
98 #define GZ_ID2 1 /* GZ_MAGIC2 */
99 #define GZ_CM 2 /* Compression Method (Z_DEFLATED) */
100 #define GZ_FLG 3 /* FLaGs (see above) */
101 #define GZ_MTIME 4 /* Modification TIME */
102 #define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */
103 #define GZ_OS 9 /* Operating System */
104 #define GZ_XLEN 10 /* eXtra LENgth (16bit) */
105 #define GZ_FEXTRA_START 12 /* Start of extra fields */
106 #define GZ_SI1 12 /* Subfield ID1 */
107 #define GZ_SI2 13 /* Subfield ID2 */
108 #define GZ_SUBLEN 14 /* Subfield length (16bit) */
109 #define GZ_VERSION 16 /* Version for subfield format */
110 #define GZ_CHUNKLEN 18 /* Chunk length (16bit) */
111 #define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
112 #define GZ_RNDDATA 22 /* Random access data (16bit) */
/* Value read_header() stores in dictData::type before the file's magic
 * bytes have been examined. */
114 #define DICT_UNKNOWN 0
/*
 * Parse the header of the data file `fname` and record the result in this
 * dictData object.
 *
 * The file is opened in binary mode.  When it does not start with the gzip
 * magic bytes it is classified as DICT_TEXT: length and compressedLength
 * come from fstat(), origFilename is set to `fname`, and the file contents
 * are run through crc32().  Otherwise it is classified as DICT_GZIP and the
 * gzip header fields are read in file order (method, flags, mtime,
 * extraFlags, os).  When the extra field carries the random access
 * subfield ('R','A') the chunk length, chunk count and per-chunk
 * compressed sizes are read as well and the type becomes DICT_DZIP.
 * The optional file name, comment and header CRC fields extend
 * headerLength.  Finally crc and length are read from the last 8 bytes of
 * the file and offsets[] is filled with the file position of every chunk.
 *
 * fname       path of the data file.
 * computeCRC  its use is not visible in the statements shown here --
 *             presumably it gates the CRC pass over plain text files;
 *             TODO confirm.
 *
 * Return value: not established by the statements shown here; open()
 * treats any non-zero result as failure.
 */
120 int dictData::read_header(const std::string &fname, int computeCRC)
123 int id1, id2, si1, si2;
124 char buffer[BUFFERSIZE];
125 int extraLength, subLength;
130 unsigned long crc = crc32( 0L, Z_NULL, 0 );
132 unsigned long offset;
134 if (!(str = fopen(fname.c_str(), "rb"))) {
135 //err_fatal_errno( __FUNCTION__,
136 // "Cannot open data file \"%s\" for read\n", filename );
// Start from the fixed part of the gzip header; the optional fields
// handled below add their own sizes to headerLength.
139 this->headerLength = GZ_XLEN - 1;
140 this->type = DICT_UNKNOWN;
// No gzip magic: treat the file as plain text and describe it from fstat().
145 if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) {
146 this->type = DICT_TEXT;
147 #if defined(_MSC_VER)
148 fstat( _fileno( str ), &sb );
150 fstat( fileno( str ), &sb );
152 this->compressedLength = this->length = sb.st_size;
153 this->origFilename = fname;
154 this->mtime = sb.st_mtime;
// Feed the file to crc32() in BUFFERSIZE-sized reads.
157 while (!feof( str )) {
158 if ((count = fread( buffer, 1, BUFFERSIZE, str ))) {
159 crc = crc32(crc, (Bytef *)buffer, count);
// gzip magic present: read the fixed header fields in file order.
167 this->type = DICT_GZIP;
169 this->method = getc( str );
170 this->flags = getc( str );
// MTIME is stored least significant byte first.
171 this->mtime = getc( str ) << 0;
172 this->mtime |= getc( str ) << 8;
173 this->mtime |= getc( str ) << 16;
174 this->mtime |= getc( str ) << 24;
175 this->extraFlags = getc( str );
176 this->os = getc( str );
// Optional extra field: a 16-bit little-endian length (2 bytes, hence
// the "+ 2") followed by that many bytes of subfields.
178 if (this->flags & GZ_FEXTRA) {
179 extraLength = getc( str ) << 0;
180 extraLength |= getc( str ) << 8;
181 this->headerLength += extraLength + 2;
// A subfield is identified by BOTH id bytes.  Requiring both ('R' and 'A')
// keeps an unrelated subfield that merely shares one id byte from being
// parsed as a chunk index.
185 if (si1 == GZ_RND_S1 && si2 == GZ_RND_S2) {
186 subLength = getc( str ) << 0;
187 subLength |= getc( str ) << 8;
188 this->version = getc( str ) << 0;
189 this->version |= getc( str ) << 8;
// Only subfield format version 1 is understood.
191 if (this->version != 1) {
192 //err_internal( __FUNCTION__,
193 // "dzip header version %d not supported\n",
197 this->chunkLength = getc( str ) << 0;
198 this->chunkLength |= getc( str ) << 8;
199 this->chunkCount = getc( str ) << 0;
200 this->chunkCount |= getc( str ) << 8;
202 if (this->chunkCount <= 0) {
// One 16-bit little-endian compressed size per chunk.
206 this->chunks = (int *)malloc(sizeof( this->chunks[0] )
207 * this->chunkCount );
208 for (i = 0; i < this->chunkCount; i++) {
209 this->chunks[i] = getc( str ) << 0;
210 this->chunks[i] |= getc( str ) << 8;
212 this->type = DICT_DZIP;
// Reposition to offset headerLength from the start of the file.
214 fseek( str, this->headerLength, SEEK_SET );
// Optional original file name: bytes up to a zero byte (or EOF); the
// terminator is counted in headerLength as well.
218 if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */
220 while ((c = getc( str )) && c != EOF)
224 this->origFilename = buffer;
225 this->headerLength += this->origFilename.length() + 1;
227 this->origFilename = "";
// Optional comment, terminated the same way as the file name.
230 if (this->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */
232 while ((c = getc( str )) && c != EOF)
236 headerLength += comment.length()+1;
// Optional header CRC16 occupies two more header bytes.
241 if (this->flags & GZ_FHCRC) {
244 this->headerLength += 2;
// Sanity check: the stream should now be positioned just past the header.
247 if (ftell( str ) != this->headerLength + 1) {
248 //err_internal( __FUNCTION__,
249 // "File position (%lu) != header length + 1 (%d)\n",
250 // ftell( str ), this->headerLength + 1 );
// The last 8 bytes of the file hold the CRC followed by the uncompressed
// length, each as a little-endian 32-bit value.
253 fseek( str, -8, SEEK_END );
254 this->crc = getc( str ) << 0;
255 this->crc |= getc( str ) << 8;
256 this->crc |= getc( str ) << 16;
257 this->crc |= getc( str ) << 24;
258 this->length = getc( str ) << 0;
259 this->length |= getc( str ) << 8;
260 this->length |= getc( str ) << 16;
261 this->length |= getc( str ) << 24;
// Having just read the final byte, the stream position is the file size.
262 this->compressedLength = ftell( str );
// offsets[i] is the file position of chunk i: chunks are laid out back to
// back, the first one starting at headerLength + 1.
264 /* Compute offsets */
265 this->offsets = (unsigned long *)malloc( sizeof( this->offsets[0] )
266 * this->chunkCount );
267 for (offset = this->headerLength + 1, i = 0;
268 i < this->chunkCount;
270 this->offsets[i] = offset;
271 offset += this->chunks[i];
/*
 * Prepare this object for reading the data file `fname`.
 *
 * Steps visible here: clear the `initialized` flag, check with
 * g_file_test() that `fname` exists and is a regular file, parse the file
 * header with read_header(), open and fstat() the file to learn its size,
 * map it through `mapfile`, point start/end at the mapped range, and reset
 * the chunk cache slots.
 *
 * fname       path of the data file.
 * computeCRC  passed through unchanged to read_header().
 *
 * Return value: the return statements are not among the lines shown here;
 * presumably true on success and false on any failed step -- TODO confirm.
 */
278 bool dictData::open(const std::string& fname, int computeCRC)
// read() sets up the zlib inflate stream on demand when this flag is clear.
284 this->initialized = 0;
285 if (!g_file_test(fname.c_str(),
286 GFileTest(G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR))) {
287 //err_warning( __FUNCTION__,
288 // "%s is not a regular file -- ignoring\n", fname );
// A non-zero result from read_header() is treated as failure.
292 if (read_header(fname, computeCRC)) {
293 //err_fatal( __FUNCTION__,
294 // "\"%s\" not in text or dzip format\n", fname );
// The descriptor is opened read-only; it is used for the fstat() below.
298 #if defined(_MSC_VER)
299 if ((fd = ::_open(fname.c_str(), O_RDONLY )) < 0) {
301 if ((fd = ::open(fname.c_str(), O_RDONLY )) < 0) {
303 //err_fatal_errno( __FUNCTION__,
304 // "Cannot open data file \"%s\"\n", fname );
307 if (fstat(fd, &sb)) {
308 //err_fatal_errno( __FUNCTION__,
309 // "Cannot stat data file \"%s\"\n", fname );
// On-disk size of the file; used as the mapping length.
313 this->size = sb.st_size;
314 #if defined(_MSC_VER)
319 if (!mapfile.open(fname.c_str(), size))
// start/end bracket the file contents (mapfile.begin() is presumably the
// address of the first mapped byte -- verify against the mapfile class).
322 this->start=mapfile.begin();
323 this->end = this->start + this->size;
// Reset every cache slot.  A NULL inBuffer means no buffer has been
// allocated for the slot yet; read() allocates one on first use.
325 for (j = 0; j < DICT_CACHE_SIZE; j++) {
328 cache[j].inBuffer = NULL;
/*
 * Release the resources visible in this listing: shut down the zlib
 * inflate stream if read() initialised it, and free the buffer of every
 * cache slot that has one.
 */
335 void dictData::close()
// The inflate stream only exists once `initialized` has been set.
344 if (this->initialized) {
// A non-zero result from inflateEnd() is the error case (see the
// commented-out message).
345 if (inflateEnd( &this->zStream )) {
346 //err_internal( __FUNCTION__,
347 // "Cannot shut down inflation engine: %s\n",
348 // this->zStream.msg );
// Slots whose inBuffer is still NULL never had a buffer allocated.
352 for (i = 0; i < DICT_CACHE_SIZE; ++i){
353 if (this -> cache [i].inBuffer)
354 free (this -> cache [i].inBuffer);
/*
 * Copy `size` bytes of dictionary data, beginning at uncompressed offset
 * `start`, into `buffer`.
 *
 * The work done depends on this->type.  One path copies straight out of
 * the mapped file.  The chunked path works out which chunks cover the
 * requested range, obtains each one either from the cache or by inflating
 * it from the mapped file, and copies the relevant slice of each chunk to
 * the output.
 *
 * buffer  destination supplied by the caller; it must hold at least
 *         `size` bytes.  No terminating '\0' is written (that statement
 *         is commented out).
 * start   offset of the first wanted byte in the uncompressed data.
 * size    number of bytes wanted.
 *
 * NOTE(review): `end` used below is declared outside the lines shown --
 * presumably start + size; confirm.
 */
358 void dictData::read(char *buffer, unsigned long start, unsigned long size)
// Holds one compressed chunk copied out of the mapping before inflating.
364 char outBuffer[OUT_BUFFER_SIZE];
365 int firstChunk, lastChunk;
366 int firstOffset, lastOffset;
368 int found, target, lastStamp;
// Function-level static: a single counter shared by every call, used to
// stamp cache slots with the order in which they were last used.
369 static int stamp = 0;
373 //buffer = malloc( size + 1 );
376 // ("dict_data_read( %p, %lu, %lu )\n",
380 switch (this->type) {
382 //err_fatal( __FUNCTION__,
383 // "Cannot seek on pure gzip format files.\n"
384 // "Use plain text (for performance)"
385 // " or dzip format (for space savings).\n" );
// Direct copy out of the mapped file (presumably the DICT_TEXT case).
388 memcpy( buffer, this->start + start, size );
389 //buffer[size] = '\0';
// Set up the inflate stream on first use.  Per zlib's inflateInit2
// documentation a negative windowBits (-15) selects raw deflate data
// with no zlib header.
392 if (!this->initialized) {
394 this->zStream.zalloc = NULL;
395 this->zStream.zfree = NULL;
396 this->zStream.opaque = NULL;
397 this->zStream.next_in = 0;
398 this->zStream.avail_in = 0;
399 this->zStream.next_out = NULL;
400 this->zStream.avail_out = 0;
401 if (inflateInit2( &this->zStream, -15 ) != Z_OK) {
402 //err_internal( __FUNCTION__,
403 // "Cannot initialize inflation engine: %s\n",
404 //this->zStream.msg );
// Split both ends of the requested range into a chunk index and a byte
// offset inside that chunk.
407 firstChunk = start / this->chunkLength;
408 firstOffset = start - firstChunk * this->chunkLength;
409 lastChunk = end / this->chunkLength;
410 lastOffset = end - lastChunk * this->chunkLength;
412 // (" start = %lu, end = %lu\n"
413 //"firstChunk = %d, firstOffset = %d,"
414 //" lastChunk = %d, lastOffset = %d\n",
415 //start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
// Walk every chunk that overlaps the range; pt is the write cursor in
// the caller's buffer.
416 for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {
// Look for chunk i in the cache while also tracking the slot with the
// smallest (oldest) stamp.
422 for (j = 0; j < DICT_CACHE_SIZE; j++) {
424 if (this->cache[j].chunk == i) {
430 if (this->cache[j].stamp < lastStamp) {
431 lastStamp = this->cache[j].stamp;
// Mark the chosen slot as the most recently used one.
436 this->cache[target].stamp = ++stamp;
// Reuse the slot's stored length and data.
438 count = this->cache[target].count;
439 inBuffer = this->cache[target].inBuffer;
// Fill the slot with chunk i: allocate its buffer on first use, then
// inflate the chunk into it.
441 this->cache[target].chunk = i;
442 if (!this->cache[target].inBuffer)
443 this->cache[target].inBuffer = (char *)malloc( IN_BUFFER_SIZE );
444 inBuffer = this->cache[target].inBuffer;
// The compressed chunk has to fit into outBuffer.
446 if (this->chunks[i] >= OUT_BUFFER_SIZE ) {
447 //err_internal( __FUNCTION__,
448 // "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
449 // i, this->chunks[i], OUT_BUFFER_SIZE );
451 memcpy( outBuffer, this->start + this->offsets[i], this->chunks[i] );
453 this->zStream.next_in = (Bytef *)outBuffer;
454 this->zStream.avail_in = this->chunks[i];
455 this->zStream.next_out = (Bytef *)inBuffer;
456 this->zStream.avail_out = IN_BUFFER_SIZE;
457 if (inflate( &this->zStream, Z_PARTIAL_FLUSH ) != Z_OK) {
458 //err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg );
// Input left over means inflate did not consume the whole chunk.
460 if (this->zStream.avail_in) {
461 //err_internal( __FUNCTION__,
462 // "inflate did not flush (%d pending, %d avail)\n",
463 // this->zStream.avail_in, this->zStream.avail_out );
// Number of bytes inflate produced for this chunk; remembered in the slot.
466 count = IN_BUFFER_SIZE - this->zStream.avail_out;
468 this->cache[target].count = count;
// Copy the part of this chunk that lies inside the requested range.
471 if (i == firstChunk) {
// The range starts and ends inside the same chunk.
472 if (i == lastChunk) {
473 memcpy( pt, inBuffer + firstOffset, lastOffset-firstOffset);
474 pt += lastOffset - firstOffset;
// First of several chunks: it is expected to be a full chunk, and is
// copied from firstOffset to its end.
476 if (count != this->chunkLength ) {
477 //err_internal( __FUNCTION__,
478 // "Length = %d instead of %d\n",
479 //count, this->chunkLength );
481 memcpy( pt, inBuffer + firstOffset,
482 this->chunkLength - firstOffset );
483 pt += this->chunkLength - firstOffset;
// Last of several chunks: copy its first lastOffset bytes.
485 } else if (i == lastChunk) {
486 memcpy( pt, inBuffer, lastOffset );
// A chunk in the middle of the range is copied whole.
489 assert( count == this->chunkLength );
490 memcpy( pt, inBuffer, this->chunkLength );
491 pt += this->chunkLength;
497 //err_fatal( __FUNCTION__, "Cannot read unknown file type\n" );