Imported version 0.2-1
[mstardict] / src / lib / stddict.cpp
1 /*
2  * This file part of StarDict - A international dictionary for GNOME.
3  * http://stardict.sourceforge.net
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18  */
19
20 /*
21  * Implementation of class to work with standard StarDict's dictionaries
22  * lookup word, get articles and so on.
23  *
24  * Notice: read doc/DICTFILE_FORMAT for the dictionary
25  * file's format information!
26  */
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30
31 #include <glib/gi18n.h>
32 #include <glib/gstdio.h>
33
34 #include "common.hpp"
35 #include "distance.h"
36 #include "kmp.h"
37 #include "mapfile.hpp"
38
39 #include "stddict.hpp"
40 #include <algorithm>
41 #include "getuint32.h"
42
43 static inline gint stardict_strcmp(const gchar *s1, const gchar *s2)
44 {
45   gint a=g_ascii_strcasecmp(s1, s2);
46   if (a == 0)
47     return strcmp(s1, s2);
48   else
49     return a;
50 }
51
52 static gint stardict_collate(const gchar *str1, const gchar *str2, CollateFunctions func)
53 {
54         gint x = utf8_collate(str1, str2, func);
55         if (x == 0)
56                 return strcmp(str1, str2);
57         else
58                 return x;
59 }
60
61 gint stardict_server_collate(const gchar *str1, const gchar *str2, int EnableCollationLevel, CollateFunctions func, int servercollatefunc)
62 {
63         if (EnableCollationLevel == 0)
64                 return stardict_strcmp(str1, str2);
65         if (EnableCollationLevel == 1)
66                 return stardict_collate(str1, str2, func);
67         if (servercollatefunc == 0)
68                 return stardict_strcmp(str1, str2);
69         return stardict_collate(str1, str2, (CollateFunctions)(servercollatefunc-1));
70 }
71
72 gint stardict_casecmp(const gchar *s1, const gchar *s2, int EnableCollationLevel, CollateFunctions func, int servercollatefunc)
73 {
74         if (EnableCollationLevel == 0)
75                 return g_ascii_strcasecmp(s1, s2);
76         if (EnableCollationLevel == 1)
77                 return utf8_collate(s1, s2, func);
78         if (servercollatefunc == 0)
79                 return g_ascii_strcasecmp(s1, s2);
80         return utf8_collate(s1, s2, (CollateFunctions)(servercollatefunc-1));
81 }
82
83 static inline gint prefix_match (const gchar *s1, const gchar *s2)
84 {
85     gint ret=-1;
86     gunichar u1, u2;
87     do {
88         u1 = g_utf8_get_char(s1);
89         u2 = g_utf8_get_char(s2);
90         s1 = g_utf8_next_char(s1);
91         s2 = g_utf8_next_char(s2);
92         ret++;
93     } while (u1 && g_unichar_tolower(u1) == g_unichar_tolower(u2));
94     return ret;
95 }
96
97 static inline bool bIsVowel(gchar inputchar)
98 {
99   gchar ch = g_ascii_toupper(inputchar);
100   return( ch=='A' || ch=='E' || ch=='I' || ch=='O' || ch=='U' );
101 }
102
103
104 bool bIsPureEnglish(const gchar *str)
105 {
106   // i think this should work even when it is UTF8 string :).
107   for (int i=0; str[i]!=0; i++)
108     //if(str[i]<0)
109     //if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK.
110     // Better use isascii() but not str[i]<0 while char is default unsigned in arm
111     if (!isascii(str[i]))
112             return false;
113   return true;
114 }
115
116 class offset_index : public index_file {
117 public:
118         offset_index();
119         ~offset_index();
120         bool load(const std::string& url, gulong wc, gulong fsize,
121                   bool CreateCacheFile, int EnableCollationLevel,
122                   CollateFunctions _CollateFunction, show_progress_t *sp);
123         void get_data(glong idx);
124         const gchar *get_key_and_data(glong idx);
125 private:
126         const gchar *get_key(glong idx);
127         bool lookup(const char *str, glong &idx, glong &idx_suggest);
128
129         static const gint ENTR_PER_PAGE=32;
130
131         cache_file oft_file;
132         FILE *idxfile;
133         gulong npages;
134
135         gchar wordentry_buf[256+sizeof(guint32)*2]; // The length of "word_str" should be less than 256. See doc/DICTFILE_FORMAT.
136         struct index_entry {
137                 glong idx;
138                 std::string keystr;
139                 void assign(glong i, const std::string& str) {
140                         idx=i;
141                         keystr.assign(str);
142                 }
143         };
144         index_entry first, last, middle, real_last;
145
146         struct page_entry {
147                 gchar *keystr;
148                 guint32 off, size;
149         };
150         std::vector<gchar> page_data;
151         struct page_t {
152                 glong idx;
153                 page_entry entries[ENTR_PER_PAGE];
154
155                 page_t(): idx(-1) {}
156                 void fill(gchar *data, gint nent, glong idx_);
157         } page;
158         gulong load_page(glong page_idx);
159         const gchar *read_first_on_page_key(glong page_idx);
160         const gchar *get_first_on_page_key(glong page_idx);
161 };
162
163 class wordlist_index : public index_file {
164 public:
165         wordlist_index();
166         ~wordlist_index();
167         bool load(const std::string& url, gulong wc, gulong fsize,
168                   bool CreateCacheFile, int EnableCollationLevel,
169                   CollateFunctions _CollateFunction, show_progress_t *sp);
170         void get_data(glong idx);
171         const gchar *get_key_and_data(glong idx);
172 private:
173         const gchar *get_key(glong idx);
174         bool lookup(const char *str, glong &idx, glong &idx_suggest);
175
176         gchar *idxdatabuf;
177         std::vector<gchar *> wordlist;
178 };
179
180 offset_index::offset_index() : oft_file(CacheFileType_oft)
181 {
182         clt_file = NULL;
183         idxfile = NULL;
184 }
185
186 offset_index::~offset_index()
187 {
188         delete clt_file;
189         if (idxfile)
190                 fclose(idxfile);
191 }
192
193 void offset_index::page_t::fill(gchar *data, gint nent, glong idx_)
194 {
195         idx=idx_;
196         gchar *p=data;
197         glong len;
198         for (gint i=0; i<nent; ++i) {
199                 entries[i].keystr=p;
200                 len=strlen(p);
201                 p+=len+1;
202                 entries[i].off=g_ntohl(get_uint32(p));
203                 p+=sizeof(guint32);
204                 entries[i].size=g_ntohl(get_uint32(p));
205                 p+=sizeof(guint32);
206         }
207 }
208
209 inline const gchar *offset_index::read_first_on_page_key(glong page_idx)
210 {
211         fseek(idxfile, oft_file.wordoffset[page_idx], SEEK_SET);
212         guint32 page_size=oft_file.wordoffset[page_idx+1]-oft_file.wordoffset[page_idx];
213         gulong minsize = sizeof(wordentry_buf);
214         if (page_size < minsize)
215                 minsize = page_size;
216         fread(wordentry_buf, minsize, 1, idxfile); //TODO: check returned values, deal with word entry that strlen>255.
217         return wordentry_buf;
218 }
219
220 inline const gchar *offset_index::get_first_on_page_key(glong page_idx)
221 {
222         if (page_idx<middle.idx) {
223                 if (page_idx==first.idx)
224                         return first.keystr.c_str();
225                 return read_first_on_page_key(page_idx);
226         } else if (page_idx>middle.idx) {
227                 if (page_idx==last.idx)
228                         return last.keystr.c_str();
229                 return read_first_on_page_key(page_idx);
230         } else
231                 return middle.keystr.c_str();
232 }
233
234 cache_file::cache_file(CacheFileType _cachefiletype)
235 {
236         wordoffset = NULL;
237         mf = NULL;
238         cachefiletype = _cachefiletype;
239 }
240
241
242 cache_file::~cache_file()
243 {
244         if (mf)
245                 delete mf;
246         else
247                 g_free(wordoffset);
248 }
249
250 #define OFFSETFILE_MAGIC_DATA "StarDict's oft file\nversion=2.4.8\n"
251 #define COLLATIONFILE_MAGIC_DATA "StarDict's clt file\nversion=2.4.8\n"
252
253 MapFile* cache_file::get_cache_loadfile(const gchar *filename, const std::string &url, const std::string &saveurl, CollateFunctions cltfunc, glong filedatasize, int next)
254 {
255         struct stat cachestat;
256         if (g_stat(filename, &cachestat)!=0)
257                 return NULL;
258         MapFile *mf = new MapFile;
259         if (!mf->open(filename, cachestat.st_size)) {
260                 delete mf;
261                 return NULL;
262         }
263
264         gchar *p = mf->begin();
265         gboolean has_prefix;
266         if (cachefiletype == CacheFileType_oft)
267                 has_prefix = g_str_has_prefix(p, OFFSETFILE_MAGIC_DATA);
268         else
269                 has_prefix = g_str_has_prefix(p, COLLATIONFILE_MAGIC_DATA);
270         if (!has_prefix) {
271                 delete mf;
272                 return NULL;
273         }
274         if (cachefiletype == CacheFileType_oft)
275                 p+= sizeof(OFFSETFILE_MAGIC_DATA)-1-1;
276         else
277                 p+= sizeof(COLLATIONFILE_MAGIC_DATA)-1-1;
278         gchar *p2;
279         p2 = strstr(p, "\nurl=");
280         if (!p2) {
281                 delete mf;
282                 return NULL;
283         }
284         p2+=sizeof("\nurl=")-1;
285         gchar *p3;
286         p3 = strchr(p2, '\n');
287         if (!p3) {
288                 delete mf;
289                 return NULL;
290         }
291         gchar *tmpstr;
292         tmpstr = (gchar *)g_memdup(p2, p3-p2+1);
293         tmpstr[p3-p2] = '\0';
294         if (saveurl == tmpstr) {
295                 g_free(tmpstr);
296                 if (cachefiletype == CacheFileType_clt) {
297                         p2 = strstr(p, "\nfunc=");
298                         if (!p2) {
299                                 delete mf;
300                                 return NULL;
301                         }
302                         p2+=sizeof("\nfunc=")-1;
303                         p3 = strchr(p2, '\n');
304                         if (!p3) {
305                                 delete mf;
306                                 return NULL;
307                         }
308                         tmpstr = (gchar *)g_memdup(p2, p3-p2+1);
309                         tmpstr[p3-p2] = '\0';
310                         if (atoi(tmpstr)!=cltfunc) {
311                                 g_free(tmpstr);
312                                 delete mf;
313                                 return NULL;
314                         }
315                         g_free(tmpstr);
316                 }
317                 if (cachestat.st_size!=glong(filedatasize + strlen(mf->begin()) +1)) {
318                         delete mf;
319                         return NULL;
320                 }
321                 struct stat idxstat;
322                 if (g_stat(url.c_str(), &idxstat)!=0) {
323                         delete mf;
324                         return NULL;
325                 }
326                 if (cachestat.st_mtime<idxstat.st_mtime) {
327                         delete mf;
328                         return NULL;
329                 }
330                 //g_print("Using map file: %s\n", filename);
331                 return mf;
332         }
333         g_free(tmpstr);
334         delete mf;
335         gchar *basename = g_path_get_basename(saveurl.c_str());
336         p = strrchr(basename, '.');
337         if (!p) {
338                 g_free(basename);
339                 return NULL;
340         }
341         *p='\0';
342         gchar *extendname = p+1;
343         gchar *dirname = g_path_get_dirname(filename);
344         gchar *nextfilename;
345         if (cachefiletype == CacheFileType_oft)
346                 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.oft", dirname, basename, next, extendname);
347         else if (cachefiletype == CacheFileType_clt)
348                 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.clt", dirname, basename, next, extendname);
349         else
350                 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.%d.clt", dirname, basename, next, extendname, cltfunc);
351         MapFile *out = get_cache_loadfile(nextfilename, url, saveurl, cltfunc, filedatasize, next+1);
352         g_free(basename);
353         g_free(dirname);
354         g_free(nextfilename);
355         return out;
356 }
357
358 bool cache_file::load_cache(const std::string& url, const std::string& saveurl, CollateFunctions cltfunc, glong filedatasize)
359 {
360         std::string oftfilename;
361         if (cachefiletype == CacheFileType_oft)
362                 oftfilename=saveurl+".oft";
363         else if (cachefiletype == CacheFileType_clt)
364                 oftfilename=saveurl+".clt";
365         else {
366                 gchar *func = g_strdup_printf("%d", cltfunc);
367                 oftfilename=saveurl+'.'+func+".clt";
368                 g_free(func);
369         }
370         for (int i=0;i<2;i++) {
371                 if (i==1) {
372                         if (!get_cache_filename(saveurl, oftfilename, false, cltfunc))
373                                 break;
374                 }
375                 mf = get_cache_loadfile(oftfilename.c_str(), url, saveurl, cltfunc, filedatasize, 2);
376                 if (!mf)
377                         continue;
378                 wordoffset = (guint32 *)(mf->begin()+strlen(mf->begin())+1);
379                 return true;
380         }
381         return false;
382 }
383
384 bool cache_file::get_cache_filename(const std::string& url, std::string &cachefilename, bool create, CollateFunctions cltfunc)
385 {
386         if (create) {
387                 if (!g_file_test(g_get_user_cache_dir(), G_FILE_TEST_EXISTS) &&
388                     g_mkdir(g_get_user_cache_dir(), 0700)==-1)
389                         return false;
390         }
391
392         std::string cache_dir=g_get_user_cache_dir();
393         cache_dir += G_DIR_SEPARATOR_S "stardict";
394
395         if (create) {
396                 if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_EXISTS)) {
397                         if (g_mkdir(cache_dir.c_str(), 0700)==-1)
398                                 return false;
399                 } else if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_IS_DIR))
400                         return false;
401         }
402
403         gchar *base=g_path_get_basename(url.c_str());
404         if (cachefiletype == CacheFileType_oft) {
405                 cachefilename = cache_dir+G_DIR_SEPARATOR_S+base+".oft";
406         } else if (cachefiletype == CacheFileType_clt) {
407                 cachefilename = cache_dir+G_DIR_SEPARATOR_S+base+".clt";
408         } else {
409                 gchar *func = g_strdup_printf("%d", cltfunc);
410                 cachefilename = cache_dir+G_DIR_SEPARATOR_S+base+'.'+func+".clt";
411                 g_free(func);
412         }
413         g_free(base);
414         return true;
415 }
416
417 FILE* cache_file::get_cache_savefile(const gchar *filename, const std::string &url, int next, std::string &cfilename, CollateFunctions cltfunc)
418 {
419         cfilename = filename;
420         struct stat oftstat;
421         if (g_stat(filename, &oftstat)!=0) {
422                 return fopen(filename, "wb");
423         }
424         MapFile mf;
425         if (!mf.open(filename, oftstat.st_size)) {
426                 return fopen(filename, "wb");
427         }
428         gchar *p = mf.begin();
429         bool has_prefix;
430         if (cachefiletype == CacheFileType_oft)
431                 has_prefix = g_str_has_prefix(p, OFFSETFILE_MAGIC_DATA);
432         else
433                 has_prefix = g_str_has_prefix(p, COLLATIONFILE_MAGIC_DATA);
434         if (!has_prefix) {
435                 mf.close();
436                 return fopen(filename, "wb");
437         }
438         if (cachefiletype == CacheFileType_oft)
439                 p+= sizeof(OFFSETFILE_MAGIC_DATA)-1-1;
440         else
441                 p+= sizeof(COLLATIONFILE_MAGIC_DATA)-1-1;
442         gchar *p2;
443         p2 = strstr(p, "\nurl=");
444         if (!p2) {
445                 mf.close();
446                 return fopen(filename, "wb");
447         }
448         p2+=sizeof("\nurl=")-1;
449         gchar *p3;
450         p3 = strchr(p2, '\n');
451         if (!p3) {
452                 mf.close();
453                 return fopen(filename, "wb");
454         }
455         gchar *tmpstr;
456         tmpstr = (gchar *)g_memdup(p2, p3-p2+1);
457         tmpstr[p3-p2] = '\0';
458         if (url == tmpstr) {
459                 g_free(tmpstr);
460                 mf.close();
461                 return fopen(filename, "wb");
462         }
463         g_free(tmpstr);
464         mf.close();
465         gchar *basename = g_path_get_basename(url.c_str());
466         p = strrchr(basename, '.');
467         if (!p) {
468                 g_free(basename);
469                 return NULL;
470         }
471         *p='\0';
472         gchar *extendname = p+1;
473         gchar *dirname = g_path_get_dirname(filename);
474         gchar *nextfilename;
475         if (cachefiletype == CacheFileType_oft)
476                 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.oft", dirname, basename, next, extendname);
477         else if (cachefiletype == CacheFileType_clt)
478                 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.clt", dirname, basename, next, extendname);
479         else
480                 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.%d.clt", dirname, basename, next, extendname, cltfunc);
481         FILE *out = get_cache_savefile(nextfilename, url, next+1, cfilename, cltfunc);
482         g_free(basename);
483         g_free(dirname);
484         g_free(nextfilename);
485         return out;
486 }
487
488 bool cache_file::save_cache(const std::string& url, CollateFunctions cltfunc, gulong npages)
489 {
490         std::string oftfilename;
491         if (cachefiletype == CacheFileType_oft) {
492                 oftfilename=url+".oft";
493         } else if (cachefiletype == CacheFileType_clt) {
494                 oftfilename=url+".clt";
495         } else {
496                 gchar *func = g_strdup_printf("%d", cltfunc);
497                 oftfilename=url+'.'+func+".clt";
498                 g_free(func);
499         }
500         for (int i=0;i<2;i++) {
501                 if (i==1) {
502                         if (!get_cache_filename(url, oftfilename, true, cltfunc))
503                                 break;
504                 }
505                 std::string cfilename;
506                 FILE *out= get_cache_savefile(oftfilename.c_str(), url, 2, cfilename, cltfunc);
507                 if (!out)
508                         continue;
509                 if (cachefiletype == CacheFileType_oft)
510                         fwrite(OFFSETFILE_MAGIC_DATA, 1, sizeof(OFFSETFILE_MAGIC_DATA)-1, out);
511                 else
512                         fwrite(COLLATIONFILE_MAGIC_DATA, 1, sizeof(COLLATIONFILE_MAGIC_DATA)-1, out);
513                 fwrite("url=", 1, sizeof("url=")-1, out);
514                 fwrite(url.c_str(), 1, url.length(), out);
515                 if (cachefiletype == CacheFileType_clt) {
516 #ifdef _MSC_VER
517                         fprintf_s(out, "\nfunc=%d", cltfunc);
518 #else
519                         fprintf(out, "\nfunc=%d", cltfunc);
520 #endif
521                 }
522                 fwrite("\n", 1, 2, out);
523                 fwrite(wordoffset, sizeof(guint32), npages, out);
524                 fclose(out);
525                 g_print("Save cache file: %s\n", cfilename.c_str());
526                 return true;
527         }
528         return false;
529 }
530
531 collation_file::collation_file(idxsyn_file *_idx_file, CacheFileType _cachefiletype) : cache_file(_cachefiletype)
532 {
533         idx_file = _idx_file;
534 }
535
536 const gchar *collation_file::GetWord(glong idx)
537 {
538         return idx_file->get_key(wordoffset[idx]);
539 }
540
541 glong collation_file::GetOrigIndex(glong cltidx)
542 {
543         return wordoffset[cltidx];
544 }
545
546 bool collation_file::lookup(const char *sWord, glong &idx, glong &idx_suggest)
547 {
548         bool bFound=false;
549         glong iTo=idx_file->wordcount-1;
550         if (stardict_collate(sWord, GetWord(0), CollateFunction)<0) {
551                 idx = 0;
552                 idx_suggest = 0;
553         } else if (stardict_collate(sWord, GetWord(iTo), CollateFunction) >0) {
554                 idx = INVALID_INDEX;
555                 idx_suggest = iTo;
556         } else {
557                 glong iThisIndex=0;
558                 glong iFrom=0;
559                 gint cmpint;
560                 while (iFrom<=iTo) {
561                         iThisIndex=(iFrom+iTo)/2;
562                         cmpint = stardict_collate(sWord, GetWord(iThisIndex), CollateFunction);
563                         if (cmpint>0)
564                                 iFrom=iThisIndex+1;
565                         else if (cmpint<0)
566                                 iTo=iThisIndex-1;
567                         else {
568                                 bFound=true;
569                                 break;
570                         }
571                 }
572                 if (!bFound) {
573                         idx = iFrom;    //next
574                         idx_suggest = iFrom;
575                         gint best, back;
576                         best = prefix_match (sWord, GetWord(idx_suggest));
577                         for (;;) {
578                                 if ((iTo=idx_suggest-1) < 0)
579                                         break;
580                                 back = prefix_match (sWord, GetWord(iTo));
581                                 if (!back || back < best)
582                                         break;
583                                 best = back;
584                                 idx_suggest = iTo;
585                         }
586                 } else {
587                         idx = iThisIndex;
588                         idx_suggest = iThisIndex;
589                 }
590         }
591         return bFound;
592 }
593
594 struct sort_collation_index_user_data {
595         idxsyn_file *idx_file;
596         CollateFunctions cltfunc;
597 };
598
599 static gint sort_collation_index(gconstpointer a, gconstpointer b, gpointer user_data)
600 {
601         sort_collation_index_user_data *data = (sort_collation_index_user_data*)user_data;
602         gchar *str1 = g_strdup(data->idx_file->get_key(*((guint32 *)a)));
603         const gchar *str2 = data->idx_file->get_key(*((guint32 *)b));
604         gint x = stardict_collate(str1, str2, data->cltfunc);
605         g_free(str1);
606         if (x==0)
607                 return *((guint32 *)a) - *((guint32 *)b);
608         else
609                 return x;
610 }
611
612 idxsyn_file::idxsyn_file()
613 {
614         memset(clt_files, 0, sizeof(clt_files));
615 }
616
617 const gchar *idxsyn_file::getWord(glong idx, int EnableCollationLevel, int servercollatefunc)
618 {
619         if (EnableCollationLevel == 0)
620                 return get_key(idx);
621         if (EnableCollationLevel == 1)
622                 return clt_file->GetWord(idx);
623         if (servercollatefunc == 0)
624                 return get_key(idx);
625         collate_load((CollateFunctions)(servercollatefunc-1));
626         return clt_files[servercollatefunc-1]->GetWord(idx);
627 }
628
629 bool idxsyn_file::Lookup(const char *str, glong &idx, glong &idx_suggest, int EnableCollationLevel, int servercollatefunc)
630 {
631         if (EnableCollationLevel == 0)
632                 return lookup(str, idx, idx_suggest);
633         if (EnableCollationLevel == 1)
634                 return clt_file->lookup(str, idx, idx_suggest);
635         if (servercollatefunc == 0)
636                 return lookup(str, idx, idx_suggest);
637         collate_load((CollateFunctions)(servercollatefunc-1));
638         return clt_files[servercollatefunc-1]->lookup(str, idx, idx_suggest);
639 }
640
641 void idxsyn_file::collate_sort(const std::string& url,
642                                const std::string& saveurl,
643                                CollateFunctions collf,
644                                show_progress_t *sp)
645 {
646         clt_file = new collation_file(this, CacheFileType_clt);
647         clt_file->CollateFunction = collf;
648         if (!clt_file->load_cache(url, saveurl, collf, wordcount*sizeof(guint32))) {
649                 sp->notify_about_start(_("Sorting, please wait..."));
650                 clt_file->wordoffset = (guint32 *)g_malloc(wordcount*sizeof(guint32));
651                 for (glong i=0; i<wordcount; i++)
652                         clt_file->wordoffset[i] = i;
653                 sort_collation_index_user_data data;
654                 data.idx_file = this;
655                 data.cltfunc = collf;
656                 g_qsort_with_data(clt_file->wordoffset, wordcount, sizeof(guint32), sort_collation_index, &data);
657                 if (!clt_file->save_cache(saveurl, collf, wordcount))
658                         g_printerr("Cache update failed.\n");
659         }
660 }
661
662 void idxsyn_file::collate_save_info(const std::string& _url, const std::string& _saveurl)
663 {
664         url = _url;
665         saveurl = _saveurl;
666 }
667
668 void idxsyn_file::collate_load(CollateFunctions collf)
669 {
670         if (clt_files[collf])
671                 return;
672         clt_files[collf] = new collation_file(this, CacheFileType_server_clt);
673         clt_files[collf]->CollateFunction = collf;
674         if (!clt_files[collf]->load_cache(url, saveurl, collf, wordcount*sizeof(guint32))) {
675                 clt_files[collf]->wordoffset = (guint32 *)g_malloc(wordcount*sizeof(guint32));
676                 for (glong i=0; i<wordcount; i++)
677                         clt_files[collf]->wordoffset[i] = i;
678                 sort_collation_index_user_data data;
679                 data.idx_file = this;
680                 data.cltfunc = collf;
681                 g_qsort_with_data(clt_files[collf]->wordoffset, wordcount, sizeof(guint32), sort_collation_index, &data);
682                 if (!clt_files[collf]->save_cache(saveurl, collf, wordcount))
683                         g_printerr("Cache update failed.\n");
684         }
685 }
686
687 bool offset_index::load(const std::string& url, gulong wc, gulong fsize,
688                         bool CreateCacheFile, int EnableCollationLevel,
689                         CollateFunctions _CollateFunction, show_progress_t *sp)
690 {
691         wordcount=wc;
692         npages=(wc-1)/ENTR_PER_PAGE+2;
693         if (!oft_file.load_cache(url, url, _CollateFunction, npages*sizeof(guint32))) {
694                 MapFile map_file;
695                 if (!map_file.open(url.c_str(), fsize))
696                         return false;
697                 const gchar *idxdatabuffer=map_file.begin();
698                 oft_file.wordoffset = (guint32 *)g_malloc(npages*sizeof(guint32));
699                 const gchar *p1 = idxdatabuffer;
700                 gulong index_size;
701                 guint32 j=0;
702                 for (guint32 i=0; i<wc; i++) {
703                         index_size=strlen(p1) +1 + 2*sizeof(guint32);
704                         if (i % ENTR_PER_PAGE==0) {
705                                 oft_file.wordoffset[j]=p1-idxdatabuffer;
706                                 ++j;
707                         }
708                         p1 += index_size;
709                 }
710                 oft_file.wordoffset[j]=p1-idxdatabuffer;
711                 map_file.close();
712                 if (CreateCacheFile) {
713                         if (!oft_file.save_cache(url, _CollateFunction, npages))
714                                 g_printerr("Cache update failed.\n");
715                 }
716         }
717
718         if (!(idxfile = fopen(url.c_str(), "rb"))) {
719                 return false;
720         }
721
722         first.assign(0, read_first_on_page_key(0));
723         last.assign(npages-2, read_first_on_page_key(npages-2));
724         middle.assign((npages-2)/2, read_first_on_page_key((npages-2)/2));
725         real_last.assign(wc-1, get_key(wc-1));
726
727         if (EnableCollationLevel == 0) {
728         } else if (EnableCollationLevel == 1) {
729                 collate_sort(url, url, _CollateFunction, sp);
730         } else if (EnableCollationLevel == 2) {
731                 collate_save_info(url, url);
732         }
733
734         return true;
735 }
736
737 inline gulong offset_index::load_page(glong page_idx)
738 {
739         gulong nentr=ENTR_PER_PAGE;
740         if (page_idx==glong(npages-2))
741                 if ((nentr=wordcount%ENTR_PER_PAGE)==0)
742                         nentr=ENTR_PER_PAGE;
743
744
745         if (page_idx!=page.idx) {
746                 page_data.resize(oft_file.wordoffset[page_idx+1]-oft_file.wordoffset[page_idx]);
747                 fseek(idxfile, oft_file.wordoffset[page_idx], SEEK_SET);
748                 fread(&page_data[0], 1, page_data.size(), idxfile);
749                 page.fill(&page_data[0], nentr, page_idx);
750         }
751
752         return nentr;
753 }
754
755 const gchar *offset_index::get_key(glong idx)
756 {
757         load_page(idx/ENTR_PER_PAGE);
758         glong idx_in_page=idx%ENTR_PER_PAGE;
759         wordentry_offset=page.entries[idx_in_page].off;
760         wordentry_size=page.entries[idx_in_page].size;
761
762         return page.entries[idx_in_page].keystr;
763 }
764
765 void offset_index::get_data(glong idx)
766 {
767         get_key(idx);
768 }
769
770 const gchar *offset_index::get_key_and_data(glong idx)
771 {
772         return get_key(idx);
773 }
774
775 bool offset_index::lookup(const char *str, glong &idx, glong &idx_suggest)
776 {
777         bool bFound=false;
778         glong iFrom;
779         glong iTo=npages-2;
780         gint cmpint;
781         glong iThisIndex;
782         if (stardict_strcmp(str, first.keystr.c_str())<0) {
783                 idx = 0;
784                 idx_suggest = 0;
785                 return false;
786         } else if (stardict_strcmp(str, real_last.keystr.c_str()) >0) {
787                 idx = INVALID_INDEX;
788                 idx_suggest = iTo;
789                 return false;
790         } else {
791                 iFrom=0;
792                 iThisIndex=0;
793                 while (iFrom<=iTo) {
794                         iThisIndex=(iFrom+iTo)/2;
795                         cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
796                         if (cmpint>0)
797                                 iFrom=iThisIndex+1;
798                         else if (cmpint<0)
799                                 iTo=iThisIndex-1;
800                         else {
801                                 bFound=true;
802                                 break;
803                         }
804                 }
805                 if (!bFound) {
806                         idx = iTo;    //prev
807                 } else {
808                         idx = iThisIndex;
809                 }
810         }
811         if (!bFound) {
812                 gulong netr=load_page(idx);
813                 iFrom=1; // Needn't search the first word anymore.
814                 iTo=netr-1;
815                 iThisIndex=0;
816                 while (iFrom<=iTo) {
817                         iThisIndex=(iFrom+iTo)/2;
818                         cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
819                         if (cmpint>0)
820                                 iFrom=iThisIndex+1;
821                         else if (cmpint<0)
822                                 iTo=iThisIndex-1;
823                         else {
824                                 bFound=true;
825                                 break;
826                         }
827                 }
828                 idx*=ENTR_PER_PAGE;
829                 if (!bFound) {
830                         idx += iFrom;    //next
831                         idx_suggest = idx;
832                         gint best, back;
833                         best = prefix_match (str, page.entries[idx_suggest % ENTR_PER_PAGE].keystr);
834                         for (;;) {
835                                 if ((iTo=idx_suggest-1) < 0)
836                                         break;
837                                 if (idx_suggest % ENTR_PER_PAGE == 0)
838                                         load_page(iTo / ENTR_PER_PAGE);
839                                 back = prefix_match (str, page.entries[iTo % ENTR_PER_PAGE].keystr);
840                                 if (!back || back < best)
841                                         break;
842                                 best = back;
843                                 idx_suggest = iTo;
844                         }
845                 } else {
846                         idx += iThisIndex;
847                         idx_suggest = idx;
848                 }
849         } else {
850                 idx*=ENTR_PER_PAGE;
851                 idx_suggest = idx;
852         }
853         return bFound;
854 }
855
// Start with no collation file object and no index buffer, so that the
// destructor's delete / g_free are harmless if load() is never called.
856 wordlist_index::wordlist_index()
857 {
858         clt_file = NULL;
859         idxdatabuf = NULL;
860 }
861
// Release the collation file object and the index buffer allocated with
// g_malloc() in load().
862 wordlist_index::~wordlist_index()
863 {
864         delete clt_file;
865         g_free(idxdatabuf);
866 }
867
868 bool wordlist_index::load(const std::string& url, gulong wc, gulong fsize,
869                           bool CreateCacheFile, int EnableCollationLevel,
870                           CollateFunctions _CollateFunction, show_progress_t *sp)
871 {
872         wordcount=wc;
873         gzFile in = gzopen(url.c_str(), "rb");
874         if (in == NULL)
875                 return false;
876
877         idxdatabuf = (gchar *)g_malloc(fsize);
878
879         gulong len = gzread(in, idxdatabuf, fsize);
880         gzclose(in);
881         if (len < 0)
882                 return false;
883
884         if (len != fsize)
885                 return false;
886
887         wordlist.resize(wc+1);
888         gchar *p1 = idxdatabuf;
889         guint32 i;
890         for (i=0; i<wc; i++) {
891                 wordlist[i] = p1;
892                 p1 += strlen(p1) +1 + 2*sizeof(guint32);
893         }
894         wordlist[wc] = p1;
895
896         if (EnableCollationLevel == 0) {
897         } else {
898                 std::string saveurl = url;
899                 saveurl.erase(saveurl.length()-sizeof(".gz")+1, sizeof(".gz")-1);
900                 if (EnableCollationLevel == 1) {
901                         collate_sort(url, saveurl, _CollateFunction, sp);
902                 } else if (EnableCollationLevel == 2) {
903                         collate_save_info(url, saveurl);
904                 }
905         }
906         return true;
907 }
908
// Return the key stored for word idx (a pointer kept in wordlist).
// No range check is performed on idx.
909 const gchar *wordlist_index::get_key(glong idx)
910 {
911         return wordlist[idx];
912 }
913
914 void wordlist_index::get_data(glong idx)
915 {
916         gchar *p1 = wordlist[idx]+strlen(wordlist[idx])+sizeof(gchar);
917         wordentry_offset = g_ntohl(get_uint32(p1));
918         p1 += sizeof(guint32);
919         wordentry_size = g_ntohl(get_uint32(p1));
920 }
921
// Update wordentry_offset / wordentry_size for word idx via get_data(), then
// return the word's key.
922 const gchar *wordlist_index::get_key_and_data(glong idx)
923 {
924         get_data(idx);
925         return get_key(idx);
926 }
927
928 bool wordlist_index::lookup(const char *str, glong &idx, glong &idx_suggest)
929 {
930         bool bFound=false;
931         glong iTo=wordlist.size()-2;
932
933         if (stardict_strcmp(str, get_key(0))<0) {
934                 idx = 0;
935                 idx_suggest = 0;
936         } else if (stardict_strcmp(str, get_key(iTo)) >0) {
937                 idx = INVALID_INDEX;
938                 idx_suggest = iTo;
939         } else {
940                 glong iThisIndex=0;
941                 glong iFrom=0;
942                 gint cmpint;
943                 while (iFrom<=iTo) {
944                         iThisIndex=(iFrom+iTo)/2;
945                         cmpint = stardict_strcmp(str, get_key(iThisIndex));
946                         if (cmpint>0)
947                                 iFrom=iThisIndex+1;
948                         else if (cmpint<0)
949                                 iTo=iThisIndex-1;
950                         else {
951                                 bFound=true;
952                                 break;
953                         }
954                 }
955                 if (!bFound) {
956                         idx = iFrom;    //next
957                         idx_suggest = iFrom;
958                         gint best, back;
959                         best = prefix_match (str, get_key(idx_suggest));
960                         for (;;) {
961                                 if ((iTo=idx_suggest-1) < 0)
962                                         break;
963                                 back = prefix_match (str, get_key(iTo));
964                                 if (!back || back < best)
965                                         break;
966                                 best = back;
967                                 idx_suggest = iTo;
968                         }
969                 } else {
970                         idx = iThisIndex;
971                         idx_suggest = iThisIndex;
972                 }
973         }
974         return bFound;
975 }
976
977 //===================================================================
978 void synonym_file::page_t::fill(gchar *data, gint nent, glong idx_)
979 {
980         idx=idx_;
981         gchar *p=data;
982         glong len;
983         for (gint i=0; i<nent; ++i) {
984                 entries[i].keystr=p;
985                 len=strlen(p);
986                 p+=len+1;
987                 entries[i].index=g_ntohl(get_uint32(p));
988                 p+=sizeof(guint32);
989         }
990 }
991
992 synonym_file::synonym_file() : oft_file(CacheFileType_oft)
993 {
994         clt_file = NULL;
995 }
996
// Release the collation file object and close the synonym file if one is open.
997 synonym_file::~synonym_file()
998 {
999         delete clt_file;
1000         if (synfile)
1001                 fclose(synfile);
1002 }
1003
1004 inline const gchar *synonym_file::read_first_on_page_key(glong page_idx)
1005 {
1006         fseek(synfile, oft_file.wordoffset[page_idx], SEEK_SET);
1007         guint32 page_size=oft_file.wordoffset[page_idx+1]-oft_file.wordoffset[page_idx];
1008         gulong minsize = sizeof(wordentry_buf);
1009         if (page_size < minsize)
1010                 minsize = page_size;
1011         fread(wordentry_buf, minsize, 1, synfile); //TODO: check returned values, deal with word entry that strlen>255.
1012         return wordentry_buf;
1013 }
1014
1015 inline const gchar *synonym_file::get_first_on_page_key(glong page_idx)
1016 {
1017         if (page_idx<middle.idx) {
1018                 if (page_idx==first.idx)
1019                         return first.keystr.c_str();
1020                 return read_first_on_page_key(page_idx);
1021         } else if (page_idx>middle.idx) {
1022                 if (page_idx==last.idx)
1023                         return last.keystr.c_str();
1024                 return read_first_on_page_key(page_idx);
1025         } else
1026                 return middle.keystr.c_str();
1027 }
1028
1029 bool synonym_file::load(const std::string& url, gulong wc, bool CreateCacheFile,
1030                         int EnableCollationLevel, CollateFunctions _CollateFunction,
1031                         show_progress_t *sp)
1032 {
1033         wordcount=wc;
1034         npages=(wc-1)/ENTR_PER_PAGE+2;
1035         if (!oft_file.load_cache(url, url, _CollateFunction, npages*sizeof(guint32))) {
1036                 struct stat stats;
1037                 if (stat (url.c_str(), &stats) == -1)
1038                         return false;
1039                 MapFile map_file;
1040                 if (!map_file.open(url.c_str(), stats.st_size))
1041                         return false;
1042                 const gchar *syndatabuffer=map_file.begin();
1043                 oft_file.wordoffset = (guint32 *)g_malloc(npages*sizeof(guint32));
1044                 const gchar *p1 = syndatabuffer;
1045                 gulong index_size;
1046                 guint32 j=0;
1047                 for (guint32 i=0; i<wc; i++) {
1048                         index_size=strlen(p1) +1 + sizeof(guint32);
1049                         if (i % ENTR_PER_PAGE==0) {
1050                                 oft_file.wordoffset[j]=p1-syndatabuffer;
1051                                 ++j;
1052                         }
1053                         p1 += index_size;
1054                 }
1055                 oft_file.wordoffset[j]=p1-syndatabuffer;
1056                 map_file.close();
1057                 if (CreateCacheFile) {
1058                         if (!oft_file.save_cache(url, _CollateFunction, npages))
1059                                 g_printerr("Cache update failed.\n");
1060                 }
1061         }
1062
1063         if (!(synfile = fopen(url.c_str(), "rb"))) {
1064                 return false;
1065         }
1066
1067         first.assign(0, read_first_on_page_key(0));
1068         last.assign(npages-2, read_first_on_page_key(npages-2));
1069         middle.assign((npages-2)/2, read_first_on_page_key((npages-2)/2));
1070         real_last.assign(wc-1, get_key(wc-1));
1071
1072         if (EnableCollationLevel == 0) {
1073         } else if (EnableCollationLevel == 1)
1074                 collate_sort(url, url, _CollateFunction, sp);
1075         else if (EnableCollationLevel == 2) {
1076                 collate_save_info(url, url);
1077         }
1078
1079         return true;
1080 }
1081
1082 inline gulong synonym_file::load_page(glong page_idx)
1083 {
1084         gulong nentr=ENTR_PER_PAGE;
1085         if (page_idx==glong(npages-2))
1086                 if ((nentr=wordcount%ENTR_PER_PAGE)==0)
1087                         nentr=ENTR_PER_PAGE;
1088
1089
1090         if (page_idx!=page.idx) {
1091                 page_data.resize(oft_file.wordoffset[page_idx+1]-oft_file.wordoffset[page_idx]);
1092                 fseek(synfile, oft_file.wordoffset[page_idx], SEEK_SET);
1093                 fread(&page_data[0], 1, page_data.size(), synfile);
1094                 page.fill(&page_data[0], nentr, page_idx);
1095         }
1096
1097         return nentr;
1098 }
1099
1100 const gchar *synonym_file::get_key(glong idx)
1101 {
1102         load_page(idx/ENTR_PER_PAGE);
1103         glong idx_in_page=idx%ENTR_PER_PAGE;
1104         wordentry_index=page.entries[idx_in_page].index;
1105
1106         return page.entries[idx_in_page].keystr;
1107 }
1108
1109 bool synonym_file::lookup(const char *str, glong &idx, glong &idx_suggest)
1110 {
1111         bool bFound=false;
1112         glong iFrom;
1113         glong iTo=npages-2;
1114         gint cmpint;
1115         glong iThisIndex;
1116         if (stardict_strcmp(str, first.keystr.c_str())<0) {
1117                 idx = 0;
1118                 idx_suggest = 0;
1119                 return false;
1120         } else if (stardict_strcmp(str, real_last.keystr.c_str()) >0) {
1121                 idx = INVALID_INDEX;
1122                 idx_suggest = iTo;
1123                 return false;
1124         } else {
1125                 iFrom=0;
1126                 iThisIndex=0;
1127                 while (iFrom<=iTo) {
1128                         iThisIndex=(iFrom+iTo)/2;
1129                         cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
1130                         if (cmpint>0)
1131                                 iFrom=iThisIndex+1;
1132                         else if (cmpint<0)
1133                                 iTo=iThisIndex-1;
1134                         else {
1135                                 bFound=true;
1136                                 break;
1137                         }
1138                 }
1139                 if (!bFound)
1140                         idx = iTo;    //prev
1141                 else
1142                         idx = iThisIndex;
1143         }
1144         if (!bFound) {
1145                 gulong netr=load_page(idx);
1146                 iFrom=1; // Needn't search the first word anymore.
1147                 iTo=netr-1;
1148                 iThisIndex=0;
1149                 while (iFrom<=iTo) {
1150                         iThisIndex=(iFrom+iTo)/2;
1151                         cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
1152                         if (cmpint>0)
1153                                 iFrom=iThisIndex+1;
1154                         else if (cmpint<0)
1155                                 iTo=iThisIndex-1;
1156                         else {
1157                                 bFound=true;
1158                                 break;
1159                         }
1160                 }
1161                 idx*=ENTR_PER_PAGE;
1162                 if (!bFound) {
1163                         idx += iFrom;    //next
1164                         idx_suggest = idx;
1165                         gint best, back;
1166                         best = prefix_match (str, page.entries[idx_suggest % ENTR_PER_PAGE].keystr);
1167                         for (;;) {
1168                                 if ((iTo=idx_suggest-1) < 0)
1169                                         break;
1170                                 if (idx_suggest % ENTR_PER_PAGE == 0)
1171                                         load_page(iTo / ENTR_PER_PAGE);
1172                                 back = prefix_match (str, page.entries[iTo % ENTR_PER_PAGE].keystr);
1173                                 if (!back || back < best)
1174                                         break;
1175                                 best = back;
1176                                 idx_suggest = iTo;
1177                         }
1178                 } else {
1179                         idx += iThisIndex;
1180                         idx_suggest = idx;
1181                 }
1182         } else {
1183                 idx*=ENTR_PER_PAGE;
1184                 idx_suggest = idx;
1185         }
1186         return bFound;
1187 }
1188
1189 //===================================================================
// A new dictionary owns no resource storage until load() creates one.
1190 Dict::Dict()
1191 {
1192         storage = NULL;
1193 }
1194
// Destroy the resource storage object, if load() created one.
1195 Dict::~Dict()
1196 {
1197         delete storage;
1198 }
1199
1200 bool Dict::load(const std::string& ifofilename, bool CreateCacheFile,
1201                 int EnableCollationLevel, CollateFunctions CollateFunction,
1202                 show_progress_t *sp)
1203 {
1204         gulong idxfilesize;
1205         glong wordcount, synwordcount;
1206         if (!load_ifofile(ifofilename, idxfilesize, wordcount, synwordcount))
1207                 return false;
1208         sp->notify_about_start(_("Loading..."));
1209         std::string fullfilename(ifofilename);
1210         fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "dict.dz");
1211
1212         if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1213                 dictdzfile.reset(new dictData);
1214                 if (!dictdzfile->open(fullfilename, 0)) {
1215                         //g_print("open file %s failed!\n",fullfilename);
1216                         return false;
1217                 }
1218         } else {
1219                 fullfilename.erase(fullfilename.length()-sizeof(".dz")+1, sizeof(".dz")-1);
1220                 dictfile = fopen(fullfilename.c_str(),"rb");
1221                 if (!dictfile) {
1222                         //g_print("open file %s failed!\n",fullfilename);
1223                         return false;
1224                 }
1225         }
1226
1227         fullfilename=ifofilename;
1228         fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "idx.gz");
1229
1230         if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1231                 idx_file.reset(new wordlist_index);
1232         } else {
1233                 fullfilename.erase(fullfilename.length()-sizeof(".gz")+1, sizeof(".gz")-1);
1234                 idx_file.reset(new offset_index);
1235         }
1236
1237         if (!idx_file->load(fullfilename, wordcount, idxfilesize,
1238                             CreateCacheFile, EnableCollationLevel,
1239                             CollateFunction, sp))
1240                 return false;
1241
1242         if (synwordcount) {
1243                 fullfilename=ifofilename;
1244                 fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "syn");
1245                 if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1246                         syn_file.reset(new synonym_file);
1247                         if (!syn_file->load(fullfilename, synwordcount,
1248                                             CreateCacheFile, EnableCollationLevel,
1249                                             CollateFunction, sp))
1250                                 return false;
1251                 }
1252         }
1253
1254         bool has_res = false;
1255         gchar *dirname = g_path_get_dirname(ifofilename.c_str());
1256         fullfilename = dirname;
1257         fullfilename += G_DIR_SEPARATOR_S "res";
1258         if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR)) {
1259                 has_res = true;
1260         } else {
1261                 fullfilename = dirname;
1262                 fullfilename += G_DIR_SEPARATOR_S "res.rifo";
1263                 if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1264                         has_res = true;
1265                 }
1266         }
1267         if (has_res) {
1268                 storage = new ResourceStorage();
1269                 bool failed = storage->load(dirname);
1270                 if (failed) {
1271                         delete storage;
1272                         storage = NULL;
1273                 }
1274         }
1275         g_free(dirname);
1276
1277         g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), wordcount);
1278         return true;
1279 }
1280
1281 bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize, glong &wordcount, glong &synwordcount)
1282 {
1283         DictInfo dict_info;
1284         if (!dict_info.load_from_ifo_file(ifofilename, false))
1285                 return false;
1286         if (dict_info.wordcount==0)
1287                 return false;
1288
1289         ifo_file_name=dict_info.ifo_file_name;
1290         bookname=dict_info.bookname;
1291
1292         idxfilesize=dict_info.index_file_size;
1293         wordcount=dict_info.wordcount;
1294         synwordcount=dict_info.synwordcount;
1295
1296         sametypesequence=dict_info.sametypesequence;
1297         dicttype=dict_info.dicttype;
1298
1299         return true;
1300 }
1301
1302 glong Dict::nsynarticles()
1303 {
1304         if (syn_file.get() == NULL)
1305                 return 0;
1306         return syn_file->wordcount;
1307 }
1308
1309 bool Dict::GetWordPrev(glong idx, glong &pidx, bool isidx, int EnableCollationLevel, int servercollatefunc)
1310 {
1311         idxsyn_file *is_file;
1312         if (isidx)
1313                 is_file = idx_file.get();
1314         else
1315                 is_file = syn_file.get();
1316         if (idx==INVALID_INDEX) {
1317                 pidx = is_file->wordcount-1;
1318                 return true;
1319         }
1320         pidx = idx;
1321         gchar *cWord = g_strdup(is_file->getWord(pidx, EnableCollationLevel, servercollatefunc));
1322         const gchar *pWord;
1323         bool found=false;
1324         while (pidx>0) {
1325                 pWord = is_file->getWord(pidx-1, EnableCollationLevel, servercollatefunc);
1326                 if (strcmp(pWord, cWord)!=0) {
1327                         found=true;
1328                         break;
1329                 }
1330                 pidx--;
1331         }
1332         g_free(cWord);
1333         if (found) {
1334                 pidx--;
1335                 return true;
1336         } else {
1337                 return false;
1338         }
1339 }
1340
1341 void Dict::GetWordNext(glong &idx, bool isidx, int EnableCollationLevel, int servercollatefunc)
1342 {
1343         idxsyn_file *is_file;
1344         if (isidx)
1345                 is_file = idx_file.get();
1346         else
1347                 is_file = syn_file.get();
1348         gchar *cWord = g_strdup(is_file->getWord(idx, EnableCollationLevel, servercollatefunc));
1349         const gchar *pWord;
1350         bool found=false;
1351         while (idx < is_file->wordcount-1) {
1352                 pWord = is_file->getWord(idx+1, EnableCollationLevel, servercollatefunc);
1353                 if (strcmp(pWord, cWord)!=0) {
1354                         found=true;
1355                         break;
1356                 }
1357                 idx++;
1358         }
1359         g_free(cWord);
1360         if (found)
1361                 idx++;
1362         else
1363                 idx=INVALID_INDEX;
1364 }
1365
1366 gint Dict::GetOrigWordCount(glong& idx, bool isidx)
1367 {
1368         idxsyn_file *is_file;
1369         if (isidx)
1370                 is_file = idx_file.get();
1371         else
1372                 is_file = syn_file.get();
1373         gchar *cWord = g_strdup(is_file->get_key(idx));
1374         const gchar *pWord;
1375         gint count = 1;
1376         glong idx1 = idx;
1377         while (idx1>0) {
1378                 pWord = is_file->get_key(idx1-1);
1379                 if (strcmp(pWord, cWord)!=0)
1380                         break;
1381                 count++;
1382                 idx1--;
1383         }
1384         glong idx2=idx;
1385         while (idx2<is_file->wordcount-1) {
1386                 pWord = is_file->get_key(idx2+1);
1387                 if (strcmp(pWord, cWord)!=0)
1388                         break;
1389                 count++;
1390                 idx2++;
1391         }
1392         idx=idx1;
1393         g_free(cWord);
1394         return count;
1395 }
1396
1397 bool Dict::LookupSynonym(const char *str, glong &synidx, glong &synidx_suggest, int EnableCollationLevel, int servercollatefunc)
1398 {
1399         if (syn_file.get() == NULL) {
1400                 synidx = UNSET_INDEX;
1401                 synidx_suggest = UNSET_INDEX;
1402                 return false;
1403         }
1404         return syn_file->Lookup(str, synidx, synidx_suggest, EnableCollationLevel, servercollatefunc);
1405 }
1406
1407 bool Dict::LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
1408 {
1409         int iIndexCount=0;
1410         for (glong i=0; i<narticles() && iIndexCount<iBuffLen-1; i++)
1411                 // Need to deal with same word in index? But this will slow down processing in most case.
1412                 if (g_pattern_match_string(pspec, idx_file->getWord(i, 0, 0)))
1413                         aIndex[iIndexCount++]=i;
1414         aIndex[iIndexCount]= -1; // -1 is the end.
1415         return (iIndexCount>0);
1416 }
1417
1418 bool Dict::LookupWithRuleSynonym(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
1419 {
1420         if (syn_file.get() == NULL)
1421                 return false;
1422         int iIndexCount=0;
1423         for (glong i=0; i<nsynarticles() && iIndexCount<iBuffLen-1; i++)
1424                 // Need to deal with same word in index? But this will slow down processing in most case.
1425                 if (g_pattern_match_string(pspec, syn_file->getWord(i, 0, 0)))
1426                         aIndex[iIndexCount++]=i;
1427         aIndex[iIndexCount]= -1; // -1 is the end.
1428         return (iIndexCount>0);
1429 }
1430
1431 bool Dict::LookupWithRegex(GRegex *regex, glong *aIndex, int iBuffLen)
1432 {
1433         int iIndexCount=0;
1434         for (glong i=0; i<narticles() && iIndexCount<iBuffLen-1; i++)
1435                 // Need to deal with same word in index? But this will slow down processing in most case.
1436                 if (g_regex_match(regex, idx_file->getWord(i, 0, 0), (GRegexMatchFlags)0, NULL))
1437                         aIndex[iIndexCount++]=i;
1438         aIndex[iIndexCount]= -1; // -1 is the end.
1439         return (iIndexCount>0);
1440 }
1441
1442 bool Dict::LookupWithRegexSynonym(GRegex *regex, glong *aIndex, int iBuffLen)
1443 {
1444         if (syn_file.get() == NULL)
1445                 return false;
1446         int iIndexCount=0;
1447         for (glong i=0; i<nsynarticles() && iIndexCount<iBuffLen-1; i++)
1448                 // Need to deal with same word in index? But this will slow down processing in most case.
1449                 if (g_regex_match(regex, syn_file->getWord(i, 0, 0), (GRegexMatchFlags)0, NULL))
1450                         aIndex[iIndexCount++]=i;
1451         aIndex[iIndexCount]= -1; // -1 is the end.
1452         return (iIndexCount>0);
1453 }
1454
1455 //===================================================================
// Definition of the static show_progress_t member of Libs.
// NOTE(review): presumably the fallback used when no progress object is
// supplied - confirm against set_show_progress().
1456 show_progress_t Libs::default_show_progress;
1457
1458 Libs::Libs(show_progress_t *sp, bool create, int enablelevel, int function)
1459 {
1460 #ifdef SD_SERVER_CODE
1461         root_info_item = NULL;
1462 #endif
1463         set_show_progress(sp);
1464         CreateCacheFile = create;
1465         EnableCollationLevel = enablelevel;
1466         CollateFunction = (CollateFunctions)function;
1467         iMaxFuzzyDistance  = MAX_FUZZY_DISTANCE; //need to read from cfg.
1468         if (EnableCollationLevel == 0) {
1469         } else if (EnableCollationLevel == 1) {
1470                 if (utf8_collate_init(CollateFunction))
1471                         printf("Init collate function failed!\n");
1472         } else if (EnableCollationLevel == 2){
1473                 if (utf8_collate_init_all())
1474                         printf("Init collate functions failed!\n");
1475         }
1476 }
1477
1478 Libs::~Libs()
1479 {
1480 #ifdef SD_SERVER_CODE
1481         if (root_info_item)
1482                 delete root_info_item;
1483 #endif
1484         for (std::vector<Dict *>::iterator p=oLib.begin(); p!=oLib.end(); ++p)
1485                 delete *p;
1486         utf8_collate_end();
1487 }
1488
1489 bool Libs::load_dict(const std::string& url, show_progress_t *sp)
1490 {
1491         Dict *lib=new Dict;
1492         if (lib->load(url, CreateCacheFile, EnableCollationLevel,
1493                       CollateFunction, sp)) {
1494                 oLib.push_back(lib);
1495                 return true;
1496         } else {
1497                 delete lib;
1498                 return false;
1499         }
1500 }
1501
1502 #ifdef SD_SERVER_CODE
1503 void Libs::LoadFromXML()
1504 {
1505         root_info_item = new DictInfoItem();
1506         root_info_item->isdir = 1;
1507         root_info_item->dir = new DictInfoDirItem();
1508         root_info_item->dir->name='/';
1509         LoadXMLDir("/usr/share/stardict/dic", root_info_item);
1510         GenLinkDict(root_info_item);
1511 }
1512
1513 void Libs::GenLinkDict(DictInfoItem *info_item)
1514 {
1515         std::list<std::list<DictInfoItem *>::iterator> eraselist;
1516         for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1517                 if ((*i)->isdir == 1) {
1518                         GenLinkDict(*i);
1519                 } else if ((*i)->isdir == 2) {
1520                         std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1521                         uid_iter = uidmap.find(*((*i)->linkuid));
1522                         if (uid_iter!=uidmap.end()) {
1523                                 delete (*i)->linkuid;
1524                                 (*i)->dict = uid_iter->second;
1525                         } else {
1526                                 g_print("Error, linkdict uid not found! %s\n", (*i)->linkuid->c_str());
1527                                 delete (*i)->linkuid;
1528                                 eraselist.push_back(i);
1529                         }
1530                 }
1531         }
1532         for (std::list<std::list<DictInfoItem *>::iterator>::iterator i = eraselist.begin(); i!= eraselist.end(); ++i) {
1533                 info_item->dir->info_item_list.erase(*i);
1534         }
1535 }
1536
1537 void Libs::func_parse_start_element(GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error)
1538 {
1539         if (strcmp(element_name, "dict")==0) {
1540                 ParseUserData *Data = (ParseUserData *)user_data;
1541                 Data->indict = true;
1542                 Data->path.clear();
1543                 Data->uid.clear();
1544                 Data->level.clear();
1545                 Data->download.clear();
1546                 Data->from.clear();
1547                 Data->to.clear();
1548         } else if (strcmp(element_name, "linkdict")==0) {
1549                 ParseUserData *Data = (ParseUserData *)user_data;
1550                 Data->inlinkdict = true;
1551                 Data->linkuid.clear();
1552         }
1553 }
1554
1555 void Libs::func_parse_end_element(GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error)
1556 {
1557         if (strcmp(element_name, "dict")==0) {
1558                 ParseUserData *Data = (ParseUserData *)user_data;
1559                 Data->indict = false;
1560                 if (!Data->path.empty() && !Data->uid.empty()) {
1561                         std::string url;
1562                         url = Data->dir;
1563                         url += G_DIR_SEPARATOR;
1564                         url += Data->path;
1565                         if (Data->oLibs->load_dict(url, Data->oLibs->show_progress)) {
1566                                 DictInfoItem *sub_info_item = new DictInfoItem();
1567                                 sub_info_item->isdir = 0;
1568                                 sub_info_item->dict = new DictInfoDictItem();
1569                                 sub_info_item->dict->uid = Data->uid;
1570                                 sub_info_item->dict->download = Data->download;
1571                                 sub_info_item->dict->from = Data->from;
1572                                 sub_info_item->dict->to = Data->to;
1573                                 if (Data->level.empty())
1574                                         sub_info_item->dict->level = 0;
1575                                 else
1576                                         sub_info_item->dict->level = atoi(Data->level.c_str());
1577                                 sub_info_item->dict->id = Data->oLibs->oLib.size()-1;
1578                                 Data->info_item->dir->info_item_list.push_back(sub_info_item);
1579                                 Data->oLibs->uidmap[Data->uid] = sub_info_item->dict;
1580                         }
1581                 }
1582         } else if (strcmp(element_name, "linkdict")==0) {
1583                 ParseUserData *Data = (ParseUserData *)user_data;
1584                 Data->inlinkdict = false;
1585                 if (!Data->linkuid.empty()) {
1586                         DictInfoItem *sub_info_item = new DictInfoItem();
1587                         sub_info_item->isdir = 2;
1588                         sub_info_item->linkuid = new std::string(Data->linkuid);
1589                         Data->info_item->dir->info_item_list.push_back(sub_info_item);
1590                 }
1591         }
1592 }
1593
1594 void Libs::func_parse_text(GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error)
1595 {
1596         const gchar *element = g_markup_parse_context_get_element(context);
1597         if (!element)
1598                 return;
1599         ParseUserData *Data = (ParseUserData *)user_data;
1600         if (strcmp(element, "subdir")==0) {
1601                 std::string subdir;
1602                 subdir = Data->dir;
1603                 subdir += G_DIR_SEPARATOR;
1604                 subdir.append(text, text_len);
1605                 DictInfoItem *sub_info_item = new DictInfoItem();
1606                 sub_info_item->isdir = 1;
1607                 sub_info_item->dir = new DictInfoDirItem();
1608                 sub_info_item->dir->name.assign(text, text_len);
1609                 Data->oLibs->LoadXMLDir(subdir.c_str(), sub_info_item);
1610                 Data->info_item->dir->info_item_list.push_back(sub_info_item);
1611         } else if (strcmp(element, "dirname")==0) {
1612                 Data->info_item->dir->dirname.assign(text, text_len);
1613         } else if (strcmp(element, "path")==0) {
1614                 Data->path.assign(text, text_len);
1615         } else if (strcmp(element, "uid")==0) {
1616                 if (Data->indict) {
1617                         std::string uid(text, text_len);
1618                         if (uid.find_first_of(' ')!=std::string::npos) {
1619                                 g_print("Error: uid contains space! %s: %s\n", Data->dir, uid.c_str());
1620                         } else {
1621                                 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1622                                 uid_iter = Data->oLibs->uidmap.find(uid);
1623                                 if (uid_iter!=Data->oLibs->uidmap.end()) {
1624                                         g_print("Error: uid duplicated! %s: %s\n", Data->dir, uid.c_str());
1625                                 } else {
1626                                         Data->uid = uid;
1627                                 }
1628                         }
1629                 } else if (Data->inlinkdict) {
1630                         Data->linkuid.assign(text, text_len);
1631                 }
1632         } else if (strcmp(element, "level")==0) {
1633                 Data->level.assign(text, text_len);
1634         } else if (strcmp(element, "download")==0) {
1635                 Data->download.assign(text, text_len);
1636         } else if (strcmp(element, "from")==0) {
1637                 Data->from.assign(text, text_len);
1638         } else if (strcmp(element, "to")==0) {
1639                 Data->to.assign(text, text_len);
1640         }
1641 }
1642
1643 void Libs::LoadXMLDir(const char *dir, DictInfoItem *info_item)
1644 {
1645         std::string filename;
1646         filename = dir;
1647         filename += G_DIR_SEPARATOR_S "stardictd.xml";
1648         struct stat filestat;
1649         if (g_stat(filename.c_str(), &filestat)!=0)
1650                 return;
1651         MapFile mf;
1652         if (!mf.open(filename.c_str(), filestat.st_size))
1653                 return;
1654         ParseUserData Data;
1655         Data.oLibs = this;
1656         Data.dir = dir;
1657         Data.info_item = info_item;
1658         Data.indict = false;
1659         Data.inlinkdict = false;
1660         GMarkupParser parser;
1661         parser.start_element = func_parse_start_element;
1662         parser.end_element = func_parse_end_element;
1663         parser.text = func_parse_text;
1664         parser.passthrough = NULL;
1665         parser.error = NULL;
1666         GMarkupParseContext* context = g_markup_parse_context_new(&parser, (GMarkupParseFlags)0, &Data, NULL);
1667         g_markup_parse_context_parse(context, mf.begin(), filestat.st_size, NULL);
1668         g_markup_parse_context_end_parse(context, NULL);
1669         g_markup_parse_context_free(context);
1670         mf.close();
1671         info_item->dir->dictcount = 0;
1672         for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1673                 if ((*i)->isdir == 1) {
1674                         info_item->dir->dictcount += (*i)->dir->dictcount;
1675                 } else if ((*i)->isdir == 0) {
1676                         info_item->dir->dictcount++;
1677                 }
1678         }
1679 }
1680
1681 const std::string &Libs::get_fromto_info() {
1682         if(cache_fromto.empty()){
1683                 std::map<std::string, std::list<FromTo> > map_fromto;
1684                 gen_fromto_info(root_info_item, map_fromto);
1685                 cache_fromto+="<lang>";
1686                 for (std::map<std::string, std::list<FromTo> >::iterator map_it = map_fromto.begin(); map_it != map_fromto.end(); ++map_it){
1687                         cache_fromto+="<from lang=\"";
1688                         cache_fromto+=map_it->first;
1689                         cache_fromto+="\">";
1690                         std::list<FromTo> &fromTo = map_it->second;
1691                         for (std::list<FromTo>::iterator i = fromTo.begin() ; i!= fromTo.end(); ++i){
1692                                 cache_fromto+="<to lang=\"";
1693                                 cache_fromto+= i->to;
1694                                 cache_fromto+="\">";
1695                                 std::list<FromToInfo> &fromtoinfo = i->fromto_info;
1696                                 for (std::list<FromToInfo>::iterator j = fromtoinfo.begin() ; j!= fromtoinfo.end(); ++j){
1697                                         cache_fromto+="<dict><uid>";
1698                                         cache_fromto+=j->uid;
1699                                         cache_fromto+="</uid><bookname>";
1700                                         cache_fromto+= j->bookname;
1701                                         cache_fromto+="</bookname></dict>";
1702                                 }
1703                                 cache_fromto+="</to>";
1704                         }
1705                         cache_fromto+="</from>";
1706                 }
1707                 cache_fromto+="</lang>";
1708         }
1709         return cache_fromto;
1710 }
1711
1712 void Libs::gen_fromto_info(struct DictInfoItem *info_item, std::map<std::string, std::list<FromTo> > &map_fromto) {
1713         gchar *etext;
1714         for(std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin() ; i!= info_item->dir->info_item_list.end(); ++i){
1715                 if ((*i)->isdir == 1) {
1716                         gen_fromto_info((*i), map_fromto);
1717                 } else {
1718                         std::string from_str     = (*i)->dict->from;
1719                         std::string to_str       = (*i)->dict->to;
1720                         if(from_str.empty() || to_str.empty()){
1721                                 continue;
1722                         }
1723                         std::string uid_str      = (*i)->dict->uid;
1724                         etext = g_markup_escape_text(oLib[(*i)->dict->id]->dict_name().c_str(), -1);
1725                         std::string bookname_str = etext;
1726                         g_free(etext);
1727                         std::map<std::string, std::list<FromTo> >::iterator fromto1 = map_fromto.find(from_str);
1728                         if (fromto1==map_fromto.end()) {
1729                                 //if an from_str element not already in map,  add new from_str to map
1730                                 FromToInfo fromtoinfo;
1731                                 fromtoinfo.uid = uid_str;
1732                                 fromtoinfo.bookname = bookname_str;
1733                                 std::list<FromToInfo> list_fromtoinfo ;
1734                                 list_fromtoinfo.push_back(fromtoinfo);
1735                                 FromTo new_fromTo;
1736                                 new_fromTo.to = to_str;
1737                                 new_fromTo.fromto_info = list_fromtoinfo;
1738                                 std::list<FromTo> list_fromTo;
1739                                 list_fromTo.push_back(new_fromTo);
1740                                 map_fromto[from_str] = list_fromTo;
1741                         } else {
1742                                 // else if from_str already in map, so comparison to_str and from_to1 , then choose insert.
1743                                 std::list<FromTo> &fromTo_list = fromto1->second;
1744                                 std::string from_name1 = fromto1->first;
1745                                 bool found = false;
1746                                 for (std::list<FromTo>::iterator new_fromTo = fromTo_list.begin(); new_fromTo != fromTo_list.end(); ++new_fromTo) {
1747                                         if(to_str == new_fromTo->to) {
1748                                                 std::list<FromToInfo> &fromtoinfo1 = new_fromTo->fromto_info;
1749                                                 FromToInfo fromtoinfo;
1750                                                 fromtoinfo.uid = uid_str;
1751                                                 fromtoinfo.bookname = bookname_str;
1752                                                 fromtoinfo1.push_back(fromtoinfo);
1753                                                 found = true;
1754                                                 break;
1755                                         }
1756                                 }
1757                                 if(!found){
1758                                         FromToInfo fromtoinfo;
1759                                         fromtoinfo.uid = uid_str;
1760                                         fromtoinfo.bookname = bookname_str;
1761                                         std::list<FromToInfo> fromtoinfo1;
1762                                         fromtoinfo1.push_back(fromtoinfo);
1763                                         FromTo fromTo;
1764                                         fromTo.to = to_str;
1765                                         fromTo.fromto_info = fromtoinfo1;
1766                                         fromTo_list.push_back(fromTo);
1767                                 }                               
1768                         }
1769                 }
1770         }
1771 }
1772
1773 const std::string *Libs::get_dir_info(const char *path)
1774 {
1775         if (path[0]!='/')
1776                 return NULL;
1777         DictInfoItem *info_item = root_info_item;
1778         std::string item;
1779         const char *p = path+1;
1780         const char *p1;
1781         bool found;
1782         do {
1783                 p1 = strchr(p, '/');
1784                 if (p1) {
1785                         item.assign(p, p1-p);
1786                         if (!item.empty()) {
1787                                 found = false;
1788                                 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1789                                         if ((*i)->isdir == 1) {
1790                                                 if ((*i)->dir->name == item) {
1791                                                         info_item = (*i);
1792                                                         found = true;
1793                                                         break;
1794                                                 }
1795                                         }
1796                                 }
1797                                 if (!found)
1798                                         return NULL;
1799                         }
1800                         p = p1+1;
1801                 }
1802         } while (p1);
1803         if (*p)
1804                 return NULL; // Not end by '/'.
1805         DictInfoDirItem *dir = info_item->dir;
1806         if (dir->info_string.empty()) {
1807                 dir->info_string += "<parent>";
1808                 dir->info_string += path;
1809                 dir->info_string += "</parent>";
1810                 gchar *etext;
1811                 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1812                         if ((*i)->isdir == 1) {
1813                                 dir->info_string += "<dir><name>";
1814                                 dir->info_string += (*i)->dir->name;
1815                                 dir->info_string += "</name><dirname>";
1816                                 dir->info_string += (*i)->dir->dirname;
1817                                 dir->info_string += "</dirname><dictcount>";
1818                                 gchar *dictcount = g_strdup_printf("%u", (*i)->dir->dictcount);
1819                                 dir->info_string += dictcount;
1820                                 g_free(dictcount);
1821                                 dir->info_string += "</dictcount></dir>";
1822                         } else {
1823                                 dir->info_string += "<dict>";
1824                                 if ((*i)->isdir == 2)
1825                                         dir->info_string += "<islink>1</islink>";
1826                                 if ((*i)->dict->level != 0) {
1827                                         dir->info_string += "<level>";
1828                                         gchar *level = g_strdup_printf("%u", (*i)->dict->level);
1829                                         dir->info_string += level;
1830                                         g_free(level);
1831                                         dir->info_string += "</level>";
1832                                 }
1833                                 dir->info_string += "<uid>";
1834                                 dir->info_string += (*i)->dict->uid;
1835                                 dir->info_string += "</uid><bookname>";
1836                                 etext = g_markup_escape_text(oLib[(*i)->dict->id]->dict_name().c_str(), -1);
1837                                 dir->info_string += etext;
1838                                 g_free(etext);
1839                                 dir->info_string += "</bookname><wordcount>";
1840                                 gchar *wc = g_strdup_printf("%ld", oLib[(*i)->dict->id]->narticles());
1841                                 dir->info_string += wc;
1842                                 g_free(wc);
1843                                 dir->info_string += "</wordcount></dict>";
1844                         }
1845                 }
1846         }
1847         return &(dir->info_string);
1848 }
1849
1850 int Libs::get_dict_level(const char *uid)
1851 {
1852         std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1853         uid_iter = uidmap.find(uid);
1854         if (uid_iter==uidmap.end())
1855                 return -1;
1856         return uid_iter->second->level;
1857 }
1858
1859 std::string Libs::get_dicts_list(const char *dictmask, int max_dict_count, int userLevel)
1860 {
1861         std::list<std::string> uid_list;
1862         std::string uid;
1863         const char *p, *p1;
1864         p = dictmask;
1865         do {
1866                 p1 = strchr(p, ' ');
1867                 if (p1) {
1868                         uid.assign(p, p1-p);
1869                         if (!uid.empty())
1870                                 uid_list.push_back(uid);
1871                         p = p1+1;
1872                 }
1873         } while (p1);
1874         uid = p;
1875         if (!uid.empty())
1876                 uid_list.push_back(uid);
1877
1878         std::string dictmask_str;
1879         int count = 0;
1880         const std::string *info_string;
1881         int level;
1882         for (std::list<std::string>::iterator i = uid_list.begin(); i!= uid_list.end(); ++i) {
1883                 level = get_dict_level((*i).c_str());
1884                 if (level < 0 || level > userLevel)
1885                         continue;
1886                 info_string = get_dict_info(i->c_str(), true);
1887                 if (info_string) {
1888                         if (count>=max_dict_count)
1889                                 break;
1890                         dictmask_str += info_string->c_str();
1891                         count++;
1892                 }
1893         }
1894         return dictmask_str;
1895 }
1896
1897 const std::string *Libs::get_dict_info(const char *uid, bool is_short)
1898 {
1899         std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1900         uid_iter = uidmap.find(uid);
1901         if (uid_iter==uidmap.end())
1902                 return NULL;
1903         DictInfoDictItem *dict;
1904         dict = uid_iter->second;
1905         if (is_short) {
1906                 if (dict->short_info_string.empty()) {
1907                         gchar *etext;
1908                         dict->short_info_string += "<dict><uid>";
1909                         dict->short_info_string += uid;
1910                         dict->short_info_string += "</uid><bookname>";
1911                         etext = g_markup_escape_text(oLib[dict->id]->dict_name().c_str(), -1);
1912                         dict->short_info_string += etext;
1913                         g_free(etext);
1914                         dict->short_info_string += "</bookname><wordcount>";
1915                         gchar *wc = g_strdup_printf("%ld", oLib[dict->id]->narticles());
1916                         dict->short_info_string += wc;
1917                         g_free(wc);
1918                         dict->short_info_string += "</wordcount></dict>";
1919                 }
1920                 return &(dict->short_info_string);
1921         } else {
1922                 if (dict->info_string.empty()) {
1923                         gchar *etext;
1924                         DictInfo dict_info;
1925                         if (!dict_info.load_from_ifo_file(oLib[dict->id]->ifofilename(), false))
1926                                 return NULL;
1927                         dict->info_string += "<dictinfo><bookname>";
1928                         etext = g_markup_escape_text(dict_info.bookname.c_str(), -1);
1929                         dict->info_string += etext;
1930                         g_free(etext);
1931                         dict->info_string += "</bookname><wordcount>";
1932                         gchar *wc = g_strdup_printf("%u", dict_info.wordcount);
1933                         dict->info_string += wc;
1934                         g_free(wc);
1935                         dict->info_string += "</wordcount>";
1936                         if (dict_info.synwordcount!=0) {
1937                                 dict->info_string += "<synwordcount>";
1938                                 wc = g_strdup_printf("%u", dict_info.synwordcount);
1939                                 dict->info_string += wc;
1940                                 g_free(wc);
1941                                 dict->info_string += "</synwordcount>";
1942                         }
1943                         dict->info_string += "<author>";
1944                         etext = g_markup_escape_text(dict_info.author.c_str(), -1);
1945                         dict->info_string += etext;
1946                         g_free(etext);
1947                         dict->info_string += "</author><email>";
1948                         etext = g_markup_escape_text(dict_info.email.c_str(), -1);
1949                         dict->info_string += etext;
1950                         g_free(etext);
1951                         dict->info_string += "</email><website>";
1952                         etext = g_markup_escape_text(dict_info.website.c_str(), -1);
1953                         dict->info_string += etext;
1954                         g_free(etext);
1955                         dict->info_string += "</website><description>";
1956                         etext = g_markup_escape_text(dict_info.description.c_str(), -1);
1957                         dict->info_string += etext;
1958                         g_free(etext);
1959                         dict->info_string += "</description><date>";
1960                         etext = g_markup_escape_text(dict_info.date.c_str(), -1);
1961                         dict->info_string += etext;
1962                         g_free(etext);
1963                         dict->info_string += "</date><download>";
1964                         etext = g_markup_escape_text(dict->download.c_str(), -1);
1965                         dict->info_string += etext;
1966                         g_free(etext);
1967                         dict->info_string += "</download></dictinfo>";
1968                 }
1969                 return &(dict->info_string);
1970         }
1971 }
1972
1973 void Libs::SetServerDictMask(std::vector<InstantDictIndex> &dictmask, const char *dicts, int max, int userLevel)
1974 {
1975         InstantDictIndex instance_dict_index;
1976         instance_dict_index.type = InstantDictType_LOCAL;
1977         dictmask.clear();
1978         std::list<std::string> uid_list;
1979         std::string uid;
1980         const char *p, *p1;
1981         p = dicts;
1982         do {
1983                 p1 = strchr(p, ' ');
1984                 if (p1) {
1985                         uid.assign(p, p1-p);
1986                         if (!uid.empty())
1987                                 uid_list.push_back(uid);
1988                         p = p1+1;
1989                 }
1990         } while (p1);
1991         uid = p;
1992         if (!uid.empty())
1993                 uid_list.push_back(uid);
1994         int count = 0;
1995         std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1996         for (std::list<std::string>::iterator i = uid_list.begin(); i!= uid_list.end(); ++i) {
1997                 uid_iter = uidmap.find(*i);
1998                 if (uid_iter!=uidmap.end()) {
1999                         if (max>=0 && count >= max)
2000                                 break;
2001                         if (userLevel>=0 && (unsigned int)userLevel< uid_iter->second->level)
2002                                 continue;
2003                         instance_dict_index.index = uid_iter->second->id;
2004                         dictmask.push_back(instance_dict_index);
2005                         count++;
2006                 }
2007         }
2008 }
2009
2010 void Libs::LoadCollateFile(std::vector<InstantDictIndex> &dictmask, CollateFunctions cltfuc)
2011 {
2012         for (std::vector<InstantDictIndex>::iterator i = dictmask.begin(); i!=dictmask.end(); ++i) {
2013                 if ((*i).type == InstantDictType_LOCAL) {
2014                         oLib[(*i).index]->idx_file->collate_load(cltfuc);
2015                         if (oLib[(*i).index]->syn_file.get() != NULL)
2016                                 oLib[(*i).index]->syn_file->collate_load(cltfuc);
2017                 }
2018         }
2019 }
2020 #endif
2021
2022 #ifdef SD_CLIENT_CODE
2023 bool Libs::find_lib_by_filename(const char *filename, size_t &iLib)
2024 {
2025         for (std::vector<Dict *>::size_type i =0; i < oLib.size(); i++) {
2026                 if (oLib[i]->ifofilename() == filename) {
2027                         iLib = i;
2028                         return true;
2029                 }
2030         }
2031         return false;
2032 }
2033
2034 void Libs::load(std::list<std::string> &load_list)
2035 {
2036         for (std::list<std::string>::iterator i = load_list.begin(); i != load_list.end(); ++i) {
2037                 load_dict(*i, show_progress);
2038         }
2039 }
2040
2041 void Libs::reload(std::list<std::string> &load_list, int is_coll_enb, int collf)
2042 {
2043         if (is_coll_enb == EnableCollationLevel && collf == CollateFunction) {
2044                 std::vector<Dict *> prev(oLib);
2045                 oLib.clear();
2046                 for (std::list<std::string>::iterator i = load_list.begin(); i != load_list.end(); ++i) {
2047                         std::vector<Dict *>::iterator it;
2048                         for (it=prev.begin(); it!=prev.end(); ++it) {
2049                                 if ((*it)->ifofilename()==*i)
2050                                         break;
2051                         }
2052                         if (it==prev.end()) {
2053                                 load_dict(*i, show_progress);
2054                         } else {
2055                                 Dict *res=*it;
2056                                 prev.erase(it);
2057                                 oLib.push_back(res);
2058                         }
2059                 }
2060                 for (std::vector<Dict *>::iterator it=prev.begin(); it!=prev.end(); ++it) {
2061                         delete *it;
2062                 }
2063         } else {
2064                 for (std::vector<Dict *>::iterator it = oLib.begin(); it != oLib.end(); ++it)
2065                         delete *it;
2066                 oLib.clear();
2067                 EnableCollationLevel = is_coll_enb;
2068                 CollateFunction = CollateFunctions(collf);
2069                 if (EnableCollationLevel == 0) {
2070                 } else if (EnableCollationLevel == 1) {
2071                         if (utf8_collate_init(CollateFunction))
2072                                 printf("Init collate function failed!\n");
2073                 } else if (EnableCollationLevel == 2) {
2074                         if (utf8_collate_init_all())
2075                                 printf("Init collate functions failed!\n");
2076                 }
2077                 load(load_list);
2078         }
2079 }
2080 #endif
2081
2082 glong Libs::CltIndexToOrig(glong cltidx, size_t iLib, int servercollatefunc)
2083 {
2084         if (EnableCollationLevel == 0)
2085                 return cltidx;
2086         if (EnableCollationLevel == 1) {
2087                 if (cltidx == INVALID_INDEX)
2088                         return cltidx;
2089                 return oLib[iLib]->idx_file->clt_file->GetOrigIndex(cltidx);
2090         }
2091         if (servercollatefunc == 0)
2092                 return cltidx;
2093         if (cltidx == INVALID_INDEX)
2094                 return cltidx;
2095         oLib[iLib]->idx_file->collate_load((CollateFunctions)(servercollatefunc-1));
2096         return oLib[iLib]->idx_file->clt_files[servercollatefunc-1]->GetOrigIndex(cltidx);
2097 }
2098
2099 glong Libs::CltSynIndexToOrig(glong cltidx, size_t iLib, int servercollatefunc)
2100 {
2101         if (EnableCollationLevel == 0)
2102                 return cltidx;
2103         if (EnableCollationLevel == 1) {
2104                 if (cltidx == UNSET_INDEX || cltidx == INVALID_INDEX)
2105                         return cltidx;
2106                 return oLib[iLib]->syn_file->clt_file->GetOrigIndex(cltidx);
2107         }
2108         if (servercollatefunc == 0)
2109                 return cltidx;
2110         if (cltidx == UNSET_INDEX || cltidx == INVALID_INDEX)
2111                 return cltidx;
2112         oLib[iLib]->syn_file->collate_load((CollateFunctions)(servercollatefunc-1));
2113         return oLib[iLib]->syn_file->clt_files[servercollatefunc-1]->GetOrigIndex(cltidx);
2114 }
2115
2116 const gchar *Libs::GetSuggestWord(const gchar *sWord, CurrentIndex *iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2117 {
2118         const gchar *poCurrentWord = NULL;
2119         const gchar *word;
2120         gint best =0;
2121         gint back;
2122         std::vector<InstantDictIndex>::size_type iLib;
2123         std::vector<Dict *>::size_type iRealLib;
2124         for (iLib=0; iLib < dictmask.size(); iLib++) {
2125                 if (dictmask[iLib].type != InstantDictType_LOCAL)
2126                         continue;
2127                 iRealLib = dictmask[iLib].index;
2128                 if ( poCurrentWord == NULL ) {
2129                         poCurrentWord = poGetWord(iCurrent[iLib].idx_suggest, iRealLib, servercollatefunc);
2130                         best = prefix_match (sWord, poCurrentWord);
2131                 } else {
2132                         word = poGetWord(iCurrent[iLib].idx_suggest, iRealLib, servercollatefunc);
2133                         back = prefix_match (sWord, word);
2134                         if (back > best) {
2135                                 best = back;
2136                                 poCurrentWord = word;
2137                         } else if (back == best) {
2138                                 gint x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2139                                 if (x > 0) {
2140                                         poCurrentWord = word;
2141                                 }
2142                         }
2143                 }
2144         }
2145         for (iLib=0; iLib<dictmask.size(); iLib++) {
2146                 if (dictmask[iLib].type != InstantDictType_LOCAL)
2147                         continue;
2148                 if (iCurrent[iLib].synidx_suggest==UNSET_INDEX)
2149                         continue;
2150                 iRealLib = dictmask[iLib].index;
2151                 if ( poCurrentWord == NULL ) {
2152                         poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx_suggest, iRealLib, servercollatefunc);
2153                         best = prefix_match (sWord, poCurrentWord);
2154                 } else {
2155                         word = poGetSynonymWord(iCurrent[iLib].synidx_suggest, iRealLib, servercollatefunc);
2156                         back = prefix_match (sWord, word);
2157                         if (back > best) {
2158                                 best = back;
2159                                 poCurrentWord = word;
2160                         } else if (back == best) {
2161                                 gint x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2162                                 if (x > 0) {
2163                                         poCurrentWord = word;
2164                                 }
2165                         }
2166                 }
2167         }
2168         return poCurrentWord;
2169 }
2170
2171 const gchar *Libs::poGetCurrentWord(CurrentIndex * iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2172 {
2173         const gchar *poCurrentWord = NULL;
2174         const gchar *word;
2175         std::vector<InstantDictIndex>::size_type iLib;
2176         std::vector<Dict *>::size_type iRealLib;
2177         for (iLib=0; iLib < dictmask.size(); iLib++) {
2178                 if (dictmask[iLib].type != InstantDictType_LOCAL)
2179                         continue;
2180                 iRealLib = dictmask[iLib].index;
2181                 if (iCurrent[iLib].idx==INVALID_INDEX)
2182                         continue;
2183                 if ( iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2184                         continue;
2185                 if ( poCurrentWord == NULL ) {
2186                         poCurrentWord = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2187                 } else {
2188                         word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2189                         gint x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2190                         if (x > 0) {
2191                                 poCurrentWord = word;
2192                         }
2193                 }
2194         }
2195         for (iLib=0; iLib<dictmask.size(); iLib++) {
2196                 if (dictmask[iLib].type != InstantDictType_LOCAL)
2197                         continue;
2198                 iRealLib = dictmask[iLib].index;
2199                 if (iCurrent[iLib].synidx==UNSET_INDEX)
2200                         continue;
2201                 if (iCurrent[iLib].synidx==INVALID_INDEX)
2202                         continue;
2203                 if ( iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2204                         continue;
2205                 if ( poCurrentWord == NULL ) {
2206                         poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2207                 } else {
2208                         word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2209                         gint x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2210                         if (x > 0) {
2211                                 poCurrentWord = word;
2212                         }
2213                 }
2214         }
2215         return poCurrentWord;
2216 }
2217
2218 const gchar *
2219 Libs::poGetNextWord(const gchar *sWord, CurrentIndex *iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2220 {
2221         // the input can be:
2222         // (word,iCurrent),read word,write iNext to iCurrent,and return next word. used by TopWin::NextCallback();
2223         // (NULL,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. used by AppCore::ListWords();
2224         const gchar *poCurrentWord = NULL;
2225         std::vector<Dict *>::size_type iCurrentLib=0, iCurrentRealLib=0;
2226         bool isLib = false;
2227         const gchar *word;
2228
2229         std::vector<InstantDictIndex>::size_type iLib;
2230         std::vector<Dict *>::size_type iRealLib;
2231         for (iLib=0; iLib < dictmask.size(); iLib++) {
2232                 if (dictmask[iLib].type != InstantDictType_LOCAL)
2233                         continue;
2234                 iRealLib = dictmask[iLib].index;
2235                 if (sWord) {
2236                         oLib[iRealLib]->Lookup(sWord, iCurrent[iLib].idx, iCurrent[iLib].idx_suggest, EnableCollationLevel, servercollatefunc);
2237                 }
2238                 if (iCurrent[iLib].idx==INVALID_INDEX)
2239                         continue;
2240                 if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2241                         continue;
2242                 if (poCurrentWord == NULL ) {
2243                         poCurrentWord = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2244                         iCurrentLib = iLib;
2245                         iCurrentRealLib = iRealLib;
2246                         isLib=true;
2247                 } else {
2248                         gint x;
2249                         word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2250                         x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2251                         if (x > 0) {
2252                                 poCurrentWord = word;
2253                                 iCurrentLib = iLib;
2254                                 iCurrentRealLib = iRealLib;
2255                                 isLib=true;
2256                         }
2257                 }
2258         }
2259         for (iLib=0; iLib < dictmask.size(); iLib++) {
2260                 if (dictmask[iLib].type != InstantDictType_LOCAL)
2261                         continue;
2262                 iRealLib = dictmask[iLib].index;
2263                 if (sWord) {
2264                         oLib[iRealLib]->LookupSynonym(sWord, iCurrent[iLib].synidx, iCurrent[iLib].synidx_suggest, EnableCollationLevel, servercollatefunc);
2265                 }
2266                 if (iCurrent[iLib].synidx==UNSET_INDEX)
2267                         continue;
2268                 if (iCurrent[iLib].synidx==INVALID_INDEX)
2269                         continue;
2270                 if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2271                         continue;
2272                 if (poCurrentWord == NULL ) {
2273                         poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2274                         iCurrentLib = iLib;
2275                         iCurrentRealLib = iRealLib;
2276                         isLib=false;
2277                 } else {
2278                         gint x;
2279                         word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2280                         x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2281                         if (x > 0 ) {
2282                                 poCurrentWord = word;
2283                                 iCurrentLib = iLib;
2284                                 iCurrentRealLib = iRealLib;
2285                                 isLib=false;
2286                         }
2287                 }
2288         }
2289         if (poCurrentWord) {
2290                 for (iLib=0; iLib < dictmask.size(); iLib++) {
2291                         if (dictmask[iLib].type != InstantDictType_LOCAL)
2292                                 continue;
2293                         iRealLib = dictmask[iLib].index;
2294                         if (isLib && (iLib == iCurrentLib))
2295                                 continue;
2296                         if (iCurrent[iLib].idx==INVALID_INDEX)
2297                                 continue;
2298                         if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2299                                 continue;
2300                         word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2301                         if (strcmp(poCurrentWord, word) == 0) {
2302                                 GetWordNext(iCurrent[iLib].idx, iRealLib, true, servercollatefunc);
2303                         }
2304                 }
2305                 for (iLib=0; iLib < dictmask.size(); iLib++) {
2306                         if (dictmask[iLib].type != InstantDictType_LOCAL)
2307                                 continue;
2308                         iRealLib = dictmask[iLib].index;
2309                         if ((!isLib) && (iLib == iCurrentLib))
2310                                 continue;
2311                         if (iCurrent[iLib].synidx==UNSET_INDEX)
2312                                 continue;
2313                         if (iCurrent[iLib].synidx==INVALID_INDEX)
2314                                 continue;
2315                         if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2316                                 continue;
2317                         word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2318                         if (strcmp(poCurrentWord, word) == 0) {
2319                                 GetWordNext(iCurrent[iLib].synidx, iRealLib, false, servercollatefunc);
2320                         }
2321                 }
2322                 //GetWordNext will change poCurrentWord's content, so do it at the last.
2323                 if (isLib) {
2324                         GetWordNext(iCurrent[iCurrentLib].idx, iCurrentRealLib, true, servercollatefunc);
2325                 } else {
2326                         GetWordNext(iCurrent[iCurrentLib].synidx, iCurrentRealLib, false, servercollatefunc);
2327                 }
2328                 poCurrentWord = poGetCurrentWord(iCurrent, dictmask, servercollatefunc);
2329         }
2330         return poCurrentWord;
2331 }
2332
2333 const gchar *
2334 Libs::poGetPreWord(const gchar *sWord, CurrentIndex* iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2335 {
2336         // used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange();
2337         const gchar *poCurrentWord = NULL;
2338         std::vector<Dict *>::size_type iCurrentLib=0, iCurrentRealLib=0;
2339         bool isLib = false;
2340
2341         const gchar *word;
2342         glong pidx;
2343         std::vector<InstantDictIndex>::size_type iLib;
2344         std::vector<Dict *>::size_type iRealLib;
2345         for (iLib=0;iLib<dictmask.size();iLib++) {
2346                 if (dictmask[iLib].type != InstantDictType_LOCAL)
2347                         continue;
2348                 iRealLib = dictmask[iLib].index;
2349                 if (sWord) {
2350                         oLib[iRealLib]->Lookup(sWord, iCurrent[iLib].idx, iCurrent[iLib].idx_suggest, EnableCollationLevel, servercollatefunc);
2351                 }
2352                 if (iCurrent[iLib].idx!=INVALID_INDEX) {
2353                         if ( iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<=0)
2354                                 continue;
2355                 }
2356                 if ( poCurrentWord == NULL ) {
2357                         if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2358                                 poCurrentWord = poGetWord(pidx, iRealLib, servercollatefunc);
2359                                 iCurrentLib = iLib;
2360                                 iCurrentRealLib = iRealLib;
2361                                 isLib=true;
2362                         }
2363                 } else {
2364                         if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2365                                 gint x;
2366                                 word = poGetWord(pidx, iRealLib, servercollatefunc);
2367                                 x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2368                                 if (x < 0 ) {
2369                                         poCurrentWord = word;
2370                                         iCurrentLib = iLib;
2371                                         iCurrentRealLib = iRealLib;
2372                                         isLib=true;
2373                                 }
2374                         }
2375                 }
2376         }
2377         for (iLib=0;iLib<dictmask.size();iLib++) {
2378                 if (dictmask[iLib].type != InstantDictType_LOCAL)
2379                         continue;
2380                 iRealLib = dictmask[iLib].index;
2381                 if (sWord) {
2382                         oLib[iRealLib]->LookupSynonym(sWord, iCurrent[iLib].synidx, iCurrent[iLib].synidx_suggest, EnableCollationLevel, servercollatefunc);
2383                 }
2384                 if (iCurrent[iLib].synidx==UNSET_INDEX)
2385                         continue;
2386                 if (iCurrent[iLib].synidx!=INVALID_INDEX) {
2387                         if ( iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<=0)
2388                                 continue;
2389                 }
2390                 if ( poCurrentWord == NULL ) {
2391                         if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2392                                 poCurrentWord = poGetSynonymWord(pidx, iRealLib, servercollatefunc);
2393                                 iCurrentLib = iLib;
2394                                 iCurrentRealLib = iRealLib;
2395                                 isLib=false;
2396                         }
2397                 } else {
2398                         if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2399                                 gint x;
2400                                 word = poGetSynonymWord(pidx,iRealLib, servercollatefunc);
2401                                 x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2402                                 if (x < 0 ) {
2403                                         poCurrentWord = word;
2404                                         iCurrentLib = iLib;
2405                                         iCurrentRealLib = iRealLib;
2406                                         isLib=false;
2407                                 }
2408                         }
2409                 }
2410         }
2411         if (poCurrentWord) {
2412                 for (iLib=0;iLib<dictmask.size();iLib++) {
2413                         if (dictmask[iLib].type != InstantDictType_LOCAL)
2414                                 continue;
2415                         iRealLib = dictmask[iLib].index;
2416                         if (isLib && (iLib == iCurrentLib))
2417                                 continue;
2418                         if (iCurrent[iLib].idx!=INVALID_INDEX) {
2419                                 if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<=0)
2420                                         continue;
2421                         }
2422                         if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2423                                 word = poGetWord(pidx, iRealLib, servercollatefunc);
2424                                 if (strcmp(poCurrentWord, word) == 0) {
2425                                         iCurrent[iLib].idx=pidx;
2426                                 }
2427                         }
2428                 }
2429                 for (iLib=0;iLib<dictmask.size();iLib++) {
2430                         if (dictmask[iLib].type != InstantDictType_LOCAL)
2431                                 continue;
2432                         iRealLib = dictmask[iLib].index;
2433                         if ((!isLib) && (iLib == iCurrentLib))
2434                                 continue;
2435                         if (iCurrent[iLib].synidx==UNSET_INDEX)
2436                                 continue;
2437                         if (iCurrent[iLib].synidx!=INVALID_INDEX) {
2438                                 if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<=0)
2439                                         continue;
2440                         }
2441                         if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2442                                 word = poGetSynonymWord(pidx, iRealLib, servercollatefunc);
2443                                 if (strcmp(poCurrentWord, word) == 0) {
2444                                         iCurrent[iLib].synidx=pidx;
2445                                 }
2446                         }
2447                 }
2448                 if (isLib) {
2449                         GetWordPrev(iCurrent[iCurrentLib].idx, pidx, iCurrentRealLib, true, servercollatefunc);
2450                         iCurrent[iCurrentLib].idx = pidx;
2451                 } else {
2452                         GetWordPrev(iCurrent[iCurrentLib].synidx, pidx, iCurrentRealLib, false, servercollatefunc);
2453                         iCurrent[iCurrentLib].synidx = pidx;
2454                 }
2455         }
2456         return poCurrentWord;
2457 }
2458
2459 bool Libs::LookupSynonymSimilarWord(const gchar* sWord, glong &iSynonymWordIndex, glong &synidx_suggest, size_t iLib, int servercollatefunc)
2460 {
2461         if (oLib[iLib]->syn_file.get() == NULL)
2462                 return false;
2463
2464         glong iIndex;
2465         glong iIndex_suggest;
2466         bool bFound=false;
2467         gchar *casestr;
2468         bool bLookup;
2469
2470         if (!bFound) {
2471                 // to lower case.
2472                 casestr = g_utf8_strdown(sWord, -1);
2473                 if (strcmp(casestr, sWord)) {
2474                         bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, EnableCollationLevel, servercollatefunc);
2475                         if(bLookup)
2476                                 bFound=true;
2477                 }
2478                 g_free(casestr);
2479                 // to upper case.
2480                 if (!bFound) {
2481                         casestr = g_utf8_strup(sWord, -1);
2482                         if (strcmp(casestr, sWord)) {
2483                                 bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, EnableCollationLevel, servercollatefunc);
2484                                 if(bLookup)
2485                                         bFound=true;
2486                         }
2487                         g_free(casestr);
2488                 }
2489                 // Upper the first character and lower others.
2490                 if (!bFound) {
2491                         gchar *nextchar = g_utf8_next_char(sWord);
2492                         gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord);
2493                         nextchar = g_utf8_strdown(nextchar, -1);
2494                         casestr = g_strdup_printf("%s%s", firstchar, nextchar);
2495                         g_free(firstchar);
2496                         g_free(nextchar);
2497                         if (strcmp(casestr, sWord)) {
2498                                 bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, EnableCollationLevel, servercollatefunc);
2499                                 if(bLookup)
2500                                         bFound=true;
2501                         }
2502                         g_free(casestr);
2503                 }
2504                 if (!bFound) {
2505                         iIndex = iSynonymWordIndex;
2506                         glong pidx;
2507                         const gchar *cword;
2508                         do {
2509                                 if (GetWordPrev(iIndex, pidx, iLib, false, servercollatefunc)) {
2510                                         cword = poGetSynonymWord(pidx, iLib, servercollatefunc);
2511                                         if (stardict_casecmp(cword, sWord, EnableCollationLevel, CollateFunction, servercollatefunc)==0) {
2512                                                 iIndex = pidx;
2513                                                 bFound=true;
2514                                         } else {
2515                                                 break;
2516                                         }
2517                                 } else {
2518                                         break;
2519                                 }
2520                         } while (true);
2521                         if (!bFound) {
2522                                 if (iIndex!=INVALID_INDEX) {
2523                                         cword = poGetSynonymWord(iIndex, iLib, servercollatefunc);
2524                                         if (stardict_casecmp(cword, sWord, EnableCollationLevel, CollateFunction, servercollatefunc)==0) {
2525                                                 bFound=true;
2526                                         }
2527                                 }
2528                         }
2529                 }
2530         }
2531         if (bFound) {
2532                 iSynonymWordIndex = iIndex;
2533                 synidx_suggest = iIndex_suggest;
2534         }
2535         return bFound;
2536 }
2537
2538 bool Libs::LookupSimilarWord(const gchar* sWord, glong & iWordIndex, glong &idx_suggest, size_t iLib, int servercollatefunc)
2539 {
2540         glong iIndex;
2541         bool bFound=false;
2542         gchar *casestr;
2543
2544         if (!bFound) {
2545                 // to lower case.
2546                 casestr = g_utf8_strdown(sWord, -1);
2547                 if (strcmp(casestr, sWord)) {
2548                         if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2549                                 bFound=true;
2550                 }
2551                 g_free(casestr);
2552                 // to upper case.
2553                 if (!bFound) {
2554                         casestr = g_utf8_strup(sWord, -1);
2555                         if (strcmp(casestr, sWord)) {
2556                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2557                                         bFound=true;
2558                         }
2559                         g_free(casestr);
2560                 }
2561                 // Upper the first character and lower others.
2562                 if (!bFound) {
2563                         gchar *nextchar = g_utf8_next_char(sWord);
2564                         gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord);
2565                         nextchar = g_utf8_strdown(nextchar, -1);
2566                         casestr = g_strdup_printf("%s%s", firstchar, nextchar);
2567                         g_free(firstchar);
2568                         g_free(nextchar);
2569                         if (strcmp(casestr, sWord)) {
2570                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2571                                         bFound=true;
2572                         }
2573                         g_free(casestr);
2574                 }
2575                 if (!bFound) {
2576                         iIndex = iWordIndex;
2577                         glong pidx;
2578                         const gchar *cword;
2579                         do {
2580                                 if (GetWordPrev(iIndex, pidx, iLib, true, servercollatefunc)) {
2581                                         cword = poGetWord(pidx, iLib, servercollatefunc);
2582                                         if (stardict_casecmp(cword, sWord, EnableCollationLevel, CollateFunction, servercollatefunc)==0) {
2583                                                 iIndex = pidx;
2584                                                 bFound=true;
2585                                         } else {
2586                                                 break;
2587                                         }
2588                                 } else {
2589                                         break;
2590                                 }
2591                         } while (true);
2592                         if (!bFound) {
2593                                 if (iIndex!=INVALID_INDEX) {
2594                                         cword = poGetWord(iIndex, iLib, servercollatefunc);
2595                                         if (stardict_casecmp(cword, sWord, EnableCollationLevel, CollateFunction, servercollatefunc)==0) {
2596                                                 bFound=true;
2597                                         }
2598                                 }
2599                         }
2600                 }
2601         }
2602
2603         if (bIsPureEnglish(sWord)) {
2604                 // If not Found , try other status of sWord.
2605                 size_t iWordLen=strlen(sWord);
2606                 bool isupcase;
2607
2608                 gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1);
2609
2610                 //cut one char "s" or "d"
2611                 if(!bFound && iWordLen>1) {
2612                         isupcase = sWord[iWordLen-1]=='S' || !strncmp(&sWord[iWordLen-2],"ED",2);
2613                         if (isupcase || sWord[iWordLen-1]=='s' || !strncmp(&sWord[iWordLen-2],"ed",2)) {
2614                                 strcpy(sNewWord,sWord);
2615                                 sNewWord[iWordLen-1]='\0'; // cut "s" or "d"
2616                                 if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2617                                         bFound=true;
2618                                 else if (isupcase || g_ascii_isupper(sWord[0])) {
2619                                         casestr = g_ascii_strdown(sNewWord, -1);
2620                                         if (strcmp(casestr, sNewWord)) {
2621                                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2622                                                         bFound=true;
2623                                         }
2624                                         g_free(casestr);
2625                                 }
2626                         }
2627                 }
2628
2629                 //cut "ly"
2630                 if(!bFound && iWordLen>2) {
2631                         isupcase = !strncmp(&sWord[iWordLen-2],"LY",2);
2632                         if (isupcase || (!strncmp(&sWord[iWordLen-2],"ly",2))) {
2633                                 strcpy(sNewWord,sWord);
2634                                 sNewWord[iWordLen-2]='\0';  // cut "ly"
2635                                 if (iWordLen>5 && sNewWord[iWordLen-3]==sNewWord[iWordLen-4]
2636                                     && !bIsVowel(sNewWord[iWordLen-4]) &&
2637                                     bIsVowel(sNewWord[iWordLen-5])) {//doubled
2638
2639                                         sNewWord[iWordLen-3]='\0';
2640                                         if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2641                                                 bFound=true;
2642                                         else {
2643                                                 if (isupcase || g_ascii_isupper(sWord[0])) {
2644                                                         casestr = g_ascii_strdown(sNewWord, -1);
2645                                                         if (strcmp(casestr, sNewWord)) {
2646                                                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2647                                                                         bFound=true;
2648                                                         }
2649                                                         g_free(casestr);
2650                                                 }
2651                                                 if (!bFound)
2652                                                         sNewWord[iWordLen-3]=sNewWord[iWordLen-4];  //restore
2653                                         }
2654                                 }
2655                                 if (!bFound) {
2656                                         if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2657                                                 bFound=true;
2658                                         else if (isupcase || g_ascii_isupper(sWord[0])) {
2659                                                 casestr = g_ascii_strdown(sNewWord, -1);
2660                                                 if (strcmp(casestr, sNewWord)) {
2661                                                         if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2662                                                                 bFound=true;
2663                                                 }
2664                                                 g_free(casestr);
2665                                         }
2666                                 }
2667                         }
2668                 }
2669
2670                 //cut "ing"
2671                 if(!bFound && iWordLen>3) {
2672                         isupcase = !strncmp(&sWord[iWordLen-3],"ING",3);
2673                         if (isupcase || !strncmp(&sWord[iWordLen-3],"ing",3) ) {
2674                                 strcpy(sNewWord,sWord);
2675                                 sNewWord[iWordLen-3]='\0';
2676                                 if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5])
2677                                      && !bIsVowel(sNewWord[iWordLen-5]) &&
2678                                      bIsVowel(sNewWord[iWordLen-6])) {  //doubled
2679                                         sNewWord[iWordLen-4]='\0';
2680                                         if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2681                                                 bFound=true;
2682                                         else {
2683                                                 if (isupcase || g_ascii_isupper(sWord[0])) {
2684                                                         casestr = g_ascii_strdown(sNewWord, -1);
2685                                                         if (strcmp(casestr, sNewWord)) {
2686                                                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2687                                                                         bFound=true;
2688                                                         }
2689                                                         g_free(casestr);
2690                                                 }
2691                                                 if (!bFound)
2692                                                         sNewWord[iWordLen-4]=sNewWord[iWordLen-5];  //restore
2693                                         }
2694                                 }
2695                                 if( !bFound ) {
2696                                         if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2697                                                 bFound=true;
2698                                         else if (isupcase || g_ascii_isupper(sWord[0])) {
2699                                                 casestr = g_ascii_strdown(sNewWord, -1);
2700                                                 if (strcmp(casestr, sNewWord)) {
2701                                                         if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2702                                                                 bFound=true;
2703                                                 }
2704                                                 g_free(casestr);
2705                                         }
2706                                 }
2707                                 if(!bFound) {
2708                                         if (isupcase)
2709                                                 strcat(sNewWord,"E"); // add a char "E"
2710                                         else
2711                                                 strcat(sNewWord,"e"); // add a char "e"
2712                                         if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2713                                                 bFound=true;
2714                                         else if (isupcase || g_ascii_isupper(sWord[0])) {
2715                                                 casestr = g_ascii_strdown(sNewWord, -1);
2716                                                 if (strcmp(casestr, sNewWord)) {
2717                                                         if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2718                                                                 bFound=true;
2719                                                 }
2720                                                 g_free(casestr);
2721                                         }
2722                                 }
2723                         }
2724                 }
2725
2726                 //cut two char "es"
2727                 if(!bFound && iWordLen>3) {
2728                         isupcase = (!strncmp(&sWord[iWordLen-2],"ES",2) &&
2729                                     (sWord[iWordLen-3] == 'S' ||
2730                                      sWord[iWordLen-3] == 'X' ||
2731                                      sWord[iWordLen-3] == 'O' ||
2732                                      (iWordLen >4 && sWord[iWordLen-3] == 'H' &&
2733                                       (sWord[iWordLen-4] == 'C' ||
2734                                        sWord[iWordLen-4] == 'S'))));
2735                         if (isupcase ||
2736                             (!strncmp(&sWord[iWordLen-2],"es",2) &&
2737                              (sWord[iWordLen-3] == 's' || sWord[iWordLen-3] == 'x' ||
2738                               sWord[iWordLen-3] == 'o' ||
2739                               (iWordLen >4 && sWord[iWordLen-3] == 'h' &&
2740                                (sWord[iWordLen-4] == 'c' || sWord[iWordLen-4] == 's'))))) {
2741                                 strcpy(sNewWord,sWord);
2742                                 sNewWord[iWordLen-2]='\0';
2743                                 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2744                                         bFound=true;
2745                                 else if (isupcase || g_ascii_isupper(sWord[0])) {
2746                                         casestr = g_ascii_strdown(sNewWord, -1);
2747                                         if (strcmp(casestr, sNewWord)) {
2748                                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2749                                                         bFound=true;
2750                                         }
2751                                         g_free(casestr);
2752                                 }
2753                         }
2754                 }
2755
2756                 //cut "ed"
2757                 if (!bFound && iWordLen>3) {
2758                         isupcase = !strncmp(&sWord[iWordLen-2],"ED",2);
2759                         if (isupcase || !strncmp(&sWord[iWordLen-2],"ed",2)) {
2760                                 strcpy(sNewWord,sWord);
2761                                 sNewWord[iWordLen-2]='\0';
2762                                 if (iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4])
2763                                     && !bIsVowel(sNewWord[iWordLen-4]) &&
2764                                     bIsVowel(sNewWord[iWordLen-5])) {//doubled
2765                                         sNewWord[iWordLen-3]='\0';
2766                                         if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2767                                                 bFound=true;
2768                                         else {
2769                                                 if (isupcase || g_ascii_isupper(sWord[0])) {
2770                                                         casestr = g_ascii_strdown(sNewWord, -1);
2771                                                         if (strcmp(casestr, sNewWord)) {
2772                                                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2773                                                                         bFound=true;
2774                                                         }
2775                                                         g_free(casestr);
2776                                                 }
2777                                                 if (!bFound)
2778                                                         sNewWord[iWordLen-3]=sNewWord[iWordLen-4];  //restore
2779                                         }
2780                                 }
2781                                 if (!bFound) {
2782                                         if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2783                                                 bFound=true;
2784                                         else if (isupcase || g_ascii_isupper(sWord[0])) {
2785                                                 casestr = g_ascii_strdown(sNewWord, -1);
2786                                                 if (strcmp(casestr, sNewWord)) {
2787                                                         if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2788                                                                 bFound=true;
2789                                                 }
2790                                                 g_free(casestr);
2791                                         }
2792                                 }
2793                         }
2794                 }
2795
2796                 // cut "ied" , add "y".
2797                 if (!bFound && iWordLen>3) {
2798                         isupcase = !strncmp(&sWord[iWordLen-3],"IED",3);
2799                         if (isupcase || (!strncmp(&sWord[iWordLen-3],"ied",3))) {
2800                                 strcpy(sNewWord,sWord);
2801                                 sNewWord[iWordLen-3]='\0';
2802                                 if (isupcase)
2803                                         strcat(sNewWord,"Y"); // add a char "Y"
2804                                 else
2805                                         strcat(sNewWord,"y"); // add a char "y"
2806                                 if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2807                                         bFound=true;
2808                                 else if (isupcase || g_ascii_isupper(sWord[0])) {
2809                                         casestr = g_ascii_strdown(sNewWord, -1);
2810                                         if (strcmp(casestr, sNewWord)) {
2811                                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2812                                                         bFound=true;
2813                                         }
2814                                         g_free(casestr);
2815                                 }
2816                         }
2817                 }
2818
2819                 // cut "ies" , add "y".
2820                 if (!bFound && iWordLen>3) {
2821                         isupcase = !strncmp(&sWord[iWordLen-3],"IES",3);
2822                         if (isupcase || (!strncmp(&sWord[iWordLen-3],"ies",3))) {
2823                                 strcpy(sNewWord,sWord);
2824                                 sNewWord[iWordLen-3]='\0';
2825                                 if (isupcase)
2826                                         strcat(sNewWord,"Y"); // add a char "Y"
2827                                 else
2828                                         strcat(sNewWord,"y"); // add a char "y"
2829                                 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2830                                         bFound=true;
2831                                 else if (isupcase || g_ascii_isupper(sWord[0])) {
2832                                         casestr = g_ascii_strdown(sNewWord, -1);
2833                                         if (strcmp(casestr, sNewWord)) {
2834                                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2835                                                         bFound=true;
2836                                         }
2837                                         g_free(casestr);
2838                                 }
2839                         }
2840                 }
2841
2842                 // cut "er".
2843                 if (!bFound && iWordLen>2) {
2844                         isupcase = !strncmp(&sWord[iWordLen-2],"ER",2);
2845                         if (isupcase || (!strncmp(&sWord[iWordLen-2],"er",2))) {
2846                                 strcpy(sNewWord,sWord);
2847                                 sNewWord[iWordLen-2]='\0';
2848                                 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2849                                         bFound=true;
2850                                 else if (isupcase || g_ascii_isupper(sWord[0])) {
2851                                         casestr = g_ascii_strdown(sNewWord, -1);
2852                                         if (strcmp(casestr, sNewWord)) {
2853                                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2854                                                         bFound=true;
2855                                         }
2856                                         g_free(casestr);
2857                                 }
2858                         }
2859                 }
2860
2861                 // cut "est".
2862                 if (!bFound && iWordLen>3) {
2863                         isupcase = !strncmp(&sWord[iWordLen-3], "EST", 3);
2864                         if (isupcase || (!strncmp(&sWord[iWordLen-3],"est", 3))) {
2865                                 strcpy(sNewWord,sWord);
2866                                 sNewWord[iWordLen-3]='\0';
2867                                 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2868                                         bFound=true;
2869                                 else if (isupcase || g_ascii_isupper(sWord[0])) {
2870                                         casestr = g_ascii_strdown(sNewWord, -1);
2871                                         if (strcmp(casestr, sNewWord)) {
2872                                                 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2873                                                         bFound=true;
2874                                         }
2875                                         g_free(casestr);
2876                                 }
2877                         }
2878                 }
2879
2880                 g_free(sNewWord);
2881         }
2882
2883         if (bFound)
2884                 iWordIndex = iIndex;
2885 #if 0
2886         else {
2887                 //don't change iWordIndex here.
2888                 //when LookupSimilarWord all failed too, we want to use the old LookupWord index to list words.
2889                 //iWordIndex = INVALID_INDEX;
2890         }
2891 #endif
2892         return bFound;
2893 }
2894
2895 bool Libs::SimpleLookupWord(const gchar* sWord, glong & iWordIndex, glong &idx_suggest, size_t iLib, int servercollatefunc)
2896 {
2897         bool bFound = oLib[iLib]->Lookup(sWord, iWordIndex, idx_suggest, EnableCollationLevel, servercollatefunc);
2898         if (!bFound)
2899                 bFound = LookupSimilarWord(sWord, iWordIndex, idx_suggest, iLib, servercollatefunc);
2900         return bFound;
2901 }
2902
2903 bool Libs::SimpleLookupSynonymWord(const gchar* sWord, glong & iWordIndex, glong &synidx_suggest, size_t iLib, int servercollatefunc)
2904 {
2905         bool bFound = oLib[iLib]->LookupSynonym(sWord, iWordIndex, synidx_suggest, EnableCollationLevel, servercollatefunc);
2906         if (!bFound)
2907                 bFound = LookupSynonymSimilarWord(sWord, iWordIndex, synidx_suggest, iLib, servercollatefunc);
2908         return bFound;
2909 }
2910
// One result slot of a fuzzy lookup (filled in by Libs::LookupWithFuzzy).
struct Fuzzystruct {
        // Matched word; LookupWithFuzzy stores NULL for an unused slot and a
        // g_strdup() copy once a candidate has been placed in it.
        char *pMatchWord;
        // Edit distance recorded for this slot.
        int iMatchWordDistance;
};
2915
2916 static inline bool operator<(const Fuzzystruct & lh, const Fuzzystruct & rh) {
2917         if (lh.iMatchWordDistance!=rh.iMatchWordDistance)
2918                 return lh.iMatchWordDistance<rh.iMatchWordDistance;
2919
2920         if (lh.pMatchWord && rh.pMatchWord)
2921                 return stardict_strcmp(lh.pMatchWord, rh.pMatchWord)<0;
2922
2923         return false;
2924 }
2925
2926 static inline void unicode_strdown(gunichar *str)
2927 {
2928         while (*str) {
2929                 *str=g_unichar_tolower(*str);
2930                 ++str;
2931         }
2932 }
2933
2934 bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size, std::vector<InstantDictIndex> &dictmask)
2935 {
2936         if (sWord[0] == '\0')
2937                 return false;
2938
2939         std::vector<Fuzzystruct> oFuzzystruct(reslist_size);
2940
2941         for (int i=0; i<reslist_size; i++) {
2942                 oFuzzystruct[i].pMatchWord = NULL;
2943                 oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance;
2944         }
2945         int iMaxDistance = iMaxFuzzyDistance;
2946         int iDistance;
2947         bool Found = false;
2948         EditDistance oEditDistance;
2949
2950         glong iCheckWordLen;
2951         const char *sCheck;
2952         gunichar *ucs4_str1, *ucs4_str2;
2953         glong ucs4_str2_len;
2954
2955         ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len);
2956         unicode_strdown(ucs4_str2);
2957
2958         std::vector<Dict *>::size_type iRealLib;
2959         for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
2960                 if (dictmask[iLib].type != InstantDictType_LOCAL)
2961                         continue;
2962                 iRealLib = dictmask[iLib].index;
2963                 for (gint synLib=0; synLib<2; synLib++) {
2964                         if (synLib==1) {
2965                                 if (oLib[iRealLib]->syn_file.get()==NULL)
2966                                         break;
2967                         }
2968                         show_progress->notify_about_work();
2969
2970                         //if (stardict_strcmp(sWord, poGetWord(0,iRealLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iRealLib)-1,iRealLib))<=0) {
2971                         //there are Chinese dicts and English dicts...
2972                         if (TRUE) {
2973                                 glong iwords;
2974                                 if (synLib==0)
2975                                         iwords = narticles(iRealLib);
2976                                 else
2977                                         iwords = nsynarticles(iRealLib);
2978                                 for (glong index=0; index<iwords; index++) {
2979                                         // Need to deal with same word in index? But this will slow down processing in most case.
2980                                         if (synLib==0)
2981                                                 sCheck = poGetOrigWord(index,iRealLib);
2982                                         else
2983                                                 sCheck = poGetOrigSynonymWord(index,iRealLib);
2984                                         // tolower and skip too long or too short words
2985                                         iCheckWordLen = g_utf8_strlen(sCheck, -1);
2986                                         if (iCheckWordLen-ucs4_str2_len>=iMaxDistance ||
2987                                             ucs4_str2_len-iCheckWordLen>=iMaxDistance)
2988                                                 continue;
2989                                         ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, NULL);
2990                                         if (iCheckWordLen > ucs4_str2_len)
2991                                                 ucs4_str1[ucs4_str2_len]=0;
2992                                         unicode_strdown(ucs4_str1);
2993
2994                                         iDistance = oEditDistance.CalEditDistance(ucs4_str1, ucs4_str2, iMaxDistance);
2995                                         g_free(ucs4_str1);
2996                                         if (iDistance<iMaxDistance && iDistance < ucs4_str2_len) {
2997                                                 // when ucs4_str2_len=1,2 we need less fuzzy.
2998                                                 Found = true;
2999                                                 bool bAlreadyInList = false;
3000                                                 int iMaxDistanceAt=0;
3001                                                 for (int j=0; j<reslist_size; j++) {
3002                                                         if (oFuzzystruct[j].pMatchWord &&
3003                                                             strcmp(oFuzzystruct[j].pMatchWord,sCheck)==0 ) {//already in list
3004                                                                 bAlreadyInList = true;
3005                                                                 break;
3006                                                         }
3007                                                         //find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
3008                                                         if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance ) {
3009                                                                 iMaxDistanceAt = j;
3010                                                         }
3011                                                 }
3012                                                 if (!bAlreadyInList) {
3013                                                         if (oFuzzystruct[iMaxDistanceAt].pMatchWord)
3014                                                                 g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord);
3015                                                         oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck);
3016                                                         oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance;
3017                                                         // calc new iMaxDistance
3018                                                         iMaxDistance = iDistance;
3019                                                         for (int j=0; j<reslist_size; j++) {
3020                                                                 if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance)
3021                                                                         iMaxDistance = oFuzzystruct[j].iMatchWordDistance;
3022                                                         } // calc new iMaxDistance
3023                                                 }   // add to list
3024                                         }   // find one
3025                                 }   // each word
3026                         }   // ok for search
3027                 }  // synLib
3028         }   // each lib
3029         g_free(ucs4_str2);
3030
3031         if (Found)// sort with distance
3032                 std::sort(oFuzzystruct.begin(), oFuzzystruct.end());
3033
3034         for (gint i=0; i<reslist_size; ++i)
3035                 reslist[i]=oFuzzystruct[i].pMatchWord;
3036
3037         return Found;
3038 }
3039
3040 static inline bool less_for_compare(const char *lh, const char *rh) {
3041         return stardict_strcmp(lh, rh)<0;
3042 }
3043
3044 gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord, std::vector<InstantDictIndex> &dictmask)
3045 {
3046         glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];
3047         gint iMatchCount = 0;
3048         GPatternSpec *pspec = g_pattern_spec_new(word);
3049
3050         const gchar * sMatchWord;
3051         bool bAlreadyInList;
3052         std::vector<Dict *>::size_type iRealLib;
3053         for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3054                 //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
3055                 // -iMatchCount,so save time,but may got less result and the word may repeat.
3056                 if (dictmask[iLib].type != InstantDictType_LOCAL)
3057                         continue;
3058                 iRealLib = dictmask[iLib].index;
3059                 if (oLib[iRealLib]->LookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3060                         show_progress->notify_about_work();
3061                         for (int i=0; aiIndex[i]!=-1; i++) {
3062                                 sMatchWord = poGetOrigWord(aiIndex[i],iRealLib);
3063                                 bAlreadyInList = false;
3064                                 for (int j=0; j<iMatchCount; j++) {
3065                                         if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3066                                                 bAlreadyInList = true;
3067                                                 break;
3068                                         }
3069                                 }
3070                                 if (!bAlreadyInList)
3071                                         ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3072                         }
3073                 }
3074                 if (oLib[iRealLib]->LookupWithRuleSynonym(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3075                         show_progress->notify_about_work();
3076                         for (int i=0; aiIndex[i]!=-1; i++) {
3077                                 sMatchWord = poGetOrigSynonymWord(aiIndex[i],iRealLib);
3078                                 bAlreadyInList = false;
3079                                 for (int j=0; j<iMatchCount; j++) {
3080                                         if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3081                                                 bAlreadyInList = true;
3082                                                 break;
3083                                         }
3084                                 }
3085                                 if (!bAlreadyInList)
3086                                         ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3087                         }
3088                 }
3089         }
3090         g_pattern_spec_free(pspec);
3091
3092         if (iMatchCount)// sort it.
3093                 std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare);
3094         return iMatchCount;
3095 }
3096
3097 gint Libs::LookupWithRegex(const gchar *word, gchar **ppMatchWord, std::vector<InstantDictIndex> &dictmask)
3098 {
3099         glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];
3100         gint iMatchCount = 0;
3101         GRegex *regex = g_regex_new(word, G_REGEX_OPTIMIZE, (GRegexMatchFlags)0, NULL);
3102
3103         const gchar * sMatchWord;
3104         bool bAlreadyInList;
3105         std::vector<Dict *>::size_type iRealLib;
3106         for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3107                 //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
3108                 // -iMatchCount,so save time,but may got less result and the word may repeat.
3109                 if (dictmask[iLib].type != InstantDictType_LOCAL)
3110                         continue;
3111                 iRealLib = dictmask[iLib].index;
3112                 if (oLib[iRealLib]->LookupWithRegex(regex, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3113                         show_progress->notify_about_work();
3114                         for (int i=0; aiIndex[i]!=-1; i++) {
3115                                 sMatchWord = poGetOrigWord(aiIndex[i],iRealLib);
3116                                 bAlreadyInList = false;
3117                                 for (int j=0; j<iMatchCount; j++) {
3118                                         if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3119                                                 bAlreadyInList = true;
3120                                                 break;
3121                                         }
3122                                 }
3123                                 if (!bAlreadyInList)
3124                                         ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3125                         }
3126                 }
3127                 if (oLib[iRealLib]->LookupWithRegexSynonym(regex, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3128                         show_progress->notify_about_work();
3129                         for (int i=0; aiIndex[i]!=-1; i++) {
3130                                 sMatchWord = poGetOrigSynonymWord(aiIndex[i],iRealLib);
3131                                 bAlreadyInList = false;
3132                                 for (int j=0; j<iMatchCount; j++) {
3133                                         if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3134                                                 bAlreadyInList = true;
3135                                                 break;
3136                                         }
3137                                 }
3138                                 if (!bAlreadyInList)
3139                                         ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3140                         }
3141                 }
3142         }
3143         g_regex_unref(regex);
3144
3145         if (iMatchCount)// sort it.
3146                 std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare);
3147         return iMatchCount;
3148 }
3149
3150 bool Libs::LookupData(const gchar *sWord, std::vector<gchar *> *reslist, updateSearchDialog_func search_func, gpointer search_data, bool *cancel, std::vector<InstantDictIndex> &dictmask)
3151 {
3152         std::vector<std::string> SearchWords;
3153         std::string SearchWord;
3154         const char *p=sWord;
3155         while (*p) {
3156                 if (*p=='\\') {
3157                         p++;
3158                         switch (*p) {
3159                         case ' ':
3160                                 SearchWord+=' ';
3161                                 break;
3162                         case '\\':
3163                                 SearchWord+='\\';
3164                                 break;
3165                         case 't':
3166                                 SearchWord+='\t';
3167                                 break;
3168                         case 'n':
3169                                 SearchWord+='\n';
3170                                 break;
3171                         default:
3172                                 SearchWord+=*p;
3173                         }
3174                 } else if (*p == ' ') {
3175                         if (!SearchWord.empty()) {
3176                                 SearchWords.push_back(SearchWord);
3177                                 SearchWord.clear();
3178                         }
3179                 } else {
3180                         SearchWord+=*p;
3181                 }
3182                 p++;
3183         }
3184         if (!SearchWord.empty()) {
3185                 SearchWords.push_back(SearchWord);
3186                 SearchWord.clear();
3187         }
3188         if (SearchWords.empty())
3189                 return false;
3190
3191         glong search_count=0;
3192         glong total_count=0;
3193         if (search_func) {
3194                 for (std::vector<InstantDictIndex>::size_type i=0; i<dictmask.size(); ++i) {
3195                         if (dictmask[i].type == InstantDictType_LOCAL)
3196                                 total_count += narticles(dictmask[i].index);
3197                 }
3198         }
3199
3200         guint32 max_size =0;
3201         gchar *origin_data = NULL;
3202         std::vector<InstantDictIndex>::size_type iRealLib;
3203         for (std::vector<InstantDictIndex>::size_type i=0; i<dictmask.size(); ++i) {
3204                 if (dictmask[i].type != InstantDictType_LOCAL)
3205                         continue;
3206                 iRealLib = dictmask[i].index;
3207                 if (!oLib[iRealLib]->containSearchData())
3208                         continue;
3209                 const gulong iwords = narticles(iRealLib);
3210                 const gchar *key;
3211                 guint32 offset, size;
3212                 for (gulong j=0; j<iwords; ++j) {
3213                         if (search_func) {
3214                                 if (*cancel)
3215                                         goto search_out;
3216                                 if (search_count % 10000 == 0) {
3217                                         search_func(search_data, (gdouble)search_count/(gdouble)total_count);
3218                                 }
3219                                 search_count++;
3220                         }
3221                         oLib[iRealLib]->get_key_and_data(j, &key, &offset, &size);
3222                         if (size>max_size) {
3223                                 origin_data = (gchar *)g_realloc(origin_data, size);
3224                                 max_size = size;
3225                         }
3226                         if (oLib[iRealLib]->SearchData(SearchWords, offset, size, origin_data)) {
3227                                 if (reslist[i].empty() || strcmp(reslist[i].back(), key))
3228                                         reslist[i].push_back(g_strdup(key));
3229                         }
3230                 }
3231         }
3232 search_out:
3233         g_free(origin_data);
3234         KMP_end();
3235
3236         std::vector<InstantDictIndex>::size_type i;
3237         for (i=0; i<dictmask.size(); ++i)
3238                 if (!reslist[i].empty())
3239                         break;
3240
3241         return i!=dictmask.size();
3242 }
3243
3244 int Libs::GetStorageType(size_t iLib)
3245 {
3246         if (oLib[iLib]->storage == NULL)
3247                 return -1;
3248         return oLib[iLib]->storage->is_file_or_db;
3249 }
3250
3251 const char *Libs::GetStorageFilePath(size_t iLib, const char *key)
3252 {
3253         if (oLib[iLib]->storage == NULL)
3254                 return NULL;
3255         return oLib[iLib]->storage->get_file_path(key);
3256 }
3257
3258 const char *Libs::GetStorageFileContent(size_t iLib, const char *key)
3259 {
3260         if (oLib[iLib]->storage == NULL)
3261                 return NULL;
3262         return oLib[iLib]->storage->get_file_content(key);
3263 }