2 * This file is part of StarDict - an international dictionary for GNOME.
3 * http://stardict.sourceforge.net
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Library General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 * Implementation of class to work with standard StarDict's dictionaries
22 * lookup word, get articles and so on.
24 * Notice: read doc/DICTFILE_FORMAT for the dictionary
25 * file's format information!
31 #include <glib/gi18n.h>
32 #include <glib/gstdio.h>
37 #include "mapfile.hpp"
39 #include "stddict.hpp"
41 #include "getuint32.h"
43 static inline gint stardict_strcmp(const gchar *s1, const gchar *s2)
45 gint a=g_ascii_strcasecmp(s1, s2);
47 return strcmp(s1, s2);
52 static gint stardict_collate(const gchar *str1, const gchar *str2, CollateFunctions func)
54 gint x = utf8_collate(str1, str2, func);
56 return strcmp(str1, str2);
61 gint stardict_server_collate(const gchar *str1, const gchar *str2, int EnableCollationLevel, CollateFunctions func, int servercollatefunc)
63 if (EnableCollationLevel == 0)
64 return stardict_strcmp(str1, str2);
65 if (EnableCollationLevel == 1)
66 return stardict_collate(str1, str2, func);
67 if (servercollatefunc == 0)
68 return stardict_strcmp(str1, str2);
69 return stardict_collate(str1, str2, (CollateFunctions)(servercollatefunc-1));
72 gint stardict_casecmp(const gchar *s1, const gchar *s2, int EnableCollationLevel, CollateFunctions func, int servercollatefunc)
74 if (EnableCollationLevel == 0)
75 return g_ascii_strcasecmp(s1, s2);
76 if (EnableCollationLevel == 1)
77 return utf8_collate(s1, s2, func);
78 if (servercollatefunc == 0)
79 return g_ascii_strcasecmp(s1, s2);
80 return utf8_collate(s1, s2, (CollateFunctions)(servercollatefunc-1));
83 static inline gint prefix_match (const gchar *s1, const gchar *s2)
88 u1 = g_utf8_get_char(s1);
89 u2 = g_utf8_get_char(s2);
90 s1 = g_utf8_next_char(s1);
91 s2 = g_utf8_next_char(s2);
93 } while (u1 && g_unichar_tolower(u1) == g_unichar_tolower(u2));
97 static inline bool bIsVowel(gchar inputchar)
99 gchar ch = g_ascii_toupper(inputchar);
100 return( ch=='A' || ch=='E' || ch=='I' || ch=='O' || ch=='U' );
104 bool bIsPureEnglish(const gchar *str)
106 // i think this should work even when it is UTF8 string :).
107 for (int i=0; str[i]!=0; i++)
109 //if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK.
110 // Better use isascii() but not str[i]<0 while char is default unsigned in arm
111 if (!isascii(str[i]))
116 class offset_index : public index_file {
120 bool load(const std::string& url, gulong wc, gulong fsize,
121 bool CreateCacheFile, int EnableCollationLevel,
122 CollateFunctions _CollateFunction, show_progress_t *sp);
123 void get_data(glong idx);
124 const gchar *get_key_and_data(glong idx);
126 const gchar *get_key(glong idx);
127 bool lookup(const char *str, glong &idx, glong &idx_suggest);
129 static const gint ENTR_PER_PAGE=32;
135 gchar wordentry_buf[256+sizeof(guint32)*2]; // The length of "word_str" should be less than 256. See doc/DICTFILE_FORMAT.
139 void assign(glong i, const std::string& str) {
144 index_entry first, last, middle, real_last;
150 std::vector<gchar> page_data;
153 page_entry entries[ENTR_PER_PAGE];
156 void fill(gchar *data, gint nent, glong idx_);
158 gulong load_page(glong page_idx);
159 const gchar *read_first_on_page_key(glong page_idx);
160 const gchar *get_first_on_page_key(glong page_idx);
163 class wordlist_index : public index_file {
167 bool load(const std::string& url, gulong wc, gulong fsize,
168 bool CreateCacheFile, int EnableCollationLevel,
169 CollateFunctions _CollateFunction, show_progress_t *sp);
170 void get_data(glong idx);
171 const gchar *get_key_and_data(glong idx);
173 const gchar *get_key(glong idx);
174 bool lookup(const char *str, glong &idx, glong &idx_suggest);
177 std::vector<gchar *> wordlist;
180 offset_index::offset_index() : oft_file(CacheFileType_oft)
186 offset_index::~offset_index()
193 void offset_index::page_t::fill(gchar *data, gint nent, glong idx_)
198 for (gint i=0; i<nent; ++i) {
202 entries[i].off=g_ntohl(get_uint32(p));
204 entries[i].size=g_ntohl(get_uint32(p));
209 inline const gchar *offset_index::read_first_on_page_key(glong page_idx)
211 fseek(idxfile, oft_file.wordoffset[page_idx], SEEK_SET);
212 guint32 page_size=oft_file.wordoffset[page_idx+1]-oft_file.wordoffset[page_idx];
213 gulong minsize = sizeof(wordentry_buf);
214 if (page_size < minsize)
216 fread(wordentry_buf, minsize, 1, idxfile); //TODO: check returned values, deal with word entry that strlen>255.
217 return wordentry_buf;
220 inline const gchar *offset_index::get_first_on_page_key(glong page_idx)
222 if (page_idx<middle.idx) {
223 if (page_idx==first.idx)
224 return first.keystr.c_str();
225 return read_first_on_page_key(page_idx);
226 } else if (page_idx>middle.idx) {
227 if (page_idx==last.idx)
228 return last.keystr.c_str();
229 return read_first_on_page_key(page_idx);
231 return middle.keystr.c_str();
234 cache_file::cache_file(CacheFileType _cachefiletype)
238 cachefiletype = _cachefiletype;
242 cache_file::~cache_file()
250 #define OFFSETFILE_MAGIC_DATA "StarDict's oft file\nversion=2.4.8\n"
251 #define COLLATIONFILE_MAGIC_DATA "StarDict's clt file\nversion=2.4.8\n"
253 MapFile* cache_file::get_cache_loadfile(const gchar *filename, const std::string &url, const std::string &saveurl, CollateFunctions cltfunc, glong filedatasize, int next)
255 struct stat cachestat;
256 if (g_stat(filename, &cachestat)!=0)
258 MapFile *mf = new MapFile;
259 if (!mf->open(filename, cachestat.st_size)) {
264 gchar *p = mf->begin();
266 if (cachefiletype == CacheFileType_oft)
267 has_prefix = g_str_has_prefix(p, OFFSETFILE_MAGIC_DATA);
269 has_prefix = g_str_has_prefix(p, COLLATIONFILE_MAGIC_DATA);
274 if (cachefiletype == CacheFileType_oft)
275 p+= sizeof(OFFSETFILE_MAGIC_DATA)-1-1;
277 p+= sizeof(COLLATIONFILE_MAGIC_DATA)-1-1;
279 p2 = strstr(p, "\nurl=");
284 p2+=sizeof("\nurl=")-1;
286 p3 = strchr(p2, '\n');
292 tmpstr = (gchar *)g_memdup(p2, p3-p2+1);
293 tmpstr[p3-p2] = '\0';
294 if (saveurl == tmpstr) {
296 if (cachefiletype == CacheFileType_clt) {
297 p2 = strstr(p, "\nfunc=");
302 p2+=sizeof("\nfunc=")-1;
303 p3 = strchr(p2, '\n');
308 tmpstr = (gchar *)g_memdup(p2, p3-p2+1);
309 tmpstr[p3-p2] = '\0';
310 if (atoi(tmpstr)!=cltfunc) {
317 if (cachestat.st_size!=glong(filedatasize + strlen(mf->begin()) +1)) {
322 if (g_stat(url.c_str(), &idxstat)!=0) {
326 if (cachestat.st_mtime<idxstat.st_mtime) {
330 //g_print("Using map file: %s\n", filename);
335 gchar *basename = g_path_get_basename(saveurl.c_str());
336 p = strrchr(basename, '.');
342 gchar *extendname = p+1;
343 gchar *dirname = g_path_get_dirname(filename);
345 if (cachefiletype == CacheFileType_oft)
346 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.oft", dirname, basename, next, extendname);
347 else if (cachefiletype == CacheFileType_clt)
348 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.clt", dirname, basename, next, extendname);
350 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.%d.clt", dirname, basename, next, extendname, cltfunc);
351 MapFile *out = get_cache_loadfile(nextfilename, url, saveurl, cltfunc, filedatasize, next+1);
354 g_free(nextfilename);
// Try to load a previously saved cache for `url`.
// The candidate file name is derived from `saveurl` plus a suffix chosen by
// cachefiletype (".oft", ".clt", or ".<cltfunc>.clt"). The loop makes two
// attempts; one of them asks get_cache_filename() for an alternative name.
// On success, wordoffset points into the mapped cache file.
358 bool cache_file::load_cache(const std::string& url, const std::string& saveurl, CollateFunctions cltfunc, glong filedatasize)
360 std::string oftfilename;
361 if (cachefiletype == CacheFileType_oft)
362 oftfilename=saveurl+".oft";
363 else if (cachefiletype == CacheFileType_clt)
364 oftfilename=saveurl+".clt";
// Remaining cache type: the collation function number is part of the name.
366 gchar *func = g_strdup_printf("%d", cltfunc);
367 oftfilename=saveurl+'.'+func+".clt";
370 for (int i=0;i<2;i++) {
372 if (!get_cache_filename(saveurl, oftfilename, false, cltfunc))
// 2 is the first "(N)" suffix get_cache_loadfile() tries when this file
// does not belong to `saveurl`.
375 mf = get_cache_loadfile(oftfilename.c_str(), url, saveurl, cltfunc, filedatasize, 2);
// The payload starts right after the text header and its terminating NUL.
378 wordoffset = (guint32 *)(mf->begin()+strlen(mf->begin())+1);
// Build the cache file path for `url` inside "<user cache dir>/stardict" and
// store it in `cachefilename`. The file name is the basename of `url` plus a
// suffix chosen by cachefiletype (".oft", ".clt", or ".<cltfunc>.clt").
// NOTE(review): how `create` gates the directory creation below is not
// visible in this excerpt — confirm against the full source.
384 bool cache_file::get_cache_filename(const std::string& url, std::string &cachefilename, bool create, CollateFunctions cltfunc)
// Create the user cache directory (mode 0700) when it does not exist yet.
387 if (!g_file_test(g_get_user_cache_dir(), G_FILE_TEST_EXISTS) &&
388 g_mkdir(g_get_user_cache_dir(), 0700)==-1)
392 std::string cache_dir=g_get_user_cache_dir();
393 cache_dir += G_DIR_SEPARATOR_S "stardict";
// Same for the "stardict" subdirectory; an existing non-directory at that
// path is handled separately by the else branch.
396 if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_EXISTS)) {
397 if (g_mkdir(cache_dir.c_str(), 0700)==-1)
399 } else if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_IS_DIR))
403 gchar *base=g_path_get_basename(url.c_str());
404 if (cachefiletype == CacheFileType_oft) {
405 cachefilename = cache_dir+G_DIR_SEPARATOR_S+base+".oft";
406 } else if (cachefiletype == CacheFileType_clt) {
407 cachefilename = cache_dir+G_DIR_SEPARATOR_S+base+".clt";
// Remaining cache type: the collation function number is part of the name.
409 gchar *func = g_strdup_printf("%d", cltfunc);
410 cachefilename = cache_dir+G_DIR_SEPARATOR_S+base+'.'+func+".clt";
417 FILE* cache_file::get_cache_savefile(const gchar *filename, const std::string &url, int next, std::string &cfilename, CollateFunctions cltfunc)
419 cfilename = filename;
421 if (g_stat(filename, &oftstat)!=0) {
422 return fopen(filename, "wb");
425 if (!mf.open(filename, oftstat.st_size)) {
426 return fopen(filename, "wb");
428 gchar *p = mf.begin();
430 if (cachefiletype == CacheFileType_oft)
431 has_prefix = g_str_has_prefix(p, OFFSETFILE_MAGIC_DATA);
433 has_prefix = g_str_has_prefix(p, COLLATIONFILE_MAGIC_DATA);
436 return fopen(filename, "wb");
438 if (cachefiletype == CacheFileType_oft)
439 p+= sizeof(OFFSETFILE_MAGIC_DATA)-1-1;
441 p+= sizeof(COLLATIONFILE_MAGIC_DATA)-1-1;
443 p2 = strstr(p, "\nurl=");
446 return fopen(filename, "wb");
448 p2+=sizeof("\nurl=")-1;
450 p3 = strchr(p2, '\n');
453 return fopen(filename, "wb");
456 tmpstr = (gchar *)g_memdup(p2, p3-p2+1);
457 tmpstr[p3-p2] = '\0';
461 return fopen(filename, "wb");
465 gchar *basename = g_path_get_basename(url.c_str());
466 p = strrchr(basename, '.');
472 gchar *extendname = p+1;
473 gchar *dirname = g_path_get_dirname(filename);
475 if (cachefiletype == CacheFileType_oft)
476 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.oft", dirname, basename, next, extendname);
477 else if (cachefiletype == CacheFileType_clt)
478 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.clt", dirname, basename, next, extendname);
480 nextfilename = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.%d.clt", dirname, basename, next, extendname, cltfunc);
481 FILE *out = get_cache_savefile(nextfilename, url, next+1, cfilename, cltfunc);
484 g_free(nextfilename);
// Write the wordoffset array (`npages` guint32 values) to a cache file for
// `url`. Layout written below: magic header, "url=<url>", for
// CacheFileType_clt also "\nfunc=<cltfunc>", then "\n" plus a NUL byte,
// then the raw wordoffset array.
// The loop makes two attempts; one of them asks get_cache_filename() for an
// alternative name.
488 bool cache_file::save_cache(const std::string& url, CollateFunctions cltfunc, gulong npages)
490 std::string oftfilename;
491 if (cachefiletype == CacheFileType_oft) {
492 oftfilename=url+".oft";
493 } else if (cachefiletype == CacheFileType_clt) {
494 oftfilename=url+".clt";
// Remaining cache type: the collation function number is part of the name.
496 gchar *func = g_strdup_printf("%d", cltfunc);
497 oftfilename=url+'.'+func+".clt";
500 for (int i=0;i<2;i++) {
502 if (!get_cache_filename(url, oftfilename, true, cltfunc))
// cfilename receives the file name get_cache_savefile() actually settled on.
505 std::string cfilename;
506 FILE *out= get_cache_savefile(oftfilename.c_str(), url, 2, cfilename, cltfunc);
// sizeof(...)-1: write the magic text without its terminating NUL.
509 if (cachefiletype == CacheFileType_oft)
510 fwrite(OFFSETFILE_MAGIC_DATA, 1, sizeof(OFFSETFILE_MAGIC_DATA)-1, out);
512 fwrite(COLLATIONFILE_MAGIC_DATA, 1, sizeof(COLLATIONFILE_MAGIC_DATA)-1, out);
513 fwrite("url=", 1, sizeof("url=")-1, out);
514 fwrite(url.c_str(), 1, url.length(), out);
515 if (cachefiletype == CacheFileType_clt) {
// NOTE(review): the fprintf_s/fprintf pair is presumably selected by a
// platform #ifdef that is not visible in this excerpt.
517 fprintf_s(out, "\nfunc=%d", cltfunc);
519 fprintf(out, "\nfunc=%d", cltfunc);
// Deliberately 2 bytes: the newline AND the literal's terminating NUL.
// load_cache() finds the payload at begin()+strlen(begin())+1, so the text
// header must end with a NUL.
522 fwrite("\n", 1, 2, out);
523 fwrite(wordoffset, sizeof(guint32), npages, out);
525 g_print("Save cache file: %s\n", cfilename.c_str());
531 collation_file::collation_file(idxsyn_file *_idx_file, CacheFileType _cachefiletype) : cache_file(_cachefiletype)
533 idx_file = _idx_file;
536 const gchar *collation_file::GetWord(glong idx)
538 return idx_file->get_key(wordoffset[idx]);
541 glong collation_file::GetOrigIndex(glong cltidx)
543 return wordoffset[cltidx];
546 bool collation_file::lookup(const char *sWord, glong &idx, glong &idx_suggest)
549 glong iTo=idx_file->wordcount-1;
550 if (stardict_collate(sWord, GetWord(0), CollateFunction)<0) {
553 } else if (stardict_collate(sWord, GetWord(iTo), CollateFunction) >0) {
561 iThisIndex=(iFrom+iTo)/2;
562 cmpint = stardict_collate(sWord, GetWord(iThisIndex), CollateFunction);
576 best = prefix_match (sWord, GetWord(idx_suggest));
578 if ((iTo=idx_suggest-1) < 0)
580 back = prefix_match (sWord, GetWord(iTo));
581 if (!back || back < best)
588 idx_suggest = iThisIndex;
594 struct sort_collation_index_user_data {
595 idxsyn_file *idx_file;
596 CollateFunctions cltfunc;
599 static gint sort_collation_index(gconstpointer a, gconstpointer b, gpointer user_data)
601 sort_collation_index_user_data *data = (sort_collation_index_user_data*)user_data;
602 gchar *str1 = g_strdup(data->idx_file->get_key(*((guint32 *)a)));
603 const gchar *str2 = data->idx_file->get_key(*((guint32 *)b));
604 gint x = stardict_collate(str1, str2, data->cltfunc);
607 return *((guint32 *)a) - *((guint32 *)b);
612 idxsyn_file::idxsyn_file()
614 memset(clt_files, 0, sizeof(clt_files));
617 const gchar *idxsyn_file::getWord(glong idx, int EnableCollationLevel, int servercollatefunc)
619 if (EnableCollationLevel == 0)
621 if (EnableCollationLevel == 1)
622 return clt_file->GetWord(idx);
623 if (servercollatefunc == 0)
625 collate_load((CollateFunctions)(servercollatefunc-1));
626 return clt_files[servercollatefunc-1]->GetWord(idx);
629 bool idxsyn_file::Lookup(const char *str, glong &idx, glong &idx_suggest, int EnableCollationLevel, int servercollatefunc)
631 if (EnableCollationLevel == 0)
632 return lookup(str, idx, idx_suggest);
633 if (EnableCollationLevel == 1)
634 return clt_file->lookup(str, idx, idx_suggest);
635 if (servercollatefunc == 0)
636 return lookup(str, idx, idx_suggest);
637 collate_load((CollateFunctions)(servercollatefunc-1));
638 return clt_files[servercollatefunc-1]->lookup(str, idx, idx_suggest);
641 void idxsyn_file::collate_sort(const std::string& url,
642 const std::string& saveurl,
643 CollateFunctions collf,
646 clt_file = new collation_file(this, CacheFileType_clt);
647 clt_file->CollateFunction = collf;
648 if (!clt_file->load_cache(url, saveurl, collf, wordcount*sizeof(guint32))) {
649 sp->notify_about_start(_("Sorting, please wait..."));
650 clt_file->wordoffset = (guint32 *)g_malloc(wordcount*sizeof(guint32));
651 for (glong i=0; i<wordcount; i++)
652 clt_file->wordoffset[i] = i;
653 sort_collation_index_user_data data;
654 data.idx_file = this;
655 data.cltfunc = collf;
656 g_qsort_with_data(clt_file->wordoffset, wordcount, sizeof(guint32), sort_collation_index, &data);
657 if (!clt_file->save_cache(saveurl, collf, wordcount))
658 g_printerr("Cache update failed.\n");
662 void idxsyn_file::collate_save_info(const std::string& _url, const std::string& _saveurl)
668 void idxsyn_file::collate_load(CollateFunctions collf)
670 if (clt_files[collf])
672 clt_files[collf] = new collation_file(this, CacheFileType_server_clt);
673 clt_files[collf]->CollateFunction = collf;
674 if (!clt_files[collf]->load_cache(url, saveurl, collf, wordcount*sizeof(guint32))) {
675 clt_files[collf]->wordoffset = (guint32 *)g_malloc(wordcount*sizeof(guint32));
676 for (glong i=0; i<wordcount; i++)
677 clt_files[collf]->wordoffset[i] = i;
678 sort_collation_index_user_data data;
679 data.idx_file = this;
680 data.cltfunc = collf;
681 g_qsort_with_data(clt_files[collf]->wordoffset, wordcount, sizeof(guint32), sort_collation_index, &data);
682 if (!clt_files[collf]->save_cache(saveurl, collf, wordcount))
683 g_printerr("Cache update failed.\n");
687 bool offset_index::load(const std::string& url, gulong wc, gulong fsize,
688 bool CreateCacheFile, int EnableCollationLevel,
689 CollateFunctions _CollateFunction, show_progress_t *sp)
692 npages=(wc-1)/ENTR_PER_PAGE+2;
693 if (!oft_file.load_cache(url, url, _CollateFunction, npages*sizeof(guint32))) {
695 if (!map_file.open(url.c_str(), fsize))
697 const gchar *idxdatabuffer=map_file.begin();
698 oft_file.wordoffset = (guint32 *)g_malloc(npages*sizeof(guint32));
699 const gchar *p1 = idxdatabuffer;
702 for (guint32 i=0; i<wc; i++) {
703 index_size=strlen(p1) +1 + 2*sizeof(guint32);
704 if (i % ENTR_PER_PAGE==0) {
705 oft_file.wordoffset[j]=p1-idxdatabuffer;
710 oft_file.wordoffset[j]=p1-idxdatabuffer;
712 if (CreateCacheFile) {
713 if (!oft_file.save_cache(url, _CollateFunction, npages))
714 g_printerr("Cache update failed.\n");
718 if (!(idxfile = fopen(url.c_str(), "rb"))) {
722 first.assign(0, read_first_on_page_key(0));
723 last.assign(npages-2, read_first_on_page_key(npages-2));
724 middle.assign((npages-2)/2, read_first_on_page_key((npages-2)/2));
725 real_last.assign(wc-1, get_key(wc-1));
727 if (EnableCollationLevel == 0) {
728 } else if (EnableCollationLevel == 1) {
729 collate_sort(url, url, _CollateFunction, sp);
730 } else if (EnableCollationLevel == 2) {
731 collate_save_info(url, url);
737 inline gulong offset_index::load_page(glong page_idx)
739 gulong nentr=ENTR_PER_PAGE;
740 if (page_idx==glong(npages-2))
741 if ((nentr=wordcount%ENTR_PER_PAGE)==0)
745 if (page_idx!=page.idx) {
746 page_data.resize(oft_file.wordoffset[page_idx+1]-oft_file.wordoffset[page_idx]);
747 fseek(idxfile, oft_file.wordoffset[page_idx], SEEK_SET);
748 fread(&page_data[0], 1, page_data.size(), idxfile);
749 page.fill(&page_data[0], nentr, page_idx);
755 const gchar *offset_index::get_key(glong idx)
757 load_page(idx/ENTR_PER_PAGE);
758 glong idx_in_page=idx%ENTR_PER_PAGE;
759 wordentry_offset=page.entries[idx_in_page].off;
760 wordentry_size=page.entries[idx_in_page].size;
762 return page.entries[idx_in_page].keystr;
765 void offset_index::get_data(glong idx)
770 const gchar *offset_index::get_key_and_data(glong idx)
775 bool offset_index::lookup(const char *str, glong &idx, glong &idx_suggest)
782 if (stardict_strcmp(str, first.keystr.c_str())<0) {
786 } else if (stardict_strcmp(str, real_last.keystr.c_str()) >0) {
794 iThisIndex=(iFrom+iTo)/2;
795 cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
812 gulong netr=load_page(idx);
813 iFrom=1; // Needn't search the first word anymore.
817 iThisIndex=(iFrom+iTo)/2;
818 cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
833 best = prefix_match (str, page.entries[idx_suggest % ENTR_PER_PAGE].keystr);
835 if ((iTo=idx_suggest-1) < 0)
837 if (idx_suggest % ENTR_PER_PAGE == 0)
838 load_page(iTo / ENTR_PER_PAGE);
839 back = prefix_match (str, page.entries[iTo % ENTR_PER_PAGE].keystr);
840 if (!back || back < best)
856 wordlist_index::wordlist_index()
862 wordlist_index::~wordlist_index()
868 bool wordlist_index::load(const std::string& url, gulong wc, gulong fsize,
869 bool CreateCacheFile, int EnableCollationLevel,
870 CollateFunctions _CollateFunction, show_progress_t *sp)
873 gzFile in = gzopen(url.c_str(), "rb");
877 idxdatabuf = (gchar *)g_malloc(fsize);
879 gulong len = gzread(in, idxdatabuf, fsize);
887 wordlist.resize(wc+1);
888 gchar *p1 = idxdatabuf;
890 for (i=0; i<wc; i++) {
892 p1 += strlen(p1) +1 + 2*sizeof(guint32);
896 if (EnableCollationLevel == 0) {
898 std::string saveurl = url;
899 saveurl.erase(saveurl.length()-sizeof(".gz")+1, sizeof(".gz")-1);
900 if (EnableCollationLevel == 1) {
901 collate_sort(url, saveurl, _CollateFunction, sp);
902 } else if (EnableCollationLevel == 2) {
903 collate_save_info(url, saveurl);
909 const gchar *wordlist_index::get_key(glong idx)
911 return wordlist[idx];
914 void wordlist_index::get_data(glong idx)
916 gchar *p1 = wordlist[idx]+strlen(wordlist[idx])+sizeof(gchar);
917 wordentry_offset = g_ntohl(get_uint32(p1));
918 p1 += sizeof(guint32);
919 wordentry_size = g_ntohl(get_uint32(p1));
922 const gchar *wordlist_index::get_key_and_data(glong idx)
928 bool wordlist_index::lookup(const char *str, glong &idx, glong &idx_suggest)
931 glong iTo=wordlist.size()-2;
933 if (stardict_strcmp(str, get_key(0))<0) {
936 } else if (stardict_strcmp(str, get_key(iTo)) >0) {
944 iThisIndex=(iFrom+iTo)/2;
945 cmpint = stardict_strcmp(str, get_key(iThisIndex));
959 best = prefix_match (str, get_key(idx_suggest));
961 if ((iTo=idx_suggest-1) < 0)
963 back = prefix_match (str, get_key(iTo));
964 if (!back || back < best)
971 idx_suggest = iThisIndex;
977 //===================================================================
978 void synonym_file::page_t::fill(gchar *data, gint nent, glong idx_)
983 for (gint i=0; i<nent; ++i) {
987 entries[i].index=g_ntohl(get_uint32(p));
992 synonym_file::synonym_file() : oft_file(CacheFileType_oft)
997 synonym_file::~synonym_file()
1004 inline const gchar *synonym_file::read_first_on_page_key(glong page_idx)
1006 fseek(synfile, oft_file.wordoffset[page_idx], SEEK_SET);
1007 guint32 page_size=oft_file.wordoffset[page_idx+1]-oft_file.wordoffset[page_idx];
1008 gulong minsize = sizeof(wordentry_buf);
1009 if (page_size < minsize)
1010 minsize = page_size;
1011 fread(wordentry_buf, minsize, 1, synfile); //TODO: check returned values, deal with word entry that strlen>255.
1012 return wordentry_buf;
1015 inline const gchar *synonym_file::get_first_on_page_key(glong page_idx)
1017 if (page_idx<middle.idx) {
1018 if (page_idx==first.idx)
1019 return first.keystr.c_str();
1020 return read_first_on_page_key(page_idx);
1021 } else if (page_idx>middle.idx) {
1022 if (page_idx==last.idx)
1023 return last.keystr.c_str();
1024 return read_first_on_page_key(page_idx);
1026 return middle.keystr.c_str();
1029 bool synonym_file::load(const std::string& url, gulong wc, bool CreateCacheFile,
1030 int EnableCollationLevel, CollateFunctions _CollateFunction,
1031 show_progress_t *sp)
1034 npages=(wc-1)/ENTR_PER_PAGE+2;
1035 if (!oft_file.load_cache(url, url, _CollateFunction, npages*sizeof(guint32))) {
1037 if (stat (url.c_str(), &stats) == -1)
1040 if (!map_file.open(url.c_str(), stats.st_size))
1042 const gchar *syndatabuffer=map_file.begin();
1043 oft_file.wordoffset = (guint32 *)g_malloc(npages*sizeof(guint32));
1044 const gchar *p1 = syndatabuffer;
1047 for (guint32 i=0; i<wc; i++) {
1048 index_size=strlen(p1) +1 + sizeof(guint32);
1049 if (i % ENTR_PER_PAGE==0) {
1050 oft_file.wordoffset[j]=p1-syndatabuffer;
1055 oft_file.wordoffset[j]=p1-syndatabuffer;
1057 if (CreateCacheFile) {
1058 if (!oft_file.save_cache(url, _CollateFunction, npages))
1059 g_printerr("Cache update failed.\n");
1063 if (!(synfile = fopen(url.c_str(), "rb"))) {
1067 first.assign(0, read_first_on_page_key(0));
1068 last.assign(npages-2, read_first_on_page_key(npages-2));
1069 middle.assign((npages-2)/2, read_first_on_page_key((npages-2)/2));
1070 real_last.assign(wc-1, get_key(wc-1));
1072 if (EnableCollationLevel == 0) {
1073 } else if (EnableCollationLevel == 1)
1074 collate_sort(url, url, _CollateFunction, sp);
1075 else if (EnableCollationLevel == 2) {
1076 collate_save_info(url, url);
1082 inline gulong synonym_file::load_page(glong page_idx)
1084 gulong nentr=ENTR_PER_PAGE;
1085 if (page_idx==glong(npages-2))
1086 if ((nentr=wordcount%ENTR_PER_PAGE)==0)
1087 nentr=ENTR_PER_PAGE;
1090 if (page_idx!=page.idx) {
1091 page_data.resize(oft_file.wordoffset[page_idx+1]-oft_file.wordoffset[page_idx]);
1092 fseek(synfile, oft_file.wordoffset[page_idx], SEEK_SET);
1093 fread(&page_data[0], 1, page_data.size(), synfile);
1094 page.fill(&page_data[0], nentr, page_idx);
1100 const gchar *synonym_file::get_key(glong idx)
1102 load_page(idx/ENTR_PER_PAGE);
1103 glong idx_in_page=idx%ENTR_PER_PAGE;
1104 wordentry_index=page.entries[idx_in_page].index;
1106 return page.entries[idx_in_page].keystr;
1109 bool synonym_file::lookup(const char *str, glong &idx, glong &idx_suggest)
1116 if (stardict_strcmp(str, first.keystr.c_str())<0) {
1120 } else if (stardict_strcmp(str, real_last.keystr.c_str()) >0) {
1121 idx = INVALID_INDEX;
1127 while (iFrom<=iTo) {
1128 iThisIndex=(iFrom+iTo)/2;
1129 cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
1145 gulong netr=load_page(idx);
1146 iFrom=1; // Needn't search the first word anymore.
1149 while (iFrom<=iTo) {
1150 iThisIndex=(iFrom+iTo)/2;
1151 cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
1163 idx += iFrom; //next
1166 best = prefix_match (str, page.entries[idx_suggest % ENTR_PER_PAGE].keystr);
1168 if ((iTo=idx_suggest-1) < 0)
1170 if (idx_suggest % ENTR_PER_PAGE == 0)
1171 load_page(iTo / ENTR_PER_PAGE);
1172 back = prefix_match (str, page.entries[iTo % ENTR_PER_PAGE].keystr);
1173 if (!back || back < best)
1189 //===================================================================
1200 bool Dict::load(const std::string& ifofilename, bool CreateCacheFile,
1201 int EnableCollationLevel, CollateFunctions CollateFunction,
1202 show_progress_t *sp)
1205 glong wordcount, synwordcount;
1206 if (!load_ifofile(ifofilename, idxfilesize, wordcount, synwordcount))
1208 sp->notify_about_start(_("Loading..."));
1209 std::string fullfilename(ifofilename);
1210 fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "dict.dz");
1212 if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1213 dictdzfile.reset(new dictData);
1214 if (!dictdzfile->open(fullfilename, 0)) {
1215 //g_print("open file %s failed!\n",fullfilename);
1219 fullfilename.erase(fullfilename.length()-sizeof(".dz")+1, sizeof(".dz")-1);
1220 dictfile = fopen(fullfilename.c_str(),"rb");
1222 //g_print("open file %s failed!\n",fullfilename);
1227 fullfilename=ifofilename;
1228 fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "idx.gz");
1230 if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1231 idx_file.reset(new wordlist_index);
1233 fullfilename.erase(fullfilename.length()-sizeof(".gz")+1, sizeof(".gz")-1);
1234 idx_file.reset(new offset_index);
1237 if (!idx_file->load(fullfilename, wordcount, idxfilesize,
1238 CreateCacheFile, EnableCollationLevel,
1239 CollateFunction, sp))
1243 fullfilename=ifofilename;
1244 fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "syn");
1245 if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1246 syn_file.reset(new synonym_file);
1247 if (!syn_file->load(fullfilename, synwordcount,
1248 CreateCacheFile, EnableCollationLevel,
1249 CollateFunction, sp))
1254 bool has_res = false;
1255 gchar *dirname = g_path_get_dirname(ifofilename.c_str());
1256 fullfilename = dirname;
1257 fullfilename += G_DIR_SEPARATOR_S "res";
1258 if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR)) {
1261 fullfilename = dirname;
1262 fullfilename += G_DIR_SEPARATOR_S "res.rifo";
1263 if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1268 storage = new ResourceStorage();
1269 bool failed = storage->load(dirname);
1277 g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), wordcount);
1281 bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize, glong &wordcount, glong &synwordcount)
1284 if (!dict_info.load_from_ifo_file(ifofilename, false))
1286 if (dict_info.wordcount==0)
1289 ifo_file_name=dict_info.ifo_file_name;
1290 bookname=dict_info.bookname;
1292 idxfilesize=dict_info.index_file_size;
1293 wordcount=dict_info.wordcount;
1294 synwordcount=dict_info.synwordcount;
1296 sametypesequence=dict_info.sametypesequence;
1297 dicttype=dict_info.dicttype;
1302 glong Dict::nsynarticles()
1304 if (syn_file.get() == NULL)
1306 return syn_file->wordcount;
1309 bool Dict::GetWordPrev(glong idx, glong &pidx, bool isidx, int EnableCollationLevel, int servercollatefunc)
1311 idxsyn_file *is_file;
1313 is_file = idx_file.get();
1315 is_file = syn_file.get();
1316 if (idx==INVALID_INDEX) {
1317 pidx = is_file->wordcount-1;
1321 gchar *cWord = g_strdup(is_file->getWord(pidx, EnableCollationLevel, servercollatefunc));
1325 pWord = is_file->getWord(pidx-1, EnableCollationLevel, servercollatefunc);
1326 if (strcmp(pWord, cWord)!=0) {
1341 void Dict::GetWordNext(glong &idx, bool isidx, int EnableCollationLevel, int servercollatefunc)
1343 idxsyn_file *is_file;
1345 is_file = idx_file.get();
1347 is_file = syn_file.get();
1348 gchar *cWord = g_strdup(is_file->getWord(idx, EnableCollationLevel, servercollatefunc));
1351 while (idx < is_file->wordcount-1) {
1352 pWord = is_file->getWord(idx+1, EnableCollationLevel, servercollatefunc);
1353 if (strcmp(pWord, cWord)!=0) {
1366 gint Dict::GetOrigWordCount(glong& idx, bool isidx)
1368 idxsyn_file *is_file;
1370 is_file = idx_file.get();
1372 is_file = syn_file.get();
1373 gchar *cWord = g_strdup(is_file->get_key(idx));
1378 pWord = is_file->get_key(idx1-1);
1379 if (strcmp(pWord, cWord)!=0)
1385 while (idx2<is_file->wordcount-1) {
1386 pWord = is_file->get_key(idx2+1);
1387 if (strcmp(pWord, cWord)!=0)
1397 bool Dict::LookupSynonym(const char *str, glong &synidx, glong &synidx_suggest, int EnableCollationLevel, int servercollatefunc)
1399 if (syn_file.get() == NULL) {
1400 synidx = UNSET_INDEX;
1401 synidx_suggest = UNSET_INDEX;
1404 return syn_file->Lookup(str, synidx, synidx_suggest, EnableCollationLevel, servercollatefunc);
1407 bool Dict::LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
1410 for (glong i=0; i<narticles() && iIndexCount<iBuffLen-1; i++)
1411 // Need to deal with same word in index? But this will slow down processing in most case.
1412 if (g_pattern_match_string(pspec, idx_file->getWord(i, 0, 0)))
1413 aIndex[iIndexCount++]=i;
1414 aIndex[iIndexCount]= -1; // -1 is the end.
1415 return (iIndexCount>0);
1418 bool Dict::LookupWithRuleSynonym(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
1420 if (syn_file.get() == NULL)
1423 for (glong i=0; i<nsynarticles() && iIndexCount<iBuffLen-1; i++)
1424 // Need to deal with same word in index? But this will slow down processing in most case.
1425 if (g_pattern_match_string(pspec, syn_file->getWord(i, 0, 0)))
1426 aIndex[iIndexCount++]=i;
1427 aIndex[iIndexCount]= -1; // -1 is the end.
1428 return (iIndexCount>0);
1431 bool Dict::LookupWithRegex(GRegex *regex, glong *aIndex, int iBuffLen)
1434 for (glong i=0; i<narticles() && iIndexCount<iBuffLen-1; i++)
1435 // Need to deal with same word in index? But this will slow down processing in most case.
1436 if (g_regex_match(regex, idx_file->getWord(i, 0, 0), (GRegexMatchFlags)0, NULL))
1437 aIndex[iIndexCount++]=i;
1438 aIndex[iIndexCount]= -1; // -1 is the end.
1439 return (iIndexCount>0);
1442 bool Dict::LookupWithRegexSynonym(GRegex *regex, glong *aIndex, int iBuffLen)
1444 if (syn_file.get() == NULL)
1447 for (glong i=0; i<nsynarticles() && iIndexCount<iBuffLen-1; i++)
1448 // Need to deal with same word in index? But this will slow down processing in most case.
1449 if (g_regex_match(regex, syn_file->getWord(i, 0, 0), (GRegexMatchFlags)0, NULL))
1450 aIndex[iIndexCount++]=i;
1451 aIndex[iIndexCount]= -1; // -1 is the end.
1452 return (iIndexCount>0);
1455 //===================================================================
1456 show_progress_t Libs::default_show_progress;
// Constructs the dictionary collection.
//   sp          - progress reporter, forwarded to set_show_progress().
//   create      - stored in CreateCacheFile and later passed to Dict::load().
//   enablelevel - collation level, stored in EnableCollationLevel
//                 (0, 1 and 2 are the values handled below).
//   function    - cast to CollateFunctions and stored in CollateFunction.
// In builds with SD_SERVER_CODE defined, root_info_item starts out NULL.
1458 Libs::Libs(show_progress_t *sp, bool create, int enablelevel, int function)
1460 #ifdef SD_SERVER_CODE
1461 root_info_item = NULL;
1463 set_show_progress(sp);
1464 CreateCacheFile = create;
1465 EnableCollationLevel = enablelevel;
1466 CollateFunction = (CollateFunctions)function;
1467 iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg.
// Level 0 needs no collation setup; level 1 initialises the single selected
// collate function; level 2 initialises all of them. A non-zero return from
// the init call is reported on stdout and otherwise ignored.
1468 if (EnableCollationLevel == 0) {
1469 } else if (EnableCollationLevel == 1) {
1470 if (utf8_collate_init(CollateFunction))
1471 printf("Init collate function failed!\n");
1472 } else if (EnableCollationLevel == 2){
1473 if (utf8_collate_init_all())
1474 printf("Init collate functions failed!\n");
// NOTE(review): the signature line of this block is not visible in this
// extract; presumably this is the body of the Libs destructor — confirm.
// In SD_SERVER_CODE builds it deletes root_info_item, then walks every Dict*
// stored in oLib (the loop body is not visible here; presumably it deletes
// each dictionary).
1480 #ifdef SD_SERVER_CODE
1482 delete root_info_item;
1484 for (std::vector<Dict *>::iterator p=oLib.begin(); p!=oLib.end(); ++p)
// Loads the dictionary identified by `url` and, when Dict::load() succeeds,
// appends it to oLib.
//   url - dictionary location handed to Dict::load().
//   sp  - progress reporter handed to Dict::load().
// The current CreateCacheFile, EnableCollationLevel and CollateFunction
// settings are passed through to Dict::load().
// NOTE(review): the creation of `lib`, the failure branch and the return
// statements are not visible in this extract.
1489 bool Libs::load_dict(const std::string& url, show_progress_t *sp)
1492 if (lib->load(url, CreateCacheFile, EnableCollationLevel,
1493 CollateFunction, sp)) {
1494 oLib.push_back(lib);
1502 #ifdef SD_SERVER_CODE
// Builds the server-side dictionary tree.
// Creates the root DictInfoItem as a directory entry (isdir == 1) named "/",
// fills it from the stardictd.xml description found under the hard-coded
// directory /usr/share/stardict/dic, then resolves link entries through
// GenLinkDict().
1503 void Libs::LoadFromXML()
1505 root_info_item = new DictInfoItem();
1506 root_info_item->isdir = 1;
1507 root_info_item->dir = new DictInfoDirItem();
1508 root_info_item->dir->name='/';
1509 LoadXMLDir("/usr/share/stardict/dic", root_info_item);
1510 GenLinkDict(root_info_item);
// Resolves "link" entries (isdir == 2) of the directory `info_item` against
// uidmap.
// For every link entry the stored uid string is looked up:
//   - found: the uid string is freed and the entry's `dict` pointer is set to
//     the DictInfoDictItem registered for that uid;
//   - not found: an error is printed, the uid string is freed and the entry is
//     queued for removal from the directory's item list.
// Removal happens after the scan so the list is not modified while it is
// being iterated.
// NOTE(review): the branch body for sub-directories (isdir == 1) is not
// visible in this extract; presumably it recurses — confirm.
// NOTE(review): in the visible lines the erased DictInfoItem objects are only
// removed from the list, not deleted — possible leak, confirm.
1513 void Libs::GenLinkDict(DictInfoItem *info_item)
1515 std::list<std::list<DictInfoItem *>::iterator> eraselist;
1516 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1517 if ((*i)->isdir == 1) {
1519 } else if ((*i)->isdir == 2) {
1520 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1521 uid_iter = uidmap.find(*((*i)->linkuid));
1522 if (uid_iter!=uidmap.end()) {
1523 delete (*i)->linkuid;
1524 (*i)->dict = uid_iter->second;
1526 g_print("Error, linkdict uid not found! %s\n", (*i)->linkuid->c_str());
1527 delete (*i)->linkuid;
1528 eraselist.push_back(i);
1532 for (std::list<std::list<DictInfoItem *>::iterator>::iterator i = eraselist.begin(); i!= eraselist.end(); ++i) {
1533 info_item->dir->info_item_list.erase(*i);
// GMarkup start-element callback used while parsing stardictd.xml.
// `user_data` is the ParseUserData for the file being parsed.
//   <dict>     - marks the parser as inside a dict element and resets the
//                per-dictionary fields collected from its children.
//   <linkdict> - marks the parser as inside a linkdict element and resets the
//                collected link uid.
// Other elements are ignored here.
// NOTE(review): only the resets of `level` and `download` are visible in this
// extract; the remaining field resets are on lines not shown.
1537 void Libs::func_parse_start_element(GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error)
1539 if (strcmp(element_name, "dict")==0) {
1540 ParseUserData *Data = (ParseUserData *)user_data;
1541 Data->indict = true;
1544 Data->level.clear();
1545 Data->download.clear();
1548 } else if (strcmp(element_name, "linkdict")==0) {
1549 ParseUserData *Data = (ParseUserData *)user_data;
1550 Data->inlinkdict = true;
1551 Data->linkuid.clear();
// GMarkup end-element callback used while parsing stardictd.xml.
// `user_data` is the ParseUserData for the file being parsed.
//   </dict>     - when both a path and a uid were collected, loads the
//                 dictionary and, on success, records a dictionary entry
//                 (isdir == 0) in the current directory and in uidmap.
//   </linkdict> - when a link uid was collected, records a link entry
//                 (isdir == 2) holding a heap copy of the uid; it is resolved
//                 later against uidmap.
// NOTE(review): the construction of `url` is only partly visible in this
// extract.
1555 void Libs::func_parse_end_element(GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error)
1557 if (strcmp(element_name, "dict")==0) {
1558 ParseUserData *Data = (ParseUserData *)user_data;
1559 Data->indict = false;
1560 if (!Data->path.empty() && !Data->uid.empty()) {
1563 url += G_DIR_SEPARATOR;
1565 if (Data->oLibs->load_dict(url, Data->oLibs->show_progress)) {
1566 DictInfoItem *sub_info_item = new DictInfoItem();
1567 sub_info_item->isdir = 0;
1568 sub_info_item->dict = new DictInfoDictItem();
1569 sub_info_item->dict->uid = Data->uid;
1570 sub_info_item->dict->download = Data->download;
1571 sub_info_item->dict->from = Data->from;
1572 sub_info_item->dict->to = Data->to;
// A missing <level> element means level 0; otherwise the text is parsed
// with atoi().
1573 if (Data->level.empty())
1574 sub_info_item->dict->level = 0;
1576 sub_info_item->dict->level = atoi(Data->level.c_str());
// The index of the last element of oLib; load_dict() appends to oLib on success.
1577 sub_info_item->dict->id = Data->oLibs->oLib.size()-1;
1578 Data->info_item->dir->info_item_list.push_back(sub_info_item);
1579 Data->oLibs->uidmap[Data->uid] = sub_info_item->dict;
1582 } else if (strcmp(element_name, "linkdict")==0) {
1583 ParseUserData *Data = (ParseUserData *)user_data;
1584 Data->inlinkdict = false;
1585 if (!Data->linkuid.empty()) {
1586 DictInfoItem *sub_info_item = new DictInfoItem();
1587 sub_info_item->isdir = 2;
1588 sub_info_item->linkuid = new std::string(Data->linkuid);
1589 Data->info_item->dir->info_item_list.push_back(sub_info_item);
// GMarkup text callback used while parsing stardictd.xml.
// Dispatches on the name of the element currently being parsed:
//   subdir   - creates a directory entry (isdir == 1) named after the text,
//              loads that sub-directory's own description via LoadXMLDir()
//              and appends the entry to the current directory.
//   dirname  - stores the text as the current directory's dirname.
//   path     - stores the text as the pending dictionary path.
//   uid      - validates the uid (no spaces, not already in uidmap) or, when
//              inside a linkdict element, stores it as the link uid.
//   level / download / from / to - stored as pending dictionary fields.
// `text` is not NUL-terminated; text_len gives its length, hence the
// (text, text_len) assign/append calls.
// NOTE(review): several lines are not visible in this extract, including the
// handling after each uid error message, the condition guarding the uid
// validation, and the start of the sub-directory path.
1594 void Libs::func_parse_text(GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error)
1596 const gchar *element = g_markup_parse_context_get_element(context);
1599 ParseUserData *Data = (ParseUserData *)user_data;
1600 if (strcmp(element, "subdir")==0) {
1603 subdir += G_DIR_SEPARATOR;
1604 subdir.append(text, text_len);
1605 DictInfoItem *sub_info_item = new DictInfoItem();
1606 sub_info_item->isdir = 1;
1607 sub_info_item->dir = new DictInfoDirItem();
1608 sub_info_item->dir->name.assign(text, text_len);
1609 Data->oLibs->LoadXMLDir(subdir.c_str(), sub_info_item);
1610 Data->info_item->dir->info_item_list.push_back(sub_info_item);
1611 } else if (strcmp(element, "dirname")==0) {
1612 Data->info_item->dir->dirname.assign(text, text_len);
1613 } else if (strcmp(element, "path")==0) {
1614 Data->path.assign(text, text_len);
1615 } else if (strcmp(element, "uid")==0) {
1617 std::string uid(text, text_len);
1618 if (uid.find_first_of(' ')!=std::string::npos) {
1619 g_print("Error: uid contains space! %s: %s\n", Data->dir, uid.c_str());
1621 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1622 uid_iter = Data->oLibs->uidmap.find(uid);
1623 if (uid_iter!=Data->oLibs->uidmap.end()) {
1624 g_print("Error: uid duplicated! %s: %s\n", Data->dir, uid.c_str());
1629 } else if (Data->inlinkdict) {
1630 Data->linkuid.assign(text, text_len);
1632 } else if (strcmp(element, "level")==0) {
1633 Data->level.assign(text, text_len);
1634 } else if (strcmp(element, "download")==0) {
1635 Data->download.assign(text, text_len);
1636 } else if (strcmp(element, "from")==0) {
1637 Data->from.assign(text, text_len);
1638 } else if (strcmp(element, "to")==0) {
1639 Data->to.assign(text, text_len);
// Parses the "stardictd.xml" description of directory `dir` and fills
// `info_item` (a directory entry) with the dictionaries, links and
// sub-directories it lists.
//   dir       - directory whose stardictd.xml is read.
//   info_item - directory entry that receives the parsed children.
// The file is located with g_stat(), opened through `mf` using the size
// reported by g_stat(), and parsed with a GMarkup parse context.
// NOTE(review): parse errors are not inspected — NULL is passed for every
// GError** argument, and the return values are not checked in visible lines.
// NOTE(review): the declarations of `mf` and `Data`, the first part of the
// file name and the early-exit statements are not visible in this extract.
1643 void Libs::LoadXMLDir(const char *dir, DictInfoItem *info_item)
1645 std::string filename;
1647 filename += G_DIR_SEPARATOR_S "stardictd.xml";
1648 struct stat filestat;
1649 if (g_stat(filename.c_str(), &filestat)!=0)
1652 if (!mf.open(filename.c_str(), filestat.st_size))
1657 Data.info_item = info_item;
1658 Data.indict = false;
1659 Data.inlinkdict = false;
1660 GMarkupParser parser;
1661 parser.start_element = func_parse_start_element;
1662 parser.end_element = func_parse_end_element;
1663 parser.text = func_parse_text;
1664 parser.passthrough = NULL;
1665 parser.error = NULL;
1666 GMarkupParseContext* context = g_markup_parse_context_new(&parser, (GMarkupParseFlags)0, &Data, NULL);
1667 g_markup_parse_context_parse(context, mf.begin(), filestat.st_size, NULL);
1668 g_markup_parse_context_end_parse(context, NULL);
1669 g_markup_parse_context_free(context);
// dictcount = dictionaries directly in this directory plus the totals of all
// sub-directories; link entries (isdir == 2) are not counted.
1671 info_item->dir->dictcount = 0;
1672 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1673 if ((*i)->isdir == 1) {
1674 info_item->dir->dictcount += (*i)->dir->dictcount;
1675 } else if ((*i)->isdir == 0) {
1676 info_item->dir->dictcount++;
// Returns an XML description of all dictionaries grouped by source language
// and then by target language. The string is built on first use and cached
// in cache_fromto; later calls return the cached value.
// Layout:
//   <lang>
//     <from lang="..."><to lang="...">
//       <dict><uid>...</uid><bookname>...</bookname></dict> ...
//     </to> ... </from> ...
//   </lang>
// NOTE(review): the language names and uid are appended without markup
// escaping here; confirm they cannot contain '"', '<' or '&'.
1681 const std::string &Libs::get_fromto_info() {
1682 if(cache_fromto.empty()){
1683 std::map<std::string, std::list<FromTo> > map_fromto;
1684 gen_fromto_info(root_info_item, map_fromto);
1685 cache_fromto+="<lang>";
1686 for (std::map<std::string, std::list<FromTo> >::iterator map_it = map_fromto.begin(); map_it != map_fromto.end(); ++map_it){
1687 cache_fromto+="<from lang=\"";
1688 cache_fromto+=map_it->first;
1689 cache_fromto+="\">";
1690 std::list<FromTo> &fromTo = map_it->second;
1691 for (std::list<FromTo>::iterator i = fromTo.begin() ; i!= fromTo.end(); ++i){
1692 cache_fromto+="<to lang=\"";
1693 cache_fromto+= i->to;
1694 cache_fromto+="\">";
1695 std::list<FromToInfo> &fromtoinfo = i->fromto_info;
1696 for (std::list<FromToInfo>::iterator j = fromtoinfo.begin() ; j!= fromtoinfo.end(); ++j){
1697 cache_fromto+="<dict><uid>";
1698 cache_fromto+=j->uid;
1699 cache_fromto+="</uid><bookname>";
1700 cache_fromto+= j->bookname;
1701 cache_fromto+="</bookname></dict>";
1703 cache_fromto+="</to>";
1705 cache_fromto+="</from>";
1707 cache_fromto+="</lang>";
1709 return cache_fromto;
// Walks the directory entry `info_item` recursively and groups its
// dictionaries in `map_fromto`: source language -> list of (target language,
// list of {uid, bookname}).
//   info_item  - directory entry to walk; sub-directories are visited
//                recursively.
//   map_fromto - accumulator, updated in place.
// The bookname is taken from the loaded dictionary and markup-escaped before
// it is stored.
// NOTE(review): the statement executed when `from` or `to` is empty is not
// visible in this extract (presumably the entry is skipped). The release of
// `etext`, the declarations of the FromTo temporaries and the flag that
// decides whether a new target-language group must be appended are also on
// lines not shown.
// NOTE(review): entries with isdir == 2 (links) appear to take the same path
// as plain dictionaries here — confirm against the full file.
1712 void Libs::gen_fromto_info(struct DictInfoItem *info_item, std::map<std::string, std::list<FromTo> > &map_fromto) {
1714 for(std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin() ; i!= info_item->dir->info_item_list.end(); ++i){
1715 if ((*i)->isdir == 1) {
1716 gen_fromto_info((*i), map_fromto);
1718 std::string from_str = (*i)->dict->from;
1719 std::string to_str = (*i)->dict->to;
1720 if(from_str.empty() || to_str.empty()){
1723 std::string uid_str = (*i)->dict->uid;
1724 etext = g_markup_escape_text(oLib[(*i)->dict->id]->dict_name().c_str(), -1);
1725 std::string bookname_str = etext;
1727 std::map<std::string, std::list<FromTo> >::iterator fromto1 = map_fromto.find(from_str);
1728 if (fromto1==map_fromto.end()) {
1729 //if an from_str element not already in map, add new from_str to map
1730 FromToInfo fromtoinfo;
1731 fromtoinfo.uid = uid_str;
1732 fromtoinfo.bookname = bookname_str;
1733 std::list<FromToInfo> list_fromtoinfo ;
1734 list_fromtoinfo.push_back(fromtoinfo);
1736 new_fromTo.to = to_str;
1737 new_fromTo.fromto_info = list_fromtoinfo;
1738 std::list<FromTo> list_fromTo;
1739 list_fromTo.push_back(new_fromTo);
1740 map_fromto[from_str] = list_fromTo;
1742 // else if from_str already in map, so comparison to_str and from_to1 , then choose insert.
1743 std::list<FromTo> &fromTo_list = fromto1->second;
1744 std::string from_name1 = fromto1->first;
// Look for an existing group with the same target language and add the
// dictionary to it.
1746 for (std::list<FromTo>::iterator new_fromTo = fromTo_list.begin(); new_fromTo != fromTo_list.end(); ++new_fromTo) {
1747 if(to_str == new_fromTo->to) {
1748 std::list<FromToInfo> &fromtoinfo1 = new_fromTo->fromto_info;
1749 FromToInfo fromtoinfo;
1750 fromtoinfo.uid = uid_str;
1751 fromtoinfo.bookname = bookname_str;
1752 fromtoinfo1.push_back(fromtoinfo);
// Appends a new target-language group containing this dictionary.
1758 FromToInfo fromtoinfo;
1759 fromtoinfo.uid = uid_str;
1760 fromtoinfo.bookname = bookname_str;
1761 std::list<FromToInfo> fromtoinfo1;
1762 fromtoinfo1.push_back(fromtoinfo);
1765 fromTo.fromto_info = fromtoinfo1;
1766 fromTo_list.push_back(fromTo);
// Returns an XML listing of the directory addressed by `path`, or NULL when
// the path is rejected (one visible rejection: the path does not end with
// '/').
//   path - '/'-separated directory path; scanning starts at path+1, so the
//          first character is skipped.
//          NOTE(review): assumes path is non-empty and starts with '/' —
//          confirm with callers.
// The path is resolved component by component from root_info_item, descending
// into the sub-directory (isdir == 1) whose name equals the component.
// The listing is built once per directory and cached in its info_string:
//   <parent>path</parent>
//   <dir><name/><dirname/><dictcount/></dir>            per sub-directory
//   <dict>[<islink>1</islink>][<level/>]<uid/><bookname/><wordcount/></dict>
//                                                        per dictionary or link
// NOTE(review): `path`, the directory name and dirname are appended without
// markup escaping; only the bookname is escaped. Confirm this is safe.
// NOTE(review): the loop control of the path walk, the not-found handling and
// the g_free() calls for the temporary strings are not visible in this
// extract.
1773 const std::string *Libs::get_dir_info(const char *path)
1777 DictInfoItem *info_item = root_info_item;
1779 const char *p = path+1;
1783 p1 = strchr(p, '/');
1785 item.assign(p, p1-p);
1786 if (!item.empty()) {
1788 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1789 if ((*i)->isdir == 1) {
1790 if ((*i)->dir->name == item) {
1804 return NULL; // Not end by '/'.
1805 DictInfoDirItem *dir = info_item->dir;
1806 if (dir->info_string.empty()) {
1807 dir->info_string += "<parent>";
1808 dir->info_string += path;
1809 dir->info_string += "</parent>";
1811 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1812 if ((*i)->isdir == 1) {
1813 dir->info_string += "<dir><name>";
1814 dir->info_string += (*i)->dir->name;
1815 dir->info_string += "</name><dirname>";
1816 dir->info_string += (*i)->dir->dirname;
1817 dir->info_string += "</dirname><dictcount>";
1818 gchar *dictcount = g_strdup_printf("%u", (*i)->dir->dictcount);
1819 dir->info_string += dictcount;
1821 dir->info_string += "</dictcount></dir>";
1823 dir->info_string += "<dict>";
1824 if ((*i)->isdir == 2)
1825 dir->info_string += "<islink>1</islink>";
// The level element is emitted only when the level is non-zero.
1826 if ((*i)->dict->level != 0) {
1827 dir->info_string += "<level>";
1828 gchar *level = g_strdup_printf("%u", (*i)->dict->level);
1829 dir->info_string += level;
1831 dir->info_string += "</level>";
1833 dir->info_string += "<uid>";
1834 dir->info_string += (*i)->dict->uid;
1835 dir->info_string += "</uid><bookname>";
1836 etext = g_markup_escape_text(oLib[(*i)->dict->id]->dict_name().c_str(), -1);
1837 dir->info_string += etext;
1839 dir->info_string += "</bookname><wordcount>";
1840 gchar *wc = g_strdup_printf("%ld", oLib[(*i)->dict->id]->narticles());
1841 dir->info_string += wc;
1843 dir->info_string += "</wordcount></dict>";
1847 return &(dir->info_string);
// Returns the access level recorded for the dictionary with the given uid.
//   uid - dictionary uid, looked up in uidmap.
// NOTE(review): the value returned for an unknown uid is not visible in this
// extract; get_dicts_list() treats a negative level as "skip", so presumably
// a negative value is returned — confirm.
1850 int Libs::get_dict_level(const char *uid)
1852 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1853 uid_iter = uidmap.find(uid);
1854 if (uid_iter==uidmap.end())
1856 return uid_iter->second->level;
// Builds the concatenated short-info XML for the dictionaries named in
// `dictmask`.
//   dictmask       - uids separated by single spaces.
//   max_dict_count - upper bound checked against `count` before each entry is
//                    appended.
//   userLevel      - entries whose level is negative or greater than this
//                    value are filtered out.
// Each accepted uid contributes the string from get_dict_info(uid, true).
// NOTE(review): the tokenising loop control, the statements executed by the
// two filters (presumably continue / break), the handling of a NULL
// info_string and the update of `count` are not visible in this extract.
1859 std::string Libs::get_dicts_list(const char *dictmask, int max_dict_count, int userLevel)
1861 std::list<std::string> uid_list;
1866 p1 = strchr(p, ' ');
1868 uid.assign(p, p1-p);
1870 uid_list.push_back(uid);
1876 uid_list.push_back(uid);
1878 std::string dictmask_str;
1880 const std::string *info_string;
1882 for (std::list<std::string>::iterator i = uid_list.begin(); i!= uid_list.end(); ++i) {
1883 level = get_dict_level((*i).c_str());
1884 if (level < 0 || level > userLevel)
1886 info_string = get_dict_info(i->c_str(), true);
1888 if (count>=max_dict_count)
1890 dictmask_str += info_string->c_str();
1894 return dictmask_str;
// Returns a pointer to the cached XML description of the dictionary `uid`.
//   uid      - dictionary uid, looked up in uidmap.
//   is_short - selects between the short form (cached in short_info_string)
//              and the full form (cached in info_string).
//              NOTE(review): the branch on is_short itself is on a line not
//              visible in this extract.
// Short form: <dict><uid/><bookname/><wordcount/></dict>, built from the
// loaded dictionary.
// Full form:  <dictinfo> with bookname, wordcount, optional synwordcount,
// author, email, website, description, date and download, built from the
// dictionary's .ifo file.
// Free-text fields are markup-escaped before insertion; `uid` is inserted as
// given.
// NOTE(review): the values returned for an unknown uid or an unreadable .ifo
// file, the declaration of dict_info, and the g_free() calls for the
// temporaries allocated by g_markup_escape_text()/g_strdup_printf() are not
// visible in this extract.
1897 const std::string *Libs::get_dict_info(const char *uid, bool is_short)
1899 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1900 uid_iter = uidmap.find(uid);
1901 if (uid_iter==uidmap.end())
1903 DictInfoDictItem *dict;
1904 dict = uid_iter->second;
1906 if (dict->short_info_string.empty()) {
1908 dict->short_info_string += "<dict><uid>";
1909 dict->short_info_string += uid;
1910 dict->short_info_string += "</uid><bookname>";
1911 etext = g_markup_escape_text(oLib[dict->id]->dict_name().c_str(), -1);
1912 dict->short_info_string += etext;
1914 dict->short_info_string += "</bookname><wordcount>";
1915 gchar *wc = g_strdup_printf("%ld", oLib[dict->id]->narticles());
1916 dict->short_info_string += wc;
1918 dict->short_info_string += "</wordcount></dict>";
1920 return &(dict->short_info_string);
1922 if (dict->info_string.empty()) {
1925 if (!dict_info.load_from_ifo_file(oLib[dict->id]->ifofilename(), false))
1927 dict->info_string += "<dictinfo><bookname>";
1928 etext = g_markup_escape_text(dict_info.bookname.c_str(), -1);
1929 dict->info_string += etext;
1931 dict->info_string += "</bookname><wordcount>";
1932 gchar *wc = g_strdup_printf("%u", dict_info.wordcount);
1933 dict->info_string += wc;
1935 dict->info_string += "</wordcount>";
// The synwordcount element is emitted only when the count is non-zero.
1936 if (dict_info.synwordcount!=0) {
1937 dict->info_string += "<synwordcount>";
1938 wc = g_strdup_printf("%u", dict_info.synwordcount);
1939 dict->info_string += wc;
1941 dict->info_string += "</synwordcount>";
1943 dict->info_string += "<author>";
1944 etext = g_markup_escape_text(dict_info.author.c_str(), -1);
1945 dict->info_string += etext;
1947 dict->info_string += "</author><email>";
1948 etext = g_markup_escape_text(dict_info.email.c_str(), -1);
1949 dict->info_string += etext;
1951 dict->info_string += "</email><website>";
1952 etext = g_markup_escape_text(dict_info.website.c_str(), -1);
1953 dict->info_string += etext;
1955 dict->info_string += "</website><description>";
1956 etext = g_markup_escape_text(dict_info.description.c_str(), -1);
1957 dict->info_string += etext;
1959 dict->info_string += "</description><date>";
1960 etext = g_markup_escape_text(dict_info.date.c_str(), -1);
1961 dict->info_string += etext;
1963 dict->info_string += "</date><download>";
1964 etext = g_markup_escape_text(dict->download.c_str(), -1);
1965 dict->info_string += etext;
1967 dict->info_string += "</download></dictinfo>";
1969 return &(dict->info_string);
// Translates a space-separated uid list into entries of `dictmask`.
//   dictmask  - output vector; receives one InstantDictType_LOCAL entry per
//               accepted dictionary, with `index` set to its position in oLib.
//   dicts     - uids separated by single spaces.
//   max       - when >= 0, limit checked against `count` for each known uid.
//   userLevel - when >= 0, dictionaries whose level is higher than this value
//               are filtered out.
// uids that are not present in uidmap are ignored.
// NOTE(review): the tokenising loop control, the statements executed by the
// two limit checks (presumably break / continue), the update of `count` and
// any initial clearing of dictmask are not visible in this extract.
1973 void Libs::SetServerDictMask(std::vector<InstantDictIndex> &dictmask, const char *dicts, int max, int userLevel)
1975 InstantDictIndex instance_dict_index;
1976 instance_dict_index.type = InstantDictType_LOCAL;
1978 std::list<std::string> uid_list;
1983 p1 = strchr(p, ' ');
1985 uid.assign(p, p1-p);
1987 uid_list.push_back(uid);
1993 uid_list.push_back(uid);
1995 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1996 for (std::list<std::string>::iterator i = uid_list.begin(); i!= uid_list.end(); ++i) {
1997 uid_iter = uidmap.find(*i);
1998 if (uid_iter!=uidmap.end()) {
1999 if (max>=0 && count >= max)
2001 if (userLevel>=0 && (unsigned int)userLevel< uid_iter->second->level)
2003 instance_dict_index.index = uid_iter->second->id;
2004 dictmask.push_back(instance_dict_index);
// Calls collate_load(cltfuc) for every local dictionary selected by
// `dictmask`.
//   dictmask - dictionary selection; only InstantDictType_LOCAL entries are
//              processed.
//   cltfuc   - collate function passed to collate_load().
// The main index is always processed; the synonym index only when the
// dictionary has one.
2010 void Libs::LoadCollateFile(std::vector<InstantDictIndex> &dictmask, CollateFunctions cltfuc)
2012 for (std::vector<InstantDictIndex>::iterator i = dictmask.begin(); i!=dictmask.end(); ++i) {
2013 if ((*i).type == InstantDictType_LOCAL) {
2014 oLib[(*i).index]->idx_file->collate_load(cltfuc);
2015 if (oLib[(*i).index]->syn_file.get() != NULL)
2016 oLib[(*i).index]->syn_file->collate_load(cltfuc);
2022 #ifdef SD_CLIENT_CODE
// Searches oLib linearly for the dictionary whose .ifo file name equals
// `filename`.
//   filename - .ifo file name to look for.
//   iLib     - output parameter for the position in oLib.
// NOTE(review): the assignment to iLib and the return statements are not
// visible in this extract; presumably iLib is set and true is returned on a
// match, false otherwise — confirm.
2023 bool Libs::find_lib_by_filename(const char *filename, size_t &iLib)
2025 for (std::vector<Dict *>::size_type i =0; i < oLib.size(); i++) {
2026 if (oLib[i]->ifofilename() == filename) {
// Loads every dictionary listed in `load_list`, in order, through
// load_dict() using the collection's progress reporter.
// The result of each load_dict() call is not checked here.
2034 void Libs::load(std::list<std::string> &load_list)
2036 for (std::list<std::string>::iterator i = load_list.begin(); i != load_list.end(); ++i) {
2037 load_dict(*i, show_progress);
// Replaces the set of loaded dictionaries with those named in `load_list`.
//   load_list   - dictionary files to have loaded afterwards.
//   is_coll_enb - requested collation level.
//   collf       - requested collate function.
// When the collation settings are unchanged, the current Dict objects are
// copied into `prev`; each requested file is searched for there and only
// files that are not found are loaded through load_dict().
// When the settings changed, the new level and function are stored and the
// collation support is initialised as in the constructor (level 1: the
// selected function; level 2: all functions; a non-zero return is reported on
// stdout).
// NOTE(review): many lines are not visible in this extract — clearing oLib,
// taking reused entries out of `prev`, the bodies of the two clean-up loops
// (presumably deleting dictionaries that are no longer wanted) and the final
// reload in the changed-settings branch. Confirm against the full file.
2041 void Libs::reload(std::list<std::string> &load_list, int is_coll_enb, int collf)
2043 if (is_coll_enb == EnableCollationLevel && collf == CollateFunction) {
2044 std::vector<Dict *> prev(oLib);
2046 for (std::list<std::string>::iterator i = load_list.begin(); i != load_list.end(); ++i) {
2047 std::vector<Dict *>::iterator it;
2048 for (it=prev.begin(); it!=prev.end(); ++it) {
2049 if ((*it)->ifofilename()==*i)
2052 if (it==prev.end()) {
2053 load_dict(*i, show_progress);
2057 oLib.push_back(res);
2060 for (std::vector<Dict *>::iterator it=prev.begin(); it!=prev.end(); ++it) {
2064 for (std::vector<Dict *>::iterator it = oLib.begin(); it != oLib.end(); ++it)
2067 EnableCollationLevel = is_coll_enb;
2068 CollateFunction = CollateFunctions(collf);
2069 if (EnableCollationLevel == 0) {
2070 } else if (EnableCollationLevel == 1) {
2071 if (utf8_collate_init(CollateFunction))
2072 printf("Init collate function failed!\n");
2073 } else if (EnableCollationLevel == 2) {
2074 if (utf8_collate_init_all())
2075 printf("Init collate functions failed!\n");
// Converts an index in collation order into the original index of the main
// word list of dictionary `iLib`.
//   cltidx            - index in collation order.
//   iLib              - position of the dictionary in oLib.
//   servercollatefunc - 0 or (collate function + 1); used only when
//                       EnableCollationLevel is neither 0 nor 1.
// Level 1 maps through the dictionary's single clt_file. Otherwise, for a
// non-zero servercollatefunc, the matching collation data is loaded and the
// index is mapped through clt_files[servercollatefunc-1].
// NOTE(review): the return statements for level 0, for servercollatefunc == 0
// and for cltidx == INVALID_INDEX are not visible in this extract; presumably
// cltidx is returned unchanged in those cases — confirm.
2082 glong Libs::CltIndexToOrig(glong cltidx, size_t iLib, int servercollatefunc)
2084 if (EnableCollationLevel == 0)
2086 if (EnableCollationLevel == 1) {
2087 if (cltidx == INVALID_INDEX)
2089 return oLib[iLib]->idx_file->clt_file->GetOrigIndex(cltidx);
2091 if (servercollatefunc == 0)
2093 if (cltidx == INVALID_INDEX)
2095 oLib[iLib]->idx_file->collate_load((CollateFunctions)(servercollatefunc-1));
2096 return oLib[iLib]->idx_file->clt_files[servercollatefunc-1]->GetOrigIndex(cltidx);
// Converts an index in collation order into the original index of the
// synonym word list of dictionary `iLib`.
//   cltidx            - index in collation order.
//   iLib              - position of the dictionary in oLib.
//   servercollatefunc - 0 or (collate function + 1); used only when
//                       EnableCollationLevel is neither 0 nor 1.
// Level 1 maps through the synonym file's single clt_file. Otherwise, for a
// non-zero servercollatefunc, the matching collation data is loaded and the
// index is mapped through clt_files[servercollatefunc-1].
// UNSET_INDEX and INVALID_INDEX are special-cased before any mapping.
// NOTE(review): the return statements of the special cases are not visible in
// this extract; presumably cltidx is returned unchanged — confirm.
// NOTE(review): syn_file is dereferenced without a NULL check in the visible
// lines; confirm callers only pass real synonym indices for dictionaries that
// have a synonym file.
2099 glong Libs::CltSynIndexToOrig(glong cltidx, size_t iLib, int servercollatefunc)
2101 if (EnableCollationLevel == 0)
2103 if (EnableCollationLevel == 1) {
2104 if (cltidx == UNSET_INDEX || cltidx == INVALID_INDEX)
2106 return oLib[iLib]->syn_file->clt_file->GetOrigIndex(cltidx);
2108 if (servercollatefunc == 0)
2110 if (cltidx == UNSET_INDEX || cltidx == INVALID_INDEX)
2112 oLib[iLib]->syn_file->collate_load((CollateFunctions)(servercollatefunc-1));
2113 return oLib[iLib]->syn_file->clt_files[servercollatefunc-1]->GetOrigIndex(cltidx);
// Chooses one suggestion for `sWord` among the per-dictionary suggestion
// indices stored in `iCurrent`.
//   sWord             - the word that was looked up.
//   iCurrent          - array parallel to dictmask; idx_suggest and
//                       synidx_suggest are read.
//   dictmask          - dictionary selection; only InstantDictType_LOCAL
//                       entries are considered.
//   servercollatefunc - passed through to the word getters and to
//                       stardict_server_collate().
// The first candidate seeds the result; each later candidate is scored with
// prefix_match() against sWord (`back`) and compared with the best score so
// far (`best`). Equal scores are resolved with stardict_server_collate().
// Main-index suggestions are examined first, then synonym suggestions;
// synonym entries whose synidx_suggest is UNSET_INDEX are special-cased.
// Returns the chosen word, or NULL when there was no candidate.
// NOTE(review): the comparisons that decide when a candidate replaces the
// current one (presumably back > best, and a sign test on the collation
// result) and the statements after the type / UNSET_INDEX checks (presumably
// continue) are not visible in this extract.
2116 const gchar *Libs::GetSuggestWord(const gchar *sWord, CurrentIndex *iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2118 const gchar *poCurrentWord = NULL;
2122 std::vector<InstantDictIndex>::size_type iLib;
2123 std::vector<Dict *>::size_type iRealLib;
2124 for (iLib=0; iLib < dictmask.size(); iLib++) {
2125 if (dictmask[iLib].type != InstantDictType_LOCAL)
2127 iRealLib = dictmask[iLib].index;
2128 if ( poCurrentWord == NULL ) {
2129 poCurrentWord = poGetWord(iCurrent[iLib].idx_suggest, iRealLib, servercollatefunc);
2130 best = prefix_match (sWord, poCurrentWord);
2132 word = poGetWord(iCurrent[iLib].idx_suggest, iRealLib, servercollatefunc);
2133 back = prefix_match (sWord, word);
2136 poCurrentWord = word;
2137 } else if (back == best) {
2138 gint x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2140 poCurrentWord = word;
2145 for (iLib=0; iLib<dictmask.size(); iLib++) {
2146 if (dictmask[iLib].type != InstantDictType_LOCAL)
2148 if (iCurrent[iLib].synidx_suggest==UNSET_INDEX)
2150 iRealLib = dictmask[iLib].index;
2151 if ( poCurrentWord == NULL ) {
2152 poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx_suggest, iRealLib, servercollatefunc);
2153 best = prefix_match (sWord, poCurrentWord);
2155 word = poGetSynonymWord(iCurrent[iLib].synidx_suggest, iRealLib, servercollatefunc);
2156 back = prefix_match (sWord, word);
2159 poCurrentWord = word;
2160 } else if (back == best) {
2161 gint x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2163 poCurrentWord = word;
2168 return poCurrentWord;
// Chooses one word among the words the per-dictionary cursors in `iCurrent`
// currently point at.
//   iCurrent          - array parallel to dictmask; idx and synidx are read.
//   dictmask          - dictionary selection; only InstantDictType_LOCAL
//                       entries are considered.
//   servercollatefunc - passed through to the word getters and to
//                       stardict_server_collate().
// Cursors equal to INVALID_INDEX (or UNSET_INDEX for synonyms) and cursors
// outside [0, narticles) / [0, nsynarticles) are special-cased before the word
// is fetched. The first usable word seeds the result; later words are
// compared with it through stardict_server_collate().
// Returns the chosen word, or NULL when no cursor yielded a word.
// NOTE(review): the statements following the guard conditions (presumably
// continue) and the sign test that decides when a later word replaces the
// current one are not visible in this extract; presumably the word that
// collates first is kept — confirm.
2171 const gchar *Libs::poGetCurrentWord(CurrentIndex * iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2173 const gchar *poCurrentWord = NULL;
2175 std::vector<InstantDictIndex>::size_type iLib;
2176 std::vector<Dict *>::size_type iRealLib;
2177 for (iLib=0; iLib < dictmask.size(); iLib++) {
2178 if (dictmask[iLib].type != InstantDictType_LOCAL)
2180 iRealLib = dictmask[iLib].index;
2181 if (iCurrent[iLib].idx==INVALID_INDEX)
2183 if ( iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2185 if ( poCurrentWord == NULL ) {
2186 poCurrentWord = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2188 word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2189 gint x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2191 poCurrentWord = word;
2195 for (iLib=0; iLib<dictmask.size(); iLib++) {
2196 if (dictmask[iLib].type != InstantDictType_LOCAL)
2198 iRealLib = dictmask[iLib].index;
2199 if (iCurrent[iLib].synidx==UNSET_INDEX)
2201 if (iCurrent[iLib].synidx==INVALID_INDEX)
2203 if ( iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2205 if ( poCurrentWord == NULL ) {
2206 poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2208 word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2209 gint x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2211 poCurrentWord = word;
2215 return poCurrentWord;
// Advances the per-dictionary cursors in `iCurrent` past the current word and
// returns the word they point at afterwards.
//   sWord             - word to position the cursors on first, or NULL to use
//                       the cursors as they are (see the comments below).
//   iCurrent          - array parallel to dictmask; idx / synidx are read and
//                       updated.
//   dictmask          - dictionary selection; only InstantDictType_LOCAL
//                       entries are considered.
//   servercollatefunc - passed through to lookups, word getters and
//                       stardict_server_collate().
// Phases visible in the code:
//   1. Scan the main-index and then the synonym cursors, choosing the current
//      word by collation and remembering which dictionary supplied it
//      (iCurrentLib / iCurrentRealLib).
//   2. For every other cursor whose word is byte-equal (strcmp) to the chosen
//      word, step it with GetWordNext().
//   3. Step the cursor that supplied the chosen word last, because
//      GetWordNext() changes the content poCurrentWord points at.
//   4. Return poGetCurrentWord() over the updated cursors.
// NOTE(review): the return-type line of this definition, the `if (sWord)`
// guards around the Lookup calls, the `isLib` bookkeeping, the sign tests on
// the collation result and the statements after each guard condition
// (presumably continue) are not visible in this extract.
2219 Libs::poGetNextWord(const gchar *sWord, CurrentIndex *iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2221 // the input can be:
2222 // (word,iCurrent),read word,write iNext to iCurrent,and return next word. used by TopWin::NextCallback();
2223 // (NULL,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. used by AppCore::ListWords();
2224 const gchar *poCurrentWord = NULL;
2225 std::vector<Dict *>::size_type iCurrentLib=0, iCurrentRealLib=0;
2229 std::vector<InstantDictIndex>::size_type iLib;
2230 std::vector<Dict *>::size_type iRealLib;
2231 for (iLib=0; iLib < dictmask.size(); iLib++) {
2232 if (dictmask[iLib].type != InstantDictType_LOCAL)
2234 iRealLib = dictmask[iLib].index;
2236 oLib[iRealLib]->Lookup(sWord, iCurrent[iLib].idx, iCurrent[iLib].idx_suggest, EnableCollationLevel, servercollatefunc);
2238 if (iCurrent[iLib].idx==INVALID_INDEX)
2240 if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2242 if (poCurrentWord == NULL ) {
2243 poCurrentWord = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2245 iCurrentRealLib = iRealLib;
2249 word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2250 x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2252 poCurrentWord = word;
2254 iCurrentRealLib = iRealLib;
2259 for (iLib=0; iLib < dictmask.size(); iLib++) {
2260 if (dictmask[iLib].type != InstantDictType_LOCAL)
2262 iRealLib = dictmask[iLib].index;
2264 oLib[iRealLib]->LookupSynonym(sWord, iCurrent[iLib].synidx, iCurrent[iLib].synidx_suggest, EnableCollationLevel, servercollatefunc);
2266 if (iCurrent[iLib].synidx==UNSET_INDEX)
2268 if (iCurrent[iLib].synidx==INVALID_INDEX)
2270 if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2272 if (poCurrentWord == NULL ) {
2273 poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2275 iCurrentRealLib = iRealLib;
2279 word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2280 x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2282 poCurrentWord = word;
2284 iCurrentRealLib = iRealLib;
2289 if (poCurrentWord) {
2290 for (iLib=0; iLib < dictmask.size(); iLib++) {
2291 if (dictmask[iLib].type != InstantDictType_LOCAL)
2293 iRealLib = dictmask[iLib].index;
2294 if (isLib && (iLib == iCurrentLib))
2296 if (iCurrent[iLib].idx==INVALID_INDEX)
2298 if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2300 word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2301 if (strcmp(poCurrentWord, word) == 0) {
2302 GetWordNext(iCurrent[iLib].idx, iRealLib, true, servercollatefunc);
2305 for (iLib=0; iLib < dictmask.size(); iLib++) {
2306 if (dictmask[iLib].type != InstantDictType_LOCAL)
2308 iRealLib = dictmask[iLib].index;
2309 if ((!isLib) && (iLib == iCurrentLib))
2311 if (iCurrent[iLib].synidx==UNSET_INDEX)
2313 if (iCurrent[iLib].synidx==INVALID_INDEX)
2315 if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2317 word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2318 if (strcmp(poCurrentWord, word) == 0) {
2319 GetWordNext(iCurrent[iLib].synidx, iRealLib, false, servercollatefunc);
2322 //GetWordNext will change poCurrentWord's content, so do it at the last.
2324 GetWordNext(iCurrent[iCurrentLib].idx, iCurrentRealLib, true, servercollatefunc);
2326 GetWordNext(iCurrent[iCurrentLib].synidx, iCurrentRealLib, false, servercollatefunc);
2328 poCurrentWord = poGetCurrentWord(iCurrent, dictmask, servercollatefunc);
2330 return poCurrentWord;
// Moves the per-dictionary cursors in `iCurrent` back to the word that
// precedes the current position and returns that word.
//   sWord             - word passed to Lookup()/LookupSynonym() to position
//                       the cursors before stepping back.
//   iCurrent          - array parallel to dictmask; idx / synidx are read and
//                       updated.
//   dictmask          - dictionary selection; only InstantDictType_LOCAL
//                       entries are considered.
//   servercollatefunc - passed through to lookups, word getters and
//                       stardict_server_collate().
// Phases visible in the code:
//   1. For each main-index and then synonym cursor, obtain the previous
//      position with GetWordPrev() and choose one of those previous words by
//      collation, remembering which dictionary supplied it.
//   2. For every other cursor whose previous word is byte-equal (strcmp) to
//      the chosen word, move the cursor to that previous position.
//   3. Move the cursor that supplied the chosen word.
// Returns the chosen word, or NULL when no cursor had a previous word.
// NOTE(review): the return-type line of this definition, the `if (sWord)`
// guards around the lookups, the handling of INVALID_INDEX and out-of-range
// cursors, the `isLib` bookkeeping, the sign tests on the collation result and
// the statements after each guard condition are not visible in this extract.
2334 Libs::poGetPreWord(const gchar *sWord, CurrentIndex* iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2336 // used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange();
2337 const gchar *poCurrentWord = NULL;
2338 std::vector<Dict *>::size_type iCurrentLib=0, iCurrentRealLib=0;
2343 std::vector<InstantDictIndex>::size_type iLib;
2344 std::vector<Dict *>::size_type iRealLib;
2345 for (iLib=0;iLib<dictmask.size();iLib++) {
2346 if (dictmask[iLib].type != InstantDictType_LOCAL)
2348 iRealLib = dictmask[iLib].index;
2350 oLib[iRealLib]->Lookup(sWord, iCurrent[iLib].idx, iCurrent[iLib].idx_suggest, EnableCollationLevel, servercollatefunc);
2352 if (iCurrent[iLib].idx!=INVALID_INDEX) {
2353 if ( iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<=0)
2356 if ( poCurrentWord == NULL ) {
2357 if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2358 poCurrentWord = poGetWord(pidx, iRealLib, servercollatefunc);
2360 iCurrentRealLib = iRealLib;
2364 if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2366 word = poGetWord(pidx, iRealLib, servercollatefunc);
2367 x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2369 poCurrentWord = word;
2371 iCurrentRealLib = iRealLib;
2377 for (iLib=0;iLib<dictmask.size();iLib++) {
2378 if (dictmask[iLib].type != InstantDictType_LOCAL)
2380 iRealLib = dictmask[iLib].index;
2382 oLib[iRealLib]->LookupSynonym(sWord, iCurrent[iLib].synidx, iCurrent[iLib].synidx_suggest, EnableCollationLevel, servercollatefunc);
2384 if (iCurrent[iLib].synidx==UNSET_INDEX)
2386 if (iCurrent[iLib].synidx!=INVALID_INDEX) {
2387 if ( iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<=0)
2390 if ( poCurrentWord == NULL ) {
2391 if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2392 poCurrentWord = poGetSynonymWord(pidx, iRealLib, servercollatefunc);
2394 iCurrentRealLib = iRealLib;
2398 if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2400 word = poGetSynonymWord(pidx,iRealLib, servercollatefunc);
2401 x = stardict_server_collate(poCurrentWord, word, EnableCollationLevel, CollateFunction, servercollatefunc);
2403 poCurrentWord = word;
2405 iCurrentRealLib = iRealLib;
2411 if (poCurrentWord) {
2412 for (iLib=0;iLib<dictmask.size();iLib++) {
2413 if (dictmask[iLib].type != InstantDictType_LOCAL)
2415 iRealLib = dictmask[iLib].index;
2416 if (isLib && (iLib == iCurrentLib))
2418 if (iCurrent[iLib].idx!=INVALID_INDEX) {
2419 if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<=0)
2422 if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2423 word = poGetWord(pidx, iRealLib, servercollatefunc);
2424 if (strcmp(poCurrentWord, word) == 0) {
2425 iCurrent[iLib].idx=pidx;
2429 for (iLib=0;iLib<dictmask.size();iLib++) {
2430 if (dictmask[iLib].type != InstantDictType_LOCAL)
2432 iRealLib = dictmask[iLib].index;
2433 if ((!isLib) && (iLib == iCurrentLib))
2435 if (iCurrent[iLib].synidx==UNSET_INDEX)
2437 if (iCurrent[iLib].synidx!=INVALID_INDEX) {
2438 if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<=0)
2441 if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2442 word = poGetSynonymWord(pidx, iRealLib, servercollatefunc);
2443 if (strcmp(poCurrentWord, word) == 0) {
2444 iCurrent[iLib].synidx=pidx;
2449 GetWordPrev(iCurrent[iCurrentLib].idx, pidx, iCurrentRealLib, true, servercollatefunc);
2450 iCurrent[iCurrentLib].idx = pidx;
2452 GetWordPrev(iCurrent[iCurrentLib].synidx, pidx, iCurrentRealLib, false, servercollatefunc);
2453 iCurrent[iCurrentLib].synidx = pidx;
2456 return poCurrentWord;
// Looks `sWord` up in the synonym index of dictionary `iLib`, trying case
// variants of the word.
//   sWord              - word to look up (UTF-8).
//   iSynonymWordIndex  - in: starting index; out: index of the entry found.
//   synidx_suggest     - out: suggestion index from the last lookup.
//   iLib               - position of the dictionary in oLib.
//   servercollatefunc  - passed through to lookups and comparisons.
// Variants tried, each only when it differs from sWord: all lower-case, all
// upper-case, and first character upper-case with the rest lower-case.
// After that, neighbouring entries are compared with stardict_casecmp(): the
// entry before the index (via GetWordPrev) and the entry at the index.
// NOTE(review): the initial return for a dictionary without a synonym file,
// the `bFound` bookkeeping between the attempts, the g_free() calls for the
// temporary strings, the loop structure of the neighbour search and the final
// return statement are not visible in this extract.
2459 bool Libs::LookupSynonymSimilarWord(const gchar* sWord, glong &iSynonymWordIndex, glong &synidx_suggest, size_t iLib, int servercollatefunc)
2461 if (oLib[iLib]->syn_file.get() == NULL)
2465 glong iIndex_suggest;
2472 casestr = g_utf8_strdown(sWord, -1);
2473 if (strcmp(casestr, sWord)) {
2474 bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, EnableCollationLevel, servercollatefunc);
2481 casestr = g_utf8_strup(sWord, -1);
2482 if (strcmp(casestr, sWord)) {
2483 bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, EnableCollationLevel, servercollatefunc);
2489 // Upper the first character and lower others.
2491 gchar *nextchar = g_utf8_next_char(sWord);
2492 gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord);
// From here on `nextchar` holds a newly allocated lower-cased copy of the
// remainder of the word, no longer a pointer into sWord.
2493 nextchar = g_utf8_strdown(nextchar, -1);
2494 casestr = g_strdup_printf("%s%s", firstchar, nextchar);
2497 if (strcmp(casestr, sWord)) {
2498 bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, EnableCollationLevel, servercollatefunc);
2505 iIndex = iSynonymWordIndex;
2509 if (GetWordPrev(iIndex, pidx, iLib, false, servercollatefunc)) {
2510 cword = poGetSynonymWord(pidx, iLib, servercollatefunc);
2511 if (stardict_casecmp(cword, sWord, EnableCollationLevel, CollateFunction, servercollatefunc)==0) {
2522 if (iIndex!=INVALID_INDEX) {
2523 cword = poGetSynonymWord(iIndex, iLib, servercollatefunc);
2524 if (stardict_casecmp(cword, sWord, EnableCollationLevel, CollateFunction, servercollatefunc)==0) {
2532 iSynonymWordIndex = iIndex;
2533 synidx_suggest = iIndex_suggest;
// Libs::LookupSimilarWord: tries to find sWord in dictionary oLib[iLib] under
// alternative spellings: case variants first, then (for pure-English input)
// forms with common English suffixes removed.
//   sWord             word to look up.
//   iWordIndex        in/out: seeds the neighbour comparison below and is
//                     assigned from iIndex near the end of the function
//                     (presumably only on success; confirm).
//   idx_suggest       passed through to every Dict::Lookup call.
//   iLib              index into oLib.
//   servercollatefunc forwarded unchanged to the lookup/compare helpers.
// NOTE(review): this listing omits a number of short lines (braces, local
// declarations, bFound assignments, g_free calls, the return statement), so
// the comments below describe only what the visible lines establish.
2538 bool Libs::LookupSimilarWord(const gchar* sWord, glong & iWordIndex, glong &idx_suggest, size_t iLib, int servercollatefunc)
// Variant 1: all-lowercase form, looked up only when it differs from sWord.
2546 casestr = g_utf8_strdown(sWord, -1);
2547 if (strcmp(casestr, sWord)) {
2548 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Variant 2: all-uppercase form, looked up only when it differs from sWord.
2554 casestr = g_utf8_strup(sWord, -1);
2555 if (strcmp(casestr, sWord)) {
2556 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2561 // Upper the first character and lower others.
// firstchar holds the upper-cased first UTF-8 character; nextchar is reused to
// hold the lower-cased remainder; both are joined into casestr.
2563 gchar *nextchar = g_utf8_next_char(sWord);
2564 gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord);
2565 nextchar = g_utf8_strdown(nextchar, -1);
2566 casestr = g_strdup_printf("%s%s", firstchar, nextchar);
2569 if (strcmp(casestr, sWord)) {
2570 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Neighbour check: starting from the caller-supplied index, compare the
// previous entry (via GetWordPrev/poGetWord) and the entry at iIndex itself
// against sWord using stardict_casecmp.
2576 iIndex = iWordIndex;
2580 if (GetWordPrev(iIndex, pidx, iLib, true, servercollatefunc)) {
2581 cword = poGetWord(pidx, iLib, servercollatefunc);
2582 if (stardict_casecmp(cword, sWord, EnableCollationLevel, CollateFunction, servercollatefunc)==0) {
2593 if (iIndex!=INVALID_INDEX) {
2594 cword = poGetWord(iIndex, iLib, servercollatefunc);
2595 if (stardict_casecmp(cword, sWord, EnableCollationLevel, CollateFunction, servercollatefunc)==0) {
// Suffix heuristics run only when bIsPureEnglish(sWord) holds. Each stage is
// skipped once bFound is set. Within a stage the stripped form is looked up
// first; if the suffix was upper-case or sWord starts with an upper-case ASCII
// letter, its g_ascii_strdown form is tried as well (when it differs).
2603 if (bIsPureEnglish(sWord)) {
2604 // If not Found , try other status of sWord.
2605 size_t iWordLen=strlen(sWord);
// Scratch buffer sized for sWord itself: every variant below cuts at least as
// many characters as it appends, so it never exceeds iWordLen characters.
2608 gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1);
2610 //cut one char "s" or "d"
2611 if(!bFound && iWordLen>1) {
2612 isupcase = sWord[iWordLen-1]=='S' || !strncmp(&sWord[iWordLen-2],"ED",2);
2613 if (isupcase || sWord[iWordLen-1]=='s' || !strncmp(&sWord[iWordLen-2],"ed",2)) {
2614 strcpy(sNewWord,sWord);
2615 sNewWord[iWordLen-1]='\0'; // cut "s" or "d"
2616 if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2618 else if (isupcase || g_ascii_isupper(sWord[0])) {
2619 casestr = g_ascii_strdown(sNewWord, -1);
2620 if (strcmp(casestr, sNewWord)) {
2621 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Stage: cut a trailing "ly"/"LY".
2630 if(!bFound && iWordLen>2) {
2631 isupcase = !strncmp(&sWord[iWordLen-2],"LY",2);
2632 if (isupcase || (!strncmp(&sWord[iWordLen-2],"ly",2))) {
2633 strcpy(sNewWord,sWord);
2634 sNewWord[iWordLen-2]='\0'; // cut "ly"
// If the stem ends in a doubled non-vowel preceded by a vowel (per bIsVowel),
// first try the stem with one of the doubled letters removed.
2635 if (iWordLen>5 && sNewWord[iWordLen-3]==sNewWord[iWordLen-4]
2636 && !bIsVowel(sNewWord[iWordLen-4]) &&
2637 bIsVowel(sNewWord[iWordLen-5])) {//doubled
2639 sNewWord[iWordLen-3]='\0';
2640 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2643 if (isupcase || g_ascii_isupper(sWord[0])) {
2644 casestr = g_ascii_strdown(sNewWord, -1);
2645 if (strcmp(casestr, sNewWord)) {
2646 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Put the removed doubled letter back before trying the plain stem.
2652 sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore
2656 if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2658 else if (isupcase || g_ascii_isupper(sWord[0])) {
2659 casestr = g_ascii_strdown(sNewWord, -1);
2660 if (strcmp(casestr, sNewWord)) {
2661 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Stage: cut a trailing "ing"/"ING"; tries the de-doubled stem, the plain
// stem, and finally the stem with "e"/"E" appended.
2671 if(!bFound && iWordLen>3) {
2672 isupcase = !strncmp(&sWord[iWordLen-3],"ING",3);
2673 if (isupcase || !strncmp(&sWord[iWordLen-3],"ing",3) ) {
2674 strcpy(sNewWord,sWord);
2675 sNewWord[iWordLen-3]='\0';
2676 if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5])
2677 && !bIsVowel(sNewWord[iWordLen-5]) &&
2678 bIsVowel(sNewWord[iWordLen-6])) { //doubled
2679 sNewWord[iWordLen-4]='\0';
2680 if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2683 if (isupcase || g_ascii_isupper(sWord[0])) {
2684 casestr = g_ascii_strdown(sNewWord, -1);
2685 if (strcmp(casestr, sNewWord)) {
2686 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2692 sNewWord[iWordLen-4]=sNewWord[iWordLen-5]; //restore
2696 if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2698 else if (isupcase || g_ascii_isupper(sWord[0])) {
2699 casestr = g_ascii_strdown(sNewWord, -1);
2700 if (strcmp(casestr, sNewWord)) {
2701 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// The condition choosing between "E" and "e" is not visible in this listing
// (presumably isupcase; confirm).
2709 strcat(sNewWord,"E"); // add a char "E"
2711 strcat(sNewWord,"e"); // add a char "e"
2712 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2714 else if (isupcase || g_ascii_isupper(sWord[0])) {
2715 casestr = g_ascii_strdown(sNewWord, -1);
2716 if (strcmp(casestr, sNewWord)) {
2717 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Stage: cut a trailing "es"/"ES" when it follows s, x, o, ch or sh.
2727 if(!bFound && iWordLen>3) {
2728 isupcase = (!strncmp(&sWord[iWordLen-2],"ES",2) &&
2729 (sWord[iWordLen-3] == 'S' ||
2730 sWord[iWordLen-3] == 'X' ||
2731 sWord[iWordLen-3] == 'O' ||
2732 (iWordLen >4 && sWord[iWordLen-3] == 'H' &&
2733 (sWord[iWordLen-4] == 'C' ||
2734 sWord[iWordLen-4] == 'S'))));
2736 (!strncmp(&sWord[iWordLen-2],"es",2) &&
2737 (sWord[iWordLen-3] == 's' || sWord[iWordLen-3] == 'x' ||
2738 sWord[iWordLen-3] == 'o' ||
2739 (iWordLen >4 && sWord[iWordLen-3] == 'h' &&
2740 (sWord[iWordLen-4] == 'c' || sWord[iWordLen-4] == 's'))))) {
2741 strcpy(sNewWord,sWord);
2742 sNewWord[iWordLen-2]='\0';
2743 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2745 else if (isupcase || g_ascii_isupper(sWord[0])) {
2746 casestr = g_ascii_strdown(sNewWord, -1);
2747 if (strcmp(casestr, sNewWord)) {
2748 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Stage: cut a trailing "ed"/"ED"; tries the de-doubled stem, then the plain stem.
2757 if (!bFound && iWordLen>3) {
2758 isupcase = !strncmp(&sWord[iWordLen-2],"ED",2);
2759 if (isupcase || !strncmp(&sWord[iWordLen-2],"ed",2)) {
2760 strcpy(sNewWord,sWord);
2761 sNewWord[iWordLen-2]='\0';
2762 if (iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4])
2763 && !bIsVowel(sNewWord[iWordLen-4]) &&
2764 bIsVowel(sNewWord[iWordLen-5])) {//doubled
2765 sNewWord[iWordLen-3]='\0';
2766 if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2769 if (isupcase || g_ascii_isupper(sWord[0])) {
2770 casestr = g_ascii_strdown(sNewWord, -1);
2771 if (strcmp(casestr, sNewWord)) {
2772 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2778 sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore
2782 if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2784 else if (isupcase || g_ascii_isupper(sWord[0])) {
2785 casestr = g_ascii_strdown(sNewWord, -1);
2786 if (strcmp(casestr, sNewWord)) {
2787 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2796 // cut "ied" , add "y".
2797 if (!bFound && iWordLen>3) {
2798 isupcase = !strncmp(&sWord[iWordLen-3],"IED",3);
2799 if (isupcase || (!strncmp(&sWord[iWordLen-3],"ied",3))) {
2800 strcpy(sNewWord,sWord);
2801 sNewWord[iWordLen-3]='\0';
2803 strcat(sNewWord,"Y"); // add a char "Y"
2805 strcat(sNewWord,"y"); // add a char "y"
2806 if (oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2808 else if (isupcase || g_ascii_isupper(sWord[0])) {
2809 casestr = g_ascii_strdown(sNewWord, -1);
2810 if (strcmp(casestr, sNewWord)) {
2811 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2819 // cut "ies" , add "y".
2820 if (!bFound && iWordLen>3) {
2821 isupcase = !strncmp(&sWord[iWordLen-3],"IES",3);
2822 if (isupcase || (!strncmp(&sWord[iWordLen-3],"ies",3))) {
2823 strcpy(sNewWord,sWord);
2824 sNewWord[iWordLen-3]='\0';
2826 strcat(sNewWord,"Y"); // add a char "Y"
2828 strcat(sNewWord,"y"); // add a char "y"
2829 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2831 else if (isupcase || g_ascii_isupper(sWord[0])) {
2832 casestr = g_ascii_strdown(sNewWord, -1);
2833 if (strcmp(casestr, sNewWord)) {
2834 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Stage: cut a trailing "er"/"ER".
2843 if (!bFound && iWordLen>2) {
2844 isupcase = !strncmp(&sWord[iWordLen-2],"ER",2);
2845 if (isupcase || (!strncmp(&sWord[iWordLen-2],"er",2))) {
2846 strcpy(sNewWord,sWord);
2847 sNewWord[iWordLen-2]='\0';
2848 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2850 else if (isupcase || g_ascii_isupper(sWord[0])) {
2851 casestr = g_ascii_strdown(sNewWord, -1);
2852 if (strcmp(casestr, sNewWord)) {
2853 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Stage: cut a trailing "est"/"EST".
2862 if (!bFound && iWordLen>3) {
2863 isupcase = !strncmp(&sWord[iWordLen-3], "EST", 3);
2864 if (isupcase || (!strncmp(&sWord[iWordLen-3],"est", 3))) {
2865 strcpy(sNewWord,sWord);
2866 sNewWord[iWordLen-3]='\0';
2867 if(oLib[iLib]->Lookup(sNewWord, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
2869 else if (isupcase || g_ascii_isupper(sWord[0])) {
2870 casestr = g_ascii_strdown(sNewWord, -1);
2871 if (strcmp(casestr, sNewWord)) {
2872 if(oLib[iLib]->Lookup(casestr, iIndex, idx_suggest, EnableCollationLevel, servercollatefunc))
// Publish the index that was found to the caller.
2884 iWordIndex = iIndex;
2887 //don't change iWordIndex here.
2888 //when LookupSimilarWord all failed too, we want to use the old LookupWord index to list words.
2889 //iWordIndex = INVALID_INDEX;
// Libs::SimpleLookupWord: looks sWord up in oLib[iLib] with Dict::Lookup and
// falls back to LookupSimilarWord with the same arguments.
// NOTE(review): the condition guarding the fallback and the return statement
// are not visible in this listing (presumably "if not found" / return bFound).
2895 bool Libs::SimpleLookupWord(const gchar* sWord, glong & iWordIndex, glong &idx_suggest, size_t iLib, int servercollatefunc)
2897 bool bFound = oLib[iLib]->Lookup(sWord, iWordIndex, idx_suggest, EnableCollationLevel, servercollatefunc);
2899 bFound = LookupSimilarWord(sWord, iWordIndex, idx_suggest, iLib, servercollatefunc);
// Libs::SimpleLookupSynonymWord: synonym-index counterpart of SimpleLookupWord.
// Looks sWord up with Dict::LookupSynonym and falls back to
// LookupSynonymSimilarWord with the same arguments.
// NOTE(review): the condition guarding the fallback and the return statement
// are not visible in this listing (presumably "if not found" / return bFound).
2903 bool Libs::SimpleLookupSynonymWord(const gchar* sWord, glong & iWordIndex, glong &synidx_suggest, size_t iLib, int servercollatefunc)
2905 bool bFound = oLib[iLib]->LookupSynonym(sWord, iWordIndex, synidx_suggest, EnableCollationLevel, servercollatefunc);
2907 bFound = LookupSynonymSimilarWord(sWord, iWordIndex, synidx_suggest, iLib, servercollatefunc);
// One candidate slot used by the fuzzy search.
// NOTE(review): only the distance member is visible in this listing; the code
// that uses this struct also reads and writes a pMatchWord member whose
// declaration is not shown here.
2911 struct Fuzzystruct {
// Edit distance between this candidate and the query.
2913 int iMatchWordDistance;
// Ordering for Fuzzystruct: a smaller iMatchWordDistance sorts first; when the
// distances are equal and both words are non-NULL, the words are ordered with
// stardict_strcmp.
// NOTE(review): the result for the remaining case (equal distance, at least
// one NULL word) is not visible in this listing.
2916 static inline bool operator<(const Fuzzystruct & lh, const Fuzzystruct & rh) {
2917 if (lh.iMatchWordDistance!=rh.iMatchWordDistance)
2918 return lh.iMatchWordDistance<rh.iMatchWordDistance;
2920 if (lh.pMatchWord && rh.pMatchWord)
2921 return stardict_strcmp(lh.pMatchWord, rh.pMatchWord)<0;
// Converts UCS-4 code points of str to lower case in place using
// g_unichar_tolower.
// NOTE(review): the loop header and pointer advance are not visible in this
// listing; presumably it walks until the terminating 0 (confirm).
2926 static inline void unicode_strdown(gunichar *str)
2929 *str=g_unichar_tolower(*str);
// Libs::LookupWithFuzzy: scans the dictionaries selected by dictmask for words
// whose edit distance to sWord is small, keeping the best reslist_size of them.
//   sWord         query word (UTF-8); an empty string is tested for up front.
//   reslist       output array with reslist_size entries; each entry receives
//                 either a g_strdup'ed word or NULL (presumably the caller
//                 frees them; confirm against callers).
//   reslist_size  number of slots in reslist.
//   dictmask      dictionaries to search; entries are tested against
//                 InstantDictType_LOCAL (non-local ones presumably skipped).
// NOTE(review): several short lines (braces, continue/break, some locals, the
// return statement) are missing from this listing.
2934 bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size, std::vector<InstantDictIndex> &dictmask)
2936 if (sWord[0] == '\0')
// Candidate slots, all initialised to "empty word, worst allowed distance".
2939 std::vector<Fuzzystruct> oFuzzystruct(reslist_size);
2941 for (int i=0; i<reslist_size; i++) {
2942 oFuzzystruct[i].pMatchWord = NULL;
2943 oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance;
// iMaxDistance tracks the largest distance currently stored in the slots; a
// new candidate must beat it to be accepted.
2945 int iMaxDistance = iMaxFuzzyDistance;
2948 EditDistance oEditDistance;
2950 glong iCheckWordLen;
2952 gunichar *ucs4_str1, *ucs4_str2;
2953 glong ucs4_str2_len;
// Lower-cased UCS-4 copy of the query; ucs4_str2_len is its length in characters.
2955 ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len);
2956 unicode_strdown(ucs4_str2);
2958 std::vector<Dict *>::size_type iRealLib;
2959 for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
2960 if (dictmask[iLib].type != InstantDictType_LOCAL)
2962 iRealLib = dictmask[iLib].index;
// Two passes per dictionary. The lines selecting the pass are not visible,
// but the calls below read either the main index (narticles / poGetOrigWord)
// or the synonym index (nsynarticles / poGetOrigSynonymWord); the syn_file
// test presumably ends the second pass when there is no synonym file.
2963 for (gint synLib=0; synLib<2; synLib++) {
2965 if (oLib[iRealLib]->syn_file.get()==NULL)
2968 show_progress->notify_about_work();
2970 //if (stardict_strcmp(sWord, poGetWord(0,iRealLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iRealLib)-1,iRealLib))<=0) {
2971 //there are Chinese dicts and English dicts...
2975 iwords = narticles(iRealLib);
2977 iwords = nsynarticles(iRealLib);
2978 for (glong index=0; index<iwords; index++) {
2979 // Need to deal with same word in index? But this will slow down processing in most case.
2981 sCheck = poGetOrigWord(index,iRealLib);
2983 sCheck = poGetOrigSynonymWord(index,iRealLib);
2984 // tolower and skip too long or too short words
2985 iCheckWordLen = g_utf8_strlen(sCheck, -1);
2986 if (iCheckWordLen-ucs4_str2_len>=iMaxDistance ||
2987 ucs4_str2_len-iCheckWordLen>=iMaxDistance)
// The candidate is converted to UCS-4, truncated to the query length when it
// is longer, and lower-cased before the distance is computed.
2989 ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, NULL);
2990 if (iCheckWordLen > ucs4_str2_len)
2991 ucs4_str1[ucs4_str2_len]=0;
2992 unicode_strdown(ucs4_str1);
2994 iDistance = oEditDistance.CalEditDistance(ucs4_str1, ucs4_str2, iMaxDistance);
// Accept only if strictly better than the current worst slot and smaller than
// the query length.
2996 if (iDistance<iMaxDistance && iDistance < ucs4_str2_len) {
2997 // when ucs4_str2_len=1,2 we need less fuzzy.
2999 bool bAlreadyInList = false;
3000 int iMaxDistanceAt=0;
// Scan the slots for an identical word and for a slot holding the current
// worst distance (the assignment to iMaxDistanceAt is not visible here).
3001 for (int j=0; j<reslist_size; j++) {
3002 if (oFuzzystruct[j].pMatchWord &&
3003 strcmp(oFuzzystruct[j].pMatchWord,sCheck)==0 ) {//already in list
3004 bAlreadyInList = true;
3007 //find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
3008 if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance ) {
// Replace the chosen slot with a copy of the new word, freeing the old one.
// NOTE(review): with reslist_size == 0 the vector is empty and index 0 would
// be out of range; no guard is visible in this listing (confirm).
3012 if (!bAlreadyInList) {
3013 if (oFuzzystruct[iMaxDistanceAt].pMatchWord)
3014 g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord);
3015 oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck);
3016 oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance;
3017 // calc new iMaxDistance
3018 iMaxDistance = iDistance;
3019 for (int j=0; j<reslist_size; j++) {
3020 if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance)
3021 iMaxDistance = oFuzzystruct[j].iMatchWordDistance;
3022 } // calc new iMaxDistance
3031 if (Found)// sort with distance
3032 std::sort(oFuzzystruct.begin(), oFuzzystruct.end());
// Hand the word pointers (possibly NULL for unused slots) to the caller.
3034 for (gint i=0; i<reslist_size; ++i)
3035 reslist[i]=oFuzzystruct[i].pMatchWord;
// Strict-weak-ordering predicate for C strings: true when lh sorts before rh
// according to stardict_strcmp. Used as the comparator for std::sort over the
// match arrays in LookupWithRule and LookupWithRegex.
3040 static inline bool less_for_compare(const char *lh, const char *rh) {
3041 return stardict_strcmp(lh, rh)<0;
// Libs::LookupWithRule: collects words matching the pattern `word` (compiled
// with g_pattern_spec_new) from the dictionaries selected by dictmask.
//   word         pattern string handed to g_pattern_spec_new.
//   ppMatchWord  output array receiving g_strdup'ed, de-duplicated matches.
//                NOTE(review): its capacity is not checked in the visible
//                code; the caller must size it for all matches.
//   dictmask     dictionaries to search; entries are tested against
//                InstantDictType_LOCAL (non-local ones presumably skipped).
// Matches from the main index and from the synonym index are merged and then
// sorted with less_for_compare.
// NOTE(review): the return statement is not visible; presumably iMatchCount.
3044 gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord, std::vector<InstantDictIndex> &dictmask)
// Per-dictionary result buffer; the loops below treat -1 as its terminator.
3046 glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];
3047 gint iMatchCount = 0;
3048 GPatternSpec *pspec = g_pattern_spec_new(word);
3050 const gchar * sMatchWord;
3051 bool bAlreadyInList;
3052 std::vector<Dict *>::size_type iRealLib;
3053 for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3054 //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
3055 // -iMatchCount,so save time,but may got less result and the word may repeat.
3056 if (dictmask[iLib].type != InstantDictType_LOCAL)
3058 iRealLib = dictmask[iLib].index;
// Main index: append each matched word unless an identical string is already
// present in ppMatchWord.
3059 if (oLib[iRealLib]->LookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3060 show_progress->notify_about_work();
3061 for (int i=0; aiIndex[i]!=-1; i++) {
3062 sMatchWord = poGetOrigWord(aiIndex[i],iRealLib);
3063 bAlreadyInList = false;
3064 for (int j=0; j<iMatchCount; j++) {
3065 if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3066 bAlreadyInList = true;
3070 if (!bAlreadyInList)
3071 ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
// Synonym index: same merge, reading words through poGetOrigSynonymWord.
3074 if (oLib[iRealLib]->LookupWithRuleSynonym(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3075 show_progress->notify_about_work();
3076 for (int i=0; aiIndex[i]!=-1; i++) {
3077 sMatchWord = poGetOrigSynonymWord(aiIndex[i],iRealLib);
3078 bAlreadyInList = false;
3079 for (int j=0; j<iMatchCount; j++) {
3080 if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3081 bAlreadyInList = true;
3085 if (!bAlreadyInList)
3086 ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3090 g_pattern_spec_free(pspec);
3092 if (iMatchCount)// sort it.
3093 std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare);
// Libs::LookupWithRegex: collects words matching the regular expression `word`
// from the dictionaries selected by dictmask.
//   word         regular expression source, compiled with g_regex_new using
//                G_REGEX_OPTIMIZE and no error output.
//   ppMatchWord  output array receiving g_strdup'ed, de-duplicated matches.
//                NOTE(review): its capacity is not checked in the visible
//                code; the caller must size it for all matches.
//   dictmask     dictionaries to search; entries are tested against
//                InstantDictType_LOCAL (non-local ones presumably skipped).
// Matches from the main index and from the synonym index are merged and then
// sorted with less_for_compare.
// NOTE(review): the return statement is not visible; presumably iMatchCount.
3097 gint Libs::LookupWithRegex(const gchar *word, gchar **ppMatchWord, std::vector<InstantDictIndex> &dictmask)
// Per-dictionary result buffer; the loops below treat -1 as its terminator.
3099 glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];
3100 gint iMatchCount = 0;
// NOTE(review): g_regex_new returns NULL for an invalid pattern; no NULL check
// is visible in this listing before regex is used and unref'ed (confirm).
3101 GRegex *regex = g_regex_new(word, G_REGEX_OPTIMIZE, (GRegexMatchFlags)0, NULL);
3103 const gchar * sMatchWord;
3104 bool bAlreadyInList;
3105 std::vector<Dict *>::size_type iRealLib;
3106 for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3107 //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
3108 // -iMatchCount,so save time,but may got less result and the word may repeat.
3109 if (dictmask[iLib].type != InstantDictType_LOCAL)
3111 iRealLib = dictmask[iLib].index;
// Main index: append each matched word unless an identical string is already
// present in ppMatchWord.
3112 if (oLib[iRealLib]->LookupWithRegex(regex, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3113 show_progress->notify_about_work();
3114 for (int i=0; aiIndex[i]!=-1; i++) {
3115 sMatchWord = poGetOrigWord(aiIndex[i],iRealLib);
3116 bAlreadyInList = false;
3117 for (int j=0; j<iMatchCount; j++) {
3118 if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3119 bAlreadyInList = true;
3123 if (!bAlreadyInList)
3124 ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
// Synonym index: same merge, reading words through poGetOrigSynonymWord.
3127 if (oLib[iRealLib]->LookupWithRegexSynonym(regex, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3128 show_progress->notify_about_work();
3129 for (int i=0; aiIndex[i]!=-1; i++) {
3130 sMatchWord = poGetOrigSynonymWord(aiIndex[i],iRealLib);
3131 bAlreadyInList = false;
3132 for (int j=0; j<iMatchCount; j++) {
3133 if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3134 bAlreadyInList = true;
3138 if (!bAlreadyInList)
3139 ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3143 g_regex_unref(regex);
3145 if (iMatchCount)// sort it.
3146 std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare);
// Libs::LookupData: full-text search. Splits sWord into search terms and
// records, per dictionary, the keys of the articles for which Dict::SearchData
// reports a match.
//   sWord        query text, parsed into SearchWords.
//   reslist      array of vectors indexed like dictmask; reslist[i] receives
//                g_strdup'ed keys for dictmask[i].
//   search_func  progress callback, invoked with search_data and the fraction
//                search_count/total_count.
//   search_data  opaque pointer handed back to search_func.
//   cancel       NOTE(review): its use is not visible in this listing.
//   dictmask     dictionaries to search; only InstantDictType_LOCAL entries
//                contribute to total_count.
// Returns true when at least one reslist[i] is non-empty (the final scan
// presumably stops at the first non-empty vector; the break is not visible).
// NOTE(review): most of the query parser and several short statements are
// missing from this listing.
3150 bool Libs::LookupData(const gchar *sWord, std::vector<gchar *> *reslist, updateSearchDialog_func search_func, gpointer search_data, bool *cancel, std::vector<InstantDictIndex> &dictmask)
3152 std::vector<std::string> SearchWords;
3153 std::string SearchWord;
3154 const char *p=sWord;
// A space ends the current term; non-empty terms are appended to SearchWords.
// The other branches of this parser are not visible here.
3174 } else if (*p == ' ') {
3175 if (!SearchWord.empty()) {
3176 SearchWords.push_back(SearchWord);
// Flush the last term after the scan.
3184 if (!SearchWord.empty()) {
3185 SearchWords.push_back(SearchWord);
3188 if (SearchWords.empty())
3191 glong search_count=0;
3192 glong total_count=0;
// total_count = number of articles across all local dictionaries; it is the
// denominator of the progress fraction.
3194 for (std::vector<InstantDictIndex>::size_type i=0; i<dictmask.size(); ++i) {
3195 if (dictmask[i].type == InstantDictType_LOCAL)
3196 total_count += narticles(dictmask[i].index);
// origin_data is a scratch buffer handed to SearchData; it is grown with
// g_realloc whenever an article is larger than max_size.
3200 guint32 max_size =0;
3201 gchar *origin_data = NULL;
3202 std::vector<InstantDictIndex>::size_type iRealLib;
3203 for (std::vector<InstantDictIndex>::size_type i=0; i<dictmask.size(); ++i) {
3204 if (dictmask[i].type != InstantDictType_LOCAL)
3206 iRealLib = dictmask[i].index;
3207 if (!oLib[iRealLib]->containSearchData())
3209 const gulong iwords = narticles(iRealLib);
3211 guint32 offset, size;
3212 for (gulong j=0; j<iwords; ++j) {
// Report progress whenever search_count is a multiple of 10000.
3216 if (search_count % 10000 == 0) {
3217 search_func(search_data, (gdouble)search_count/(gdouble)total_count);
3221 oLib[iRealLib]->get_key_and_data(j, &key, &offset, &size);
3222 if (size>max_size) {
3223 origin_data = (gchar *)g_realloc(origin_data, size);
// Record the key unless it equals the most recently recorded key for this
// dictionary.
3226 if (oLib[iRealLib]->SearchData(SearchWords, offset, size, origin_data)) {
3227 if (reslist[i].empty() || strcmp(reslist[i].back(), key))
3228 reslist[i].push_back(g_strdup(key));
3233 g_free(origin_data);
3236 std::vector<InstantDictIndex>::size_type i;
3237 for (i=0; i<dictmask.size(); ++i)
3238 if (!reslist[i].empty())
3241 return i!=dictmask.size();
// Libs::GetStorageType: returns the is_file_or_db field of the resource
// storage attached to dictionary oLib[iLib].
// NOTE(review): the value returned when storage is NULL is not visible in
// this listing.
3244 int Libs::GetStorageType(size_t iLib)
3246 if (oLib[iLib]->storage == NULL)
3248 return oLib[iLib]->storage->is_file_or_db;
// Libs::GetStorageFilePath: forwards key to storage->get_file_path for
// dictionary oLib[iLib] and returns its result.
// NOTE(review): the value returned when storage is NULL is not visible in
// this listing (presumably NULL; confirm).
3251 const char *Libs::GetStorageFilePath(size_t iLib, const char *key)
3253 if (oLib[iLib]->storage == NULL)
3255 return oLib[iLib]->storage->get_file_path(key);
// Libs::GetStorageFileContent: forwards key to storage->get_file_content for
// dictionary oLib[iLib] and returns its result.
// NOTE(review): the value returned when storage is NULL is not visible in
// this listing (presumably NULL; confirm).
3258 const char *Libs::GetStorageFileContent(size_t iLib, const char *key)
3260 if (oLib[iLib]->storage == NULL)
3262 return oLib[iLib]->storage->get_file_content(key);