Added splitting of the HTML from the XDXF download page into fields describing dictionaries
[mdictionary] / src / plugins / xdxf / xdxfplugin.cpp
1 /*******************************************************************************
2
3     This file is part of mDictionary.
4
5     mDictionary is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9
10     mDictionary is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License
16     along with mDictionary.  If not, see <http://www.gnu.org/licenses/>.
17
18     Copyright 2010 Comarch S.A.
19
20 *******************************************************************************/
21
22 /*! \file xdxfplugin.cpp
23 \author Jakub Jaszczynski <j.j.jaszczynski@gmail.com>
24 */
25
26 #include "xdxfplugin.h"
27 #include <QDebug>
28 #include "../../include/Notify.h"
29 #include "DownloadDict.h"
30
31 XdxfPlugin::XdxfPlugin(QObject *parent) : CommonDictInterface(parent),
32                     _langFrom(""), _langTo(""),_name(""), _infoNote("") {
33
34     //DownloadDict a("<tr><td><img src=\"buf/comn_sdict05_bulg_comp/icon16.png\" alt=\"icon\" /></td><td align=\"center\">English-Bulgarian computer dictionary</td><td align=\"center\"><a href=\"http://downloads.sourceforge.net/xdxf/comn_sdict05_bulg_comp.tar.bz2\" target=\"_blank\">comn_sdict05_bulg_comp.tar.bz2</a></td><td align=\"right\">13,889</td><td align=\"right\">55,094</td><td align=\"right\">523</td><td align=\"center\">English</td><td align=\"center\">Bulgarian</td><td align=\"center\"><a href=\"http://xdxf.revdanica.com/\">Common XDXF</a></td><td align=\"center\">2006-04-23 23:34:40</td></tr>");
35
36
37     _settings = new Settings();
38     _dictDialog = new XdxfDictDialog(this, this);
39
40     connect(_dictDialog, SIGNAL(notify(Notify::NotifyType,QString)),
41             this, SIGNAL(notify(Notify::NotifyType,QString)));
42
43
44     _settings->setValue("type","xdxf");
45     _icon = QIcon("/usr/share/mdictionary/xdxf.png");
46     _wordsCount = -1;
47     stopped = false;
48
49     initAccents();
50 }
51
52 void XdxfPlugin::retranslate() {
53     QString locale = QLocale::system().name();
54
55     QTranslator *translator = new QTranslator(this);
56
57     if(!translator->load(":/xdxf/translations/" + locale)) {
58         translator->load(":/xdxf/translations/en_US");
59     }
60     QCoreApplication::installTranslator(translator);
61 }
62
63
/*! Deletes the settings object and the dialog created in the constructor. */
XdxfPlugin::~XdxfPlugin() {
    delete _settings;
    delete _dictDialog;
}
68
69
/*! \return source language, as read from the "lang_from" attribute of the XDXF root element */
QString XdxfPlugin::langFrom() const {   
    return _langFrom;
}
73
74
/*! \return target language, as read from the "lang_to" attribute of the XDXF root element */
QString XdxfPlugin::langTo() const {
    return  _langTo;
}
78
79
/*! \return dictionary name, as read from the "full_name" element of the XDXF file */
QString XdxfPlugin::name() const {
    return  _name;
}
83
84
/*! \return the plugin type identifier, always "xdxf" */
QString XdxfPlugin::type() const {
    return QString("xdxf");
}
88
89
/*! \return dictionary description, as read from the "description" element of the XDXF file */
QString XdxfPlugin::infoNote() const {
    return _infoNote;
}
93
94
95 QList<Translation*> XdxfPlugin::searchWordList(QString word, int limit) {
96     if( word.indexOf("*")==-1 && word.indexOf("?")==-1 &&
97         word.indexOf("_")==-1 && word.indexOf("%")==-1)
98         word+="*";
99
100     if(isCached())
101         return searchWordListCache(word,limit);
102     return searchWordListFile(word, limit);
103 }
104
105
106 QList<Translation*> XdxfPlugin::searchWordListCache(QString word, int limit) {
107     QSet<Translation*> translations;
108     QString cacheFilePath = _settings->value("cache_path");
109
110     db.setDatabaseName(cacheFilePath);
111     if(!QFile::exists(cacheFilePath) || !db.open()) {
112         qDebug() << "Database error" << db.lastError().text() << endl;
113         Q_EMIT notify(Notify::Warning, QString(tr("Cache database cannot be "
114                 "opened for %1 dictionary. Searching in XDXF file. "
115                 "You may want to recache.").arg(name())));
116         _settings->setValue("cached","false");
117         return searchWordListFile(word, limit);
118     }
119     stopped = false;
120     word = word.toLower();
121     word = word.replace("*", "%");
122     word = word.replace("?", "_");
123
124     QSqlQuery cur(db);
125     if(limit !=0)
126         cur.prepare("select word from dict where word like ? or normalized "
127                     "like ? limit ?");
128     else
129         cur.prepare("select word from dict where word like ? or normalized "
130                     "like ?");
131     cur.addBindValue(word);
132     cur.addBindValue(word);
133     if(limit !=0)
134         cur.addBindValue(limit);
135     cur.exec();
136
137     while(cur.next() && (translations.size()<limit || limit==0)) {
138        translations.insert(new TranslationXdxf(
139             cur.value(0).toString(),
140             _dictionaryInfo, this));
141     }
142     db.close();
143     return translations.toList();
144 }
145
146
147 QList<Translation*> XdxfPlugin::searchWordListFile(QString word, int limit) {
148     QSet<Translation*> translations;
149     QFile dictionaryFile(_settings->value("path"));
150     word = word.toLower();
151     stopped = false;
152
153     QRegExp regWord(word);
154     regWord.setCaseSensitivity(Qt::CaseInsensitive);
155     regWord.setPatternSyntax(QRegExp::Wildcard);
156
157     /*check xdxf file exist*/
158     if(!QFile::exists(_settings->value("path"))
159                 || !dictionaryFile.open(QFile::ReadOnly | QFile::Text)) {
160         qDebug()<<"Error: could not open file";
161         Q_EMIT notify(Notify::Warning,
162                 QString(tr("XDXF file cannot be read for %1").arg(name())));
163         return translations.toList();
164     }
165
166     QXmlStreamReader reader(&dictionaryFile);
167     QString readKey;
168     int i=0;
169
170     /*search words list*/
171     while(!reader.atEnd() && !stopped){
172         reader.readNextStartElement();
173         if(reader.name()=="ar") {
174             while(reader.name()!="k" && !reader.atEnd())
175                 reader.readNextStartElement();
176             if(!reader.atEnd())
177                 readKey = reader.readElementText();
178             if((regWord.exactMatch(readKey)
179                     || regWord.exactMatch(removeAccents(readKey)))
180                     && (i<limit || limit==0) && !reader.atEnd())  {
181  //               qDebug()<<readKey;
182                 translations<<(new TranslationXdxf(readKey.toLower(),
183                                _dictionaryInfo,this));
184                 if(translations.size()==limit && limit!=0)
185                     break;
186             }
187         }
188         this->thread()->yieldCurrentThread();
189     }
190     stopped=false;
191     dictionaryFile.close();
192     return translations.toList();
193 }
194
195
196 QString XdxfPlugin::search(QString key) {
197     if(isCached())
198         return searchCache(key);
199     return searchFile(key);
200 }
201
202
203 QString XdxfPlugin::searchCache(QString key) {
204     QString result("");
205     QString cacheFilePath = _settings->value("cache_path");
206     db.setDatabaseName(cacheFilePath);
207     key = key.toLower();
208
209     if(!QFile::exists(cacheFilePath) || !db.open()) {
210         qDebug() << "Database error" << db.lastError().text() << endl;
211         Q_EMIT notify(Notify::Warning, QString(tr("Cache database cannot be "
212                 "opened for %1 dictionary. Searching in XDXF file. "
213                 "You may want to recache.").arg(name())));
214         _settings->setValue("cached","false");
215         return searchFile(key);
216     }
217
218     QSqlQuery cur(db);
219
220     cur.prepare("select translation from dict where word like ?");
221     cur.addBindValue(key);
222     cur.exec();
223     while(cur.next())
224         result += cur.value(0).toString();
225
226     db.close();
227
228     return result;
229
230 }
231
232
233 QString XdxfPlugin::searchFile(QString key) {
234     QFile dictionaryFile(_settings->value("path"));
235     QString resultString("");
236     key = key.toLower();
237
238     /*check xdxf file exist*/
239     if(!QFile::exists(_settings->value("path"))
240                 || !dictionaryFile.open(QFile::ReadOnly | QFile::Text)) {
241         Q_EMIT notify(Notify::Warning,
242                 QString(tr("XDXF file cannot be read for %1").arg(name())));
243         qDebug()<<"Error: could not open file";
244         return "";
245     }
246
247     QXmlStreamReader reader(&dictionaryFile);
248     QString readKey;
249     bool match =false;
250     stopped = false;
251
252     /*search translations for word*/
253     while (!reader.atEnd()&& !stopped) {
254         reader.readNext();
255         if(reader.tokenType() == QXmlStreamReader::StartElement) {
256             if(reader.name()=="k") {
257                 readKey = reader.readElementText();
258                 if(readKey.toLower()==key.toLower())
259                     match = true;
260             }
261         }
262         if(match) {
263             QString temp("");
264             while(reader.name()!="ar" && !reader.atEnd()) {
265                 if(reader.name()!="" && reader.name()!="k") {
266                     if(reader.tokenType()==QXmlStreamReader::EndElement)
267                         temp+="</";
268                     if(reader.tokenType()==QXmlStreamReader::StartElement)
269                         temp+="<";
270                     temp+=reader.name().toString();
271                     if(reader.name().toString()=="c" &&
272                             reader.tokenType()==QXmlStreamReader::StartElement)
273                        temp= temp + " c=\"" + reader.attributes().
274                                value("c").toString() + "\"";
275                     temp+=">";
276                 }
277                 temp+= reader.text().toString().replace("<","&lt;").
278                         replace(">","&gt;");
279                 reader.readNext();
280             }
281             if(temp.at(0)==QChar('\n'))
282                 temp.remove(0,1);
283             resultString+="<key>" + readKey +"</key>";
284             resultString+="<t>" + temp + "</t>";
285             match=false;
286         }
287         this->thread()->yieldCurrentThread();
288     }
289     stopped=false;
290     dictionaryFile.close();
291     return resultString;
292 }
293
294
/*! Sets the flag that the search and caching loops check to end early. */
void XdxfPlugin::stop() {
    stopped=true;
}
299
300
/*! \return the dialog created in the constructor; the plugin deletes it in its destructor */
DictDialog* XdxfPlugin::dictDialog() {
     return _dictDialog;
}
304
305
306 CommonDictInterface* XdxfPlugin::getNew(const Settings *settings) const {
307     XdxfPlugin *plugin = new XdxfPlugin();
308
309     connect(plugin, SIGNAL(notify(Notify::NotifyType,QString)),
310             this, SIGNAL(notify(Notify::NotifyType,QString)));
311
312     ((XdxfDictDialog*)plugin->dictDialog())->setLastDialogParent(_dictDialog->lastDialogParent());
313
314
315
316     if(settings && plugin->setSettings(settings)) {
317
318         disconnect(plugin, SIGNAL(notify(Notify::NotifyType,QString)),
319                 this, SIGNAL(notify(Notify::NotifyType,QString)));
320         return plugin;
321     }
322     else {
323         disconnect(plugin, SIGNAL(notify(Notify::NotifyType,QString)),
324                 this, SIGNAL(notify(Notify::NotifyType,QString)));
325         delete plugin;
326         return 0;
327     }
328 }
329
330
/*! \return always true */
bool XdxfPlugin::isAvailable() const {
    return true;
}
334
335
/*! \return the plugin's current settings object; setSettings() may replace
    this object when it rejects new settings, and the destructor deletes it */
Settings* XdxfPlugin::settings() {
    return _settings;
}
339
340
341 bool XdxfPlugin::isCached() {
342     if(_settings->value("cached") == "true")
343         return true;
344     return false;
345 }
346
347
348 bool XdxfPlugin::setSettings(const Settings *settings) {
349     if(settings) {
350         bool isPathChange=false;
351         QString oldPath = _settings->value("path");
352         Settings *oldSettings =  new Settings ;
353
354         if(oldPath != settings->value("path")) {
355             if(oldPath!="" && _settings->value("cache_path")!="")
356                 clean();
357             isPathChange=true;
358         }
359
360         foreach(QString key, _settings->keys())
361             oldSettings->setValue(key, _settings->value(key));
362
363         foreach(QString key, settings->keys()) {
364            if(key != "generateCache")
365                _settings->setValue(key, settings->value(key));
366         }
367
368         if(!getDictionaryInfo()) {
369             Q_EMIT notify(Notify::Warning,
370                 QString(tr("XDXF file is in wrong format")));
371             qDebug()<<"Error: xdxf file is in wrong format";
372             delete _settings;
373             _settings=oldSettings;
374             return false;
375         }
376
377         if(isPathChange) {
378             _wordsCount=0;
379             if(oldPath!="")
380                 _settings->setValue("cached","false");
381             if(_settings->value("cached")=="true"
382                     && _settings->value("cache_path")!="") {
383                 db_name = _settings->value("type")
384                         + _settings->value("cache_path");
385                 db = QSqlDatabase::addDatabase("QSQLITE",db_name);
386             }
387         }
388
389         if((_settings->value("cached") == "false" ||
390             _settings->value("cached").isEmpty()) &&
391             settings->value("generateCache") == "true") {
392             clean();
393             makeCache("");
394         }
395
396         else if (settings->value("generateCache") == "false") {
397             _settings->setValue("cached", "false");
398         }
399     }
400     else
401         return false;
402     Q_EMIT settingsChanged();
403     return true;
404 }
405
406
407 bool XdxfPlugin::getDictionaryInfo() {
408     QFile dictionaryFile(_settings->value("path"));
409     if(!QFile::exists(_settings->value("path"))
410                 || !dictionaryFile.open(QFile::ReadOnly | QFile::Text)) {
411        Q_EMIT notify(Notify::Warning,
412                QString(tr("XDXF dictionary cannot be read from file")));
413         qDebug()<<"Error: could not open file";
414         return false;
415     }
416
417     bool okFormat=false;
418     QXmlStreamReader reader(&dictionaryFile);
419     reader.readNextStartElement();
420     if(reader.name()=="xdxf") {
421         okFormat=true;
422         if(reader.attributes().hasAttribute("lang_from"))
423             _langFrom = reader.attributes().value("lang_from").toString();
424         if(reader.attributes().hasAttribute("lang_to"))
425             _langTo = reader.attributes().value("lang_to").toString();
426     }
427     reader.readNextStartElement();
428     if(reader.name()=="full_name")
429         _name=reader.readElementText();
430     else
431         qDebug()<<"no full_name";
432     reader.readNextStartElement();
433     if(reader.name()=="description")
434         _infoNote=reader.readElementText();
435     else
436         qDebug()<<"no description";
437
438     _dictionaryInfo= _name + " [" + _langFrom + "-"
439                 + _langTo + "]";
440
441     dictionaryFile.close();
442     if(okFormat)
443         return true;
444     return false;
445 }
446
447
/*! \return pointer to the plugin's icon member (not a copy) */
QIcon* XdxfPlugin::icon() {
    return &_icon;
}
451
452
453 int XdxfPlugin::countWords() {
454     if(_wordsCount>0)
455         return _wordsCount;
456     QFile dictionaryFile(_settings->value("path"));
457     if(!QFile::exists(_settings->value("path"))
458                 || !dictionaryFile.open(QFile::ReadOnly | QFile::Text)) {
459         Q_EMIT notify(Notify::Warning,
460                 QString(tr("XDXF file cannot be read for %1 dictionary")
461                 .arg(name())));
462         qDebug()<<"Error: could not open file";
463         return -1;
464     }
465
466     dictionaryFile.seek(0);
467
468     long wordsCount = 0;
469
470     QString line;
471     while(!dictionaryFile.atEnd()) {
472         line = dictionaryFile.readLine();
473         if(line.contains("<k>")) {
474             wordsCount++;
475         }
476     }
477     _wordsCount = wordsCount;
478     dictionaryFile.close();
479     return wordsCount;
480 }
481
482
483 bool XdxfPlugin::makeCache(QString) {
484
485     XdxfCachingDialog d(_dictDialog->lastDialogParent());
486
487 //    qDebug()<<_dictDialog->lastDialogParent();
488
489     connect(&d, SIGNAL(cancelCaching()),
490             this, SLOT(stop()));
491     connect(this, SIGNAL(updateCachingProgress(int,int)),
492             &d, SLOT(updateCachingProgress(int,int)));
493
494     d.show();
495
496     QCoreApplication::processEvents();
497     QFileInfo dictFileN(_settings->value("path"));
498     QString cachePathN;
499     stopped = false;
500
501     /*create cache file name*/
502     int i=0;
503     do {
504         cachePathN = QDir::homePath() + "/.mdictionary/"
505                                       + dictFileN.completeBaseName()+"."
506                                       +QString::number(i) + ".cache";
507         i++;
508     } while(QFile::exists(cachePathN));
509
510     db_name = _settings->value("type") + cachePathN;
511     db = QSqlDatabase::addDatabase("QSQLITE",db_name);
512
513     /*checke errors (File open and db open)*/
514     QFile dictionaryFile(dictFileN.filePath());
515     if (!QFile::exists(_settings->value("path"))
516                 || !dictionaryFile.open(QFile::ReadOnly | QFile::Text)) {
517         Q_EMIT updateCachingProgress(100, 0);
518         Q_EMIT notify(Notify::Warning,
519                 QString(tr("XDXF file cannot be read for %1 dictionary")
520                 .arg(name())));
521         return 0;
522     }
523     QXmlStreamReader reader(&dictionaryFile);
524     db.setDatabaseName(cachePathN);
525     if(!db.open()) {
526         qDebug() << "Database error" << db.lastError().text() << endl;
527         Q_EMIT updateCachingProgress(100, 0);
528         Q_EMIT notify(Notify::Warning, QString(tr("Cache database cannot be "
529                 "opened for %1 dictionary. Searching in XDXF file. "
530                 "You may want to recache.").arg(name())));
531         return false;
532     }
533
534     /*inicial sqlQuery*/
535     QCoreApplication::processEvents();
536     QSqlQuery cur(db);
537     cur.exec("PRAGMA synchronous = 0");
538     cur.exec("drop table dict");
539     QCoreApplication::processEvents();
540     cur.exec("create table dict(word text, normalized text ,translation text)");
541     int counter = 0;
542     cur.exec("BEGIN;");
543
544     QString readKey;
545     bool match = false;
546     QTime timer;
547     timer.start();
548     countWords();
549     int lastProg = -1;
550     _settings->setValue("strip_accents", "true");
551     counter=0;
552
553     /*add all words to db*/
554     while (!reader.atEnd() && !stopped) {
555
556         QCoreApplication::processEvents();
557         reader.readNext();
558         if(reader.tokenType() == QXmlStreamReader::StartElement) {
559             if(reader.name()=="k"){
560                 readKey = reader.readElementText();
561                 match = true;
562             }
563         }
564         if(match) {
565             QString temp("");
566             while(reader.name()!="ar" && !reader.atEnd()) {
567                 if(reader.name()!="" && reader.name()!="k") {
568                     if(reader.tokenType()==QXmlStreamReader::EndElement)
569                         temp+="</";
570                     if(reader.tokenType()==QXmlStreamReader::StartElement)
571                         temp+="<";
572                     temp+=reader.name().toString();
573                     if(reader.name().toString()=="c"
574                         && reader.tokenType()==QXmlStreamReader::StartElement) {
575                         temp= temp + " c=\""
576                                    + reader.attributes().value("c").toString()
577                                    + "\"";
578                     }
579                     temp+=">";
580                 }
581                 temp+= reader.text().toString().replace("<","&lt;").replace(">"
582                               ,"&gt;");
583                 reader.readNext();
584             }
585             if(temp.at(0)==QChar('\n'))
586                 temp.remove(0,1);
587             temp="<key>" + readKey + "</key>" + "<t>" + temp+ "</t>";
588             match=false;
589             cur.prepare("insert into dict values(?,?,?)");
590             cur.addBindValue(readKey.toLower());
591             cur.addBindValue(removeAccents(readKey).toLower());
592             cur.addBindValue(temp);
593             cur.exec();
594             counter++;
595             int prog = counter*100/_wordsCount;
596             if(prog % 2 == 0 && lastProg != prog) {
597                 Q_EMIT updateCachingProgress(prog,timer.restart());
598                 lastProg = prog;
599             }
600         }
601     }
602     cur.exec("END;");
603     cur.exec("select count(*) from dict");
604
605     /*checke errors (wrong number of added words)*/
606     countWords();
607     if(!cur.next() || countWords() != cur.value(0).toInt()) {
608         Q_EMIT updateCachingProgress(100, timer.restart());
609         Q_EMIT notify(Notify::Warning,
610                 QString(tr("Database caching error, please try again.")));
611         db.close();
612         _settings->setValue("cache_path", cachePathN);
613         if(stopped)
614             clean();
615         _settings->setValue("cache_path","");
616         return false;
617     }
618
619     _settings->setValue("cache_path", cachePathN);
620     _settings->setValue("cached", "true");
621
622     disconnect(&d, SIGNAL(cancelCaching()),
623             this, SLOT(stop()));
624     disconnect(this, SIGNAL(updateCachingProgress(int,int)),
625             &d, SLOT(updateCachingProgress(int,int)));
626     db.close();
627     return true;
628 }
629
630 void XdxfPlugin::clean() {
631     if(QFile::exists(_settings->value("cache_path"))) {
632         QFile(_settings->value("cache_path")).remove();
633         QSqlDatabase::removeDatabase(db_name);
634     }
635 }
636
637
/* Qt plugin export: registers XdxfPlugin under the target name "xdxf". */
Q_EXPORT_PLUGIN2(xdxf, XdxfPlugin)