9246901984ffa321bf14bd2d3fc34dd1e12cbfa2
[mdictionary] / src / include / AccentsNormalizer.h
1 /*******************************************************************************
2
3     This file is part of mDictionary.
4
5     mDictionary is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9
10     mDictionary is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License
16     along with mDictionary.  If not, see <http://www.gnu.org/licenses/>.
17
18     Copyright 2010 Comarch S.A.
19
20 *******************************************************************************/
21
22 /*! /file AccentsNormalizer.h
23 \brief Common functionality for all classes that need to remove accents from strings
24 \author Bartosz Szatkowski <bulislaw@linux.com>
25 */
26
27 #ifndef ACCENTSNORMALIZER_H
28 #define ACCENTSNORMALIZER_H
29
30 #include <QMap>
31 #include <QString>
32 #include <QRegExp>
33
34 //! Common functionality for all classes that need to remove accents from strings
35 class AccentsNormalizer {
36   protected:
37     QMap<QChar, QRegExp> letters;
38     QRegExp noLetter;
39
40     //! removes accents from letters in a word searched for (e.g. ą -> a, ł -> l)
41     QString removeAccents(QString string) {
42        string = string.toLower();
43        QString normalized = string.normalized(QString::NormalizationForm_KD);
44        foreach(QChar let, letters.keys()) {
45            normalized.replace(letters[let], QString(let));
46        }
47        normalized.replace(noLetter, "");
48        return normalized;
49     }
50
51    //! initializes the list of pairs (letters, accents)
52     void initAccents() {
53         letters['a'] = QRegExp(QString::fromUtf8("[ÀàÁáÂÂâÃãÄäÅåæÆĀāĂ㥹ǠȀǡȁǢȂǣȃȦȧǺȺǻǼǍǽǎǞǟⱥ]"), Qt::CaseInsensitive);
54         letters['b'] = QRegExp(QString::fromUtf8("[ƀƁƂƃɃƄƅ]"), Qt::CaseInsensitive);
55         letters['c'] = QRegExp(QString::fromUtf8("[ÇçÈçŒĆćĈĉĊċČčƇƈȻȼ]"), Qt::CaseInsensitive);
56         letters['d'] = QRegExp(QString::fromUtf8("[ÐĐđĎďȡƉ]"), Qt::CaseInsensitive);
57         letters['e'] = QRegExp(QString::fromUtf8("[ÈéèÉÊêËëĒēĔĕĖėĘęĚěēȄȅȆɆȇɇȨȩ]"), Qt::CaseInsensitive);
58         letters['f'] = QRegExp(QString::fromUtf8("[ſƑƒꜰ]"), Qt::CaseInsensitive);
59         letters['g'] = QRegExp(QString::fromUtf8("[ĠġĢģĜĝĞğƓǤǴǥǵǦǧ]"), Qt::CaseInsensitive);
60         letters['h'] = QRegExp(QString::fromUtf8("[ħĤĥĦȞȟⱧⱨ]"), Qt::CaseInsensitive);
61         letters['i'] = QRegExp(QString::fromUtf8("[ÌìÍíÎîÏïİijĨĩĪīĬĭĮįǐƗȈȉȊȋǏꟾ]"), Qt::CaseInsensitive);
62         letters['j'] = QRegExp(QString::fromUtf8("[ĴĵǰȷɈɉⱼ]"), Qt::CaseInsensitive);
63         letters['k'] = QRegExp(QString::fromUtf8("[ĶķĸƘǨƙǩⱩⱪ]"), Qt::CaseInsensitive);
64         letters['l'] = QRegExp(QString::fromUtf8("[ŀŁłĹĺĻļĽľĿȴȽⱠⱡⱢꝈ]"), Qt::CaseInsensitive);
65         letters['m'] = QRegExp(QString::fromUtf8("[Ɱ]"), Qt::CaseInsensitive);
66         letters['n'] = QRegExp(QString::fromUtf8("[ÑñŃńŅņŇňʼnŊŋȵǸǹƝ]"), Qt::CaseInsensitive);
67         letters['o'] = QRegExp(QString::fromUtf8("[ÒòÓóÔÕõÖöØøŐőœŌōŎŏƠȰơǑȱǒǪȪǫȫǬȌȬǭȍȭǾȎȮǿȏȯⱺ]"), Qt::CaseInsensitive);
68         letters['p'] = QRegExp(QString::fromUtf8("[ƤƥⱣ]"), Qt::CaseInsensitive);
69         letters['q'] = QRegExp(QString::fromUtf8("[Ɋɋ]"), Qt::CaseInsensitive);
70         letters['r'] = QRegExp(QString::fromUtf8("[ŕŖŖŗŘřȐȑȒȓɌɍⱤ]"), Qt::CaseInsensitive);
71         letters['s'] = QRegExp(QString::fromUtf8("[ߊšŚśŜŝŞşȘșȿꜱ]"), Qt::CaseInsensitive);
72         letters['t'] = QRegExp(QString::fromUtf8("[ŢţŤťŦŧȶȚƫțƬƮȾⱦ]"), Qt::CaseInsensitive);
73         letters['u'] = QRegExp(QString::fromUtf8("[ÙùÚúÛûÜüŰűŲųŨũŪūŬŮůưǓǔȔɄǕȕǖȖǗȗǘǙǚǛǜƯ]"), Qt::CaseInsensitive);
74         letters['v'] = QRegExp(QString::fromUtf8("[Ʋⱱⱴⱽ]"), Qt::CaseInsensitive);
75         letters['w'] = QRegExp(QString::fromUtf8("[ŴŵⱲⱳ]"), Qt::CaseInsensitive);
76         letters['y'] = QRegExp(QString::fromUtf8("[ÿÝýŶŷŸȲƳȳƴɎɏ]"), Qt::CaseInsensitive);
77         letters['z'] = QRegExp(QString::fromUtf8("[ŹźŻżŽžɀȤƵȥƶⱫⱬ]"), Qt::CaseInsensitive);
78         noLetter = QRegExp("[^-a-z '\"]", Qt::CaseInsensitive);
79     }
80 };
81
82 #endif // ACCENTSNORMALIZER_H