1 /*******************************************************************************
3 This file is part of mDictionary.
5 mDictionary is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 mDictionary is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mDictionary. If not, see <http://www.gnu.org/licenses/>.
18 Copyright 2010 Comarch S.A.
20 *******************************************************************************/
/*! \file AccentsNormalizer.h
    \brief Common functionality for all classes that need to remove accents from strings

    \author Bartosz Szatkowski <bulislaw@linux.com>
*/
#ifndef ACCENTSNORMALIZER_H
#define ACCENTSNORMALIZER_H

#include <QChar>
#include <QMap>
#include <QRegExp>
#include <QString>
36 class AccentsNormalizer {
38 QMap<QChar, QRegExp> letters;
41 QString removeAccents(QString string) {
42 string = string.toLower();
43 QString normalized = string.normalized(QString::NormalizationForm_KD);
44 foreach(QChar let, letters.keys()) {
45 normalized.replace(letters[let], QString(let));
47 normalized.replace(noLetter, "");
52 letters['a'] = QRegExp(QString::fromUtf8("[ÀàÁáÂÂâÃãÄäÅåæÆĀāĂ㥹ǠȀǡȁǢȂǣȃȦȧǺȺǻǼǍǽǎǞǟⱥ]"), Qt::CaseInsensitive);
53 letters['b'] = QRegExp(QString::fromUtf8("[ƀƁƂƃɃƄƅ]"), Qt::CaseInsensitive);
54 letters['c'] = QRegExp(QString::fromUtf8("[ÇçÈçŒĆćĈĉĊċČčƇƈȻȼ]"), Qt::CaseInsensitive);
55 letters['d'] = QRegExp(QString::fromUtf8("[ÐĐđĎďȡƉ]"), Qt::CaseInsensitive);
56 letters['e'] = QRegExp(QString::fromUtf8("[ÈéèÉÊêËëĒēĔĕĖėĘęĚěēȄȅȆɆȇɇȨȩ]"), Qt::CaseInsensitive);
57 letters['f'] = QRegExp(QString::fromUtf8("[ſƑƒꜰ]"), Qt::CaseInsensitive);
58 letters['g'] = QRegExp(QString::fromUtf8("[ĠġĢģĜĝĞğƓǤǴǥǵǦǧ]"), Qt::CaseInsensitive);
59 letters['h'] = QRegExp(QString::fromUtf8("[ħĤĥĦȞȟⱧⱨ]"), Qt::CaseInsensitive);
60 letters['i'] = QRegExp(QString::fromUtf8("[ÌìÍíÎîÏïİijĨĩĪīĬĭĮįǐƗȈȉȊȋǏꟾ]"), Qt::CaseInsensitive);
61 letters['j'] = QRegExp(QString::fromUtf8("[ĴĵǰȷɈɉⱼ]"), Qt::CaseInsensitive);
62 letters['k'] = QRegExp(QString::fromUtf8("[ĶķĸƘǨƙǩⱩⱪ]"), Qt::CaseInsensitive);
63 letters['l'] = QRegExp(QString::fromUtf8("[ŀŁłĹĺĻļĽľĿȴȽⱠⱡⱢꝈ]"), Qt::CaseInsensitive);
64 letters['m'] = QRegExp(QString::fromUtf8("[Ɱ]"), Qt::CaseInsensitive);
65 letters['n'] = QRegExp(QString::fromUtf8("[ÑñŃńŅņŇňʼnŊŋȵǸǹƝ]"), Qt::CaseInsensitive);
66 letters['o'] = QRegExp(QString::fromUtf8("[ÒòÓóÔÕõÖöØøŐőœŌōŎŏƠȰơǑȱǒǪȪǫȫǬȌȬǭȍȭǾȎȮǿȏȯⱺ]"), Qt::CaseInsensitive);
67 letters['p'] = QRegExp(QString::fromUtf8("[ƤƥⱣ]"), Qt::CaseInsensitive);
68 letters['q'] = QRegExp(QString::fromUtf8("[Ɋɋ]"), Qt::CaseInsensitive);
69 letters['r'] = QRegExp(QString::fromUtf8("[ŕŖŖŗŘřȐȑȒȓɌɍⱤ]"), Qt::CaseInsensitive);
70 letters['s'] = QRegExp(QString::fromUtf8("[ߊšŚśŜŝŞşȘșȿꜱ]"), Qt::CaseInsensitive);
71 letters['t'] = QRegExp(QString::fromUtf8("[ŢţŤťŦŧȶȚƫțƬƮȾⱦ]"), Qt::CaseInsensitive);
72 letters['u'] = QRegExp(QString::fromUtf8("[ÙùÚúÛûÜüŰűŲųŨũŪūŬŮůưǓǔȔɄǕȕǖȖǗȗǘǙǚǛǜƯ]"), Qt::CaseInsensitive);
73 letters['v'] = QRegExp(QString::fromUtf8("[Ʋⱱⱴⱽ]"), Qt::CaseInsensitive);
74 letters['w'] = QRegExp(QString::fromUtf8("[ŴŵⱲⱳ]"), Qt::CaseInsensitive);
75 letters['y'] = QRegExp(QString::fromUtf8("[ÿÝýŶŷŸȲƳȳƴɎɏ]"), Qt::CaseInsensitive);
76 letters['z'] = QRegExp(QString::fromUtf8("[ŹźŻżŽžɀȤƵȥƶⱫⱬ]"), Qt::CaseInsensitive);
77 noLetter = QRegExp("[^-a-z '\"]", Qt::CaseInsensitive);
81 #endif // ACCENTSNORMALIZER_H