2 # Spelling -- check for common spelling errors
4 # Copyright (C) 1998 Richard Braakman
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, you can find it on the World Wide
18 # Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
19 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
27 our @ISA = qw(Exporter);
28 our @EXPORT = qw(spelling_check spelling_check_picky);
30 # All spelling errors that have been observed "in the wild" in package
31 # descriptions are added here, on the grounds that if they occurred once they
32 # are more likely to occur again.
34 # Misspellings of "compatibility", "separate", and "similar" are particularly
37 # Be careful with corrections that involve punctuation, since the check is a
38 # bit rough with punctuation. For example, I had to delete the correction of
39 # "builtin" to "built-in".
41 our %CORRECTIONS = qw(
43 accelleration acceleration
45 accomodate accommodate
48 additionaly additionally
54 alegorical allegorical
56 allpication application
63 appropiate appropriate
68 authentification authentication
69 automaticly automatically
74 availavility availability
86 charachters characters
92 comminucation communication
94 compability compatibility
95 compatability compatibility
97 compatibiliy compatibility
98 compatibilty compatibility
106 contraints constraints
108 convinient convenient
109 cryptocraphic cryptographic
115 definately definitely
116 dependancies dependencies
117 dependancy dependency
119 developement development
121 deveolpment development
123 dictionnary dictionary
125 disapeared disappeared
126 dissapears disappears
127 documentaion documentation
128 docuentation documentation
129 documantation documentation
132 ecspecially especially
138 encorporating incorporating
139 enlightnment enlightenment
141 enviroiment environment
142 environement environment
145 exprimental experimental
156 functionnality functionality
157 functonality functionality
158 functionaly functionally
159 futhermore furthermore
160 generiously generously
166 heirarchically hierarchically
171 implemantation implementation
173 incompatabilities incompatibilities
175 indendation indentation
176 independant independent
177 informatiom information
179 inofficial unofficial
180 integreated integrated
184 interchangable interchangeable
185 intermittant intermittent
198 maintainance maintenance
199 maintainence maintenance
202 manoeuvering maneuvering
203 mathimatic mathematic
204 mathimatics mathematics
205 mathimatical mathematical
208 monochromo monochrome
209 multidimensionnal multidimensional
210 navagating navigating
219 orientatied orientated
227 particularily particularly
228 persistant persistent
233 postgressql PostgreSQL
236 prefferably preferably
240 proccesors processors
242 processsing processing
243 processessing processing
245 programers programmers
249 pronnounced pronounced
250 prononciation pronunciation
258 regulamentations regulations
260 repectively respectively
261 replacments replacements
270 seperately separately
283 succesfully successfully
289 synchonized synchronized
290 syncronize synchronize
291 syncronizing synchronizing
292 syncronus synchronous
306 variantions variations
316 # The format above doesn't allow spaces.
317 $CORRECTIONS{'alot'} = 'a lot';
319 # Picky corrections, applied before lowercasing the word. These are only
320 # applied to things known to be entirely English text, such as package
321 # descriptions, and should not be applied to files that may contain
322 # configuration fragments or more informal files such as debian/copyright.
323 our %CORRECTIONS_CASE = qw(
348 meta-package metapackage
361 Postgresql PostgreSQL
362 postgresql PostgreSQL
365 SkoleLinux Skolelinux
366 skolelinux Skolelinux
# The qw() list used to initialise %CORRECTIONS_CASE cannot hold values that
# contain a space, so corrections whose replacement is more than one word are
# registered here instead, grouped by the replacement text they map to.
for my $variant (qw(Debian-Edu debian-edu)) {
    $CORRECTIONS_CASE{$variant} = 'Debian Edu';
}
for my $variant (qw(TeXLive TeX-Live TeXlive TeX-live texlive tex-live)) {
    $CORRECTIONS_CASE{$variant} = 'TeX Live';
}
396 # -----------------------------------
399 my @args = grep { defined($_) } @_;
403 # Check spelling of $text and report the tag $tag if we find anything.
404 # $filename, if included, is given as the first argument to the tag. If it's
405 # not defined, it will be omitted.
407 my ($tag, $text, $filename) = @_;
410 for my $word (split(/\s+/, $text)) {
413 # Try deleting the non-alphabetic parts from the word. Treat
414 # apostrophes specially: only delete them if they occur at the
415 # beginning or end of the word.
417 # FIXME: Should do something that's aware of Unicode character
418 # classes rather than only handling ISO 8859-15 characters.
419 $word =~ s/(^\')|[^\w\xc0-\xd6\xd8-\xf6\xf8-\xff\'-]+|(\'\z)//g;
420 if (exists $CORRECTIONS{$word}) {
421 _tag($tag, $filename, $word, $CORRECTIONS{$word});
425 # Special case for correcting a multi-word string.
426 if ($text =~ m,Debian/GNU\s+Linux,) {
427 _tag($tag, $filename, "Debian/GNU Linux", "Debian GNU/Linux");
431 # Check spelling of $text against pickier corrections, such as common
432 # capitalization mistakes. This check is separate from spelling_check since
433 # it isn't appropriate for some files (such as changelog). Takes $text to
434 # check spelling in and $tag to report if we find anything. $filename, if
435 # included, is given as the first argument to the tag. If it's not defined,
436 # it will be omitted.
437 sub spelling_check_picky {
438 my ($tag, $text, $filename) = @_;
440 for my $word (split(/\s+/, $text)) {
441 $word =~ s/^\(|[).,?!:;]+$//g;
442 if (exists $CORRECTIONS_CASE{$word}) {
443 _tag($tag, $filename, $word, $CORRECTIONS_CASE{$word});
447 if ($text =~ m,meta\s+package,) {
448 _tag($tag, $filename, "meta package", "metapackage");
455 # indent-tabs-mode: nil
456 # cperl-indent-level: 4
458 # vim: syntax=perl sw=4 sts=4 ts=4 et shiftround