2 # Spelling -- check for common spelling errors
4 # Copyright (C) 1998 Richard Braakman
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, you can find it on the World Wide
18 # Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
19 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
27 our @ISA = qw(Exporter);
28 our @EXPORT = qw(spelling_check spelling_check_picky);
30 # All spelling errors that have been observed "in the wild" in package
31 # descriptions are added here, on the grounds that if they occurred once they
32 # are more likely to occur again.
34 # Misspellings of "compatibility", "separate", and "similar" are particularly
37 # Be careful with corrections that involve punctuation, since the check is a
38 # bit rough with punctuation. For example, I had to delete the correction of
39 # "builtin" to "built-in".
41 our %CORRECTIONS = qw(
42 abandonning abandoning
47 accelleration acceleration
52 accidentaly accidentally
53 accidentually accidentally
54 accomodate accommodate
55 accomodate accommodate
56 accomodates accommodates
60 acumulating accumulating
62 additionaly additionally
68 alegorical allegorical
70 algorithmical algorithmically
71 allpication application
80 annoucement announcement
82 appropiate appropriate
83 appropriatly appropriately
86 architechture architecture
95 assigments assignments
98 atomatically automatically
100 attruibutes attributes
101 authentification authentication
102 automaticly automatically
104 automatized automated
105 automatizes automates
109 availablity availability
111 availavility availability
124 cancelation cancellation
125 cancelation cancellation
126 capabilites capabilities
127 capatibilities capabilities
129 challanges challenges
131 charachters characters
137 commerical commercial
138 comminucation communication
142 compability compatibility
143 compatability compatibility
144 compatable compatible
145 compatibiliy compatibility
146 compatibilty compatibility
147 compleatly completely
150 compresion compression
151 connectinos connections
152 consistancy consistency
157 continueing continuing
158 contraints constraints
160 convinient convenient
162 cryptocraphic cryptographic
168 definately definitely
170 dependancies dependencies
171 dependancy dependency
174 developement development
175 developement development
177 deveolpment development
179 dictionnary dictionary
181 disapeared disappeared
182 dispertion dispersion
183 dissapears disappears
184 docuentation documentation
185 documantation documentation
186 documentaion documentation
189 ecspecially especially
194 encorporating incorporating
197 enlightnment enlightenment
199 enviroiment environment
200 enviroment environment
201 environement environment
202 excecutable executable
206 explicitely explicitly
208 exprimental experimental
220 functionaly functionally
221 functionnality functionality
222 functonality functionality
223 futhermore furthermore
224 generiously generously
230 heirarchically hierarchically
235 implemantation implementation
237 incompatabilities incompatibilities
238 incompatable incompatible
239 inconsistant inconsistent
240 indendation indentation
242 independant independent
243 informatiom information
244 infromation information
247 initializiation initialization
248 inofficial unofficial
249 integreated integrated
253 interchangable interchangeable
254 intermittant intermittent
255 interupted interrupted
271 maintainance maintenance
272 maintainence maintenance
275 manoeuvering maneuvering
276 mathimatical mathematical
277 mathimatic mathematic
278 mathimatics mathematics
281 microprocesspr microprocessor
282 milliseonds milliseconds
283 miscelleneous miscellaneous
288 monochorome monochrome
289 monochromo monochrome
292 multidimensionnal multidimensional
293 navagating navigating
299 negotation negotiation
303 notications notifications
308 optmizations optimizations
309 orientatied orientated
318 paramameters parameters
319 parameterize parametrize
322 particularily particularly
324 peprocessor preprocessor
326 permissons permissions
327 persistant persistent
331 postgressql PostgreSQL
337 prefferably preferably
342 proccesors processors
344 processessing processing
346 processsing processing
348 programers programmers
352 pronnounced pronounced
353 prononciation pronunciation
362 psychadelic psychedelic
369 recognizeable recognizable
370 recommanded recommended
372 redirectrion redirection
378 registraration registration
379 regulamentations regulations
382 repectively respectively
383 replacments replacements
389 retransmited retransmitted
399 seperately separately
404 sequencial sequential
412 specificaton specification
420 subdirectoires subdirectories
421 succesfully successfully
423 superceded superseded
424 superflous superfluous
425 superseeded superseded
433 suspicously suspiciously
435 synchonized synchronized
436 syncronize synchronize
437 syncronize synchronize
438 syncronizing synchronizing
439 syncronus synchronous
444 transfering transferring
445 trasmission transmission
447 trigerring triggering
448 unexecpted unexpected
449 unfortunatelly unfortunately
461 variantions variations
# qw() splits its contents on whitespace, so a correction whose replacement
# contains a space cannot be listed in the table above and is added here.
$CORRECTIONS{alot} = q{a lot};
477 # Picky corrections, applied before lowercasing the word. These are only
478 # applied to things known to be entirely English text, such as package
479 # descriptions, and should not be applied to files that may contain
480 # configuration fragments or more informal files such as debian/copyright.
481 our %CORRECTIONS_CASE = qw(
512 meta-package metapackage
525 Postgresql PostgreSQL
526 postgresql PostgreSQL
529 SkoleLinux Skolelinux
530 skolelinux Skolelinux
535 subversion Subversion
# qw() splits its contents on whitespace, so corrections whose replacement
# contains a space cannot be listed in the table above.  They are registered
# here instead, grouped by the replacement text they map to.
$CORRECTIONS_CASE{$_} = 'Debian Edu'
    for qw(Debian-Edu debian-edu);
$CORRECTIONS_CASE{$_} = 'TeX Live'
    for qw(TeXLive TeX-Live TeXlive TeX-live texlive tex-live);
562 # -----------------------------------
565 my @args = grep { defined($_) } @_;
569 # Check spelling of $text and report the tag $tag if we find anything.
570 # $filename, if included, is given as the first argument to the tag. If it's
571 # not defined, it will be omitted.
573 my ($tag, $text, $filename) = @_;
577 $text =~ s/[.,;:?!()[\]]//g;
579 for my $word (split(/\s+/, $text)) {
580 if (exists $CORRECTIONS{$word}) {
581 _tag($tag, $filename, $word, $CORRECTIONS{$word});
585 # Special case for correcting a multi-word string.
586 if ($text =~ m,debian/gnu\s+linux,) {
587 _tag($tag, $filename, "Debian/GNU Linux", "Debian GNU/Linux");
591 # Check spelling of $text against pickier corrections, such as common
592 # capitalization mistakes. This check is separate from spelling_check since
593 # it isn't appropriate for some files (such as changelog). Takes $text to
594 # check spelling in and $tag to report if we find anything. $filename, if
595 # included, is given as the first argument to the tag. If it's not defined,
596 # it will be omitted.
597 sub spelling_check_picky {
598 my ($tag, $text, $filename) = @_;
600 # Check this first in case it's contained in square brackets and
602 if ($text =~ m,meta\s+package,) {
603 _tag($tag, $filename, "meta package", "metapackage");
606 # Exclude text enclosed in square brackets as it could be a package list
607 # or similar which may legitimately contain lower-cased versions of
609 $text =~ s/\[.+?\]//sg;
610 for my $word (split(/\s+/, $text)) {
611 $word =~ s/^\(|[).,?!:;]+$//g;
612 if (exists $CORRECTIONS_CASE{$word}) {
613 _tag($tag, $filename, $word, $CORRECTIONS_CASE{$word});
622 # indent-tabs-mode: nil
623 # cperl-indent-level: 4
625 # vim: syntax=perl sw=4 sts=4 ts=4 et shiftround