Added lots more modules from lintian. Maemian appears to work.
[maemian] / lib / Spelling.pm
1 # -*- perl -*-
2 # Spelling -- check for common spelling errors
3
4 # Copyright (C) 1998 Richard Braakman
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program.  If not, you can find it on the World Wide
18 # Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
19 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
20 # MA 02110-1301, USA.
21
22 package Spelling;
23 use strict;
24 use Tags;
25
26 use Exporter;
27 our @ISA = qw(Exporter);
28 our @EXPORT = qw(spelling_check spelling_check_picky);
29
30 # All spelling errors that have been observed "in the wild" in package
31 # descriptions are added here, on the grounds that if they occurred once they
32 # are more likely to occur again.
33
34 # Misspellings of "compatibility", "separate", and "similar" are particularly
35 # common.
36
37 # Be careful with corrections that involve punctuation, since the check is a
38 # bit rough with punctuation.  For example, I had to delete the correction of
39 # "builtin" to "built-in".
40
# Map of lower-case misspelling => correction, consulted by spelling_check()
# after the text has been lower-cased and stripped of basic punctuation.
# Entries are whitespace-separated pairs, one pair per line, kept in
# alphabetical order of the misspelling.  Each misspelling must appear only
# once: the table is a hash, so a repeated pair is silently redundant and a
# repeated key with a different correction would silently lose one of them.
our %CORRECTIONS = qw(
    abandonning abandoning
    abigious ambiguous
    abitrate arbitrate
    abov above
    absolut absolute
    accelleration acceleration
    accesing accessing
    accesnt accent
    accessable accessible
    accidentaly accidentally
    accidentually accidentally
    accomodate accommodate
    accomodates accommodates
    accout account
    acess access
    acording according
    acumulating accumulating
    addional additional
    additionaly additionally
    adress address
    adresses addresses
    adviced advised
    afecting affecting
    albumns albums
    alegorical allegorical
    algorith algorithm
    algorithmical algorithmically
    allpication application
    alows allows
    als also
    altough although
    ambigious ambiguous
    amoung among
    amout amount
    analysator analyzer
    ang and
    annoucement announcement
    appearence appearance
    appropiate appropriate
    appropriatly appropriately
    aquired acquired
    architechture architecture
    arguement argument
    arguements arguments
    aritmetic arithmetic
    arne't aren't
    arraival arrival
    artifical artificial
    artillary artillery
    assigment assignment
    assigments assignments
    assistent assistant
    asuming assuming
    atomatically automatically
    attemps attempts
    attruibutes attributes
    authentification authentication
    automaticly automatically
    automatize automate
    automatized automated
    automatizes automates
    auxilliary auxiliary
    avaiable available
    availabled available
    availablity availability
    availale available
    availavility availability
    availble available
    availiable available
    avaliable available
    backgroud background
    baloons balloons
    batery battery
    becomming becoming
    becuase because
    begining beginning
    calender calendar
    cancelation cancellation
    capabilites capabilities
    capatibilities capabilities
    cariage carriage
    challanges challenges
    changable changeable
    charachters characters
    charcter character
    choosen chosen
    colorfull colorful
    comand command
    comit commit
    commerical commercial
    comminucation communication
    commiting committing
    committ commit
    commoditiy commodity
    compability compatibility
    compatability compatibility
    compatable compatible
    compatibiliy compatibility
    compatibilty compatibility
    compleatly completely
    complient compliant
    compres compress
    compresion compression
    connectinos connections
    consistancy consistency
    containes contains
    containts contains
    contence contents
    continous continuous
    continueing continuing
    contraints constraints
    convertor converter
    convinient convenient
    corected corrected
    cryptocraphic cryptographic
    deamon daemon
    debain Debian
    debians Debian's
    decompres decompress
    definate definite
    definately definitely
    delemiter delimiter
    dependancies dependencies
    dependancy dependency
    dependant dependent
    detabase database
    developement development
    developped developed
    deveolpment development
    devided divided
    dictionnary dictionary
    diplay display
    disapeared disappeared
    dispertion dispersion
    dissapears disappears
    docuentation documentation
    documantation documentation
    documentaion documentation
    dont don't
    easilly easily
    ecspecially especially
    edditable editable
    editting editing
    eletronic electronic
    enchanced enhanced
    encorporating incorporating
    endianess endianness
    enhaced enhanced
    enlightnment enlightenment
    enterily entirely
    enviroiment environment
    enviroment environment
    environement environment
    excecutable executable
    exceded exceeded
    excellant excellent
    exlcude exclude
    explicitely explicitly
    expresion expression
    exprimental experimental
    extention extension
    failuer failure
    familar familiar
    fatser faster
    fetaures features
    forse force
    fortan fortran
    forwardig forwarding
    framwork framework
    fuction function
    fuctions functions
    functionaly functionally
    functionnality functionality
    functonality functionality
    futhermore furthermore
    generiously generously
    grahical graphical
    grahpical graphical
    grapic graphic
    guage gauge
    halfs halves
    heirarchically hierarchically
    helpfull helpful
    hierachy hierarchy
    hierarchie hierarchy
    howver however
    implemantation implementation
    incomming incoming
    incompatabilities incompatibilities
    incompatable incompatible
    inconsistant inconsistent
    indendation indentation
    indended intended
    independant independent
    informatiom information
    infromation information
    initalize initialize
    initators initiators
    initializiation initialization
    inofficial unofficial
    integreated integrated
    integrety integrity
    integrey integrity
    intendet intended
    interchangable interchangeable
    intermittant intermittent
    interupted interrupted
    jave java
    langage language
    langauage language
    langugage language
    lauch launch
    lenght length
    lesstiff lesstif
    libaries libraries
    libary library
    libraris libraries
    licenceing licencing
    loggging logging
    loggin login
    logile logfile
    machinary machinery
    maintainance maintenance
    maintainence maintenance
    makeing making
    managable manageable
    manoeuvering maneuvering
    mathimatical mathematical
    mathimatic mathematic
    mathimatics mathematics
    ment meant
    messsages messages
    microprocesspr microprocessor
    milliseonds milliseconds
    miscelleneous miscellaneous
    misformed malformed
    mispelled misspelled
    mmnemonic mnemonic
    modulues modules
    monochorome monochrome
    monochromo monochrome
    monocrome monochrome
    mroe more
    multidimensionnal multidimensional
    navagating navigating
    nead need
    neccesary necessary
    neccessary necessary
    necesary necessary
    negotation negotiation
    nescessary necessary
    nessessary necessary
    noticable noticeable
    notications notifications
    o'caml OCaml
    omitt omit
    ommitted omitted
    optionnal optional
    optmizations optimizations
    orientatied orientated
    orientied oriented
    overaall overall
    pacakge package
    pachage package
    packacge package
    packege package
    packge package
    pakage package
    paramameters parameters
    parameterize parametrize
    paramter parameter
    paramters parameters
    particularily particularly
    pased passed
    peprocessor preprocessor
    perfoming performing
    permissons permissions
    persistant persistent
    plattform platform
    ploting plotting
    posible possible
    postgressql PostgreSQL
    powerfull powerful
    preceeded preceded
    preceeding preceding
    precission precision
    prefered preferred
    prefferably preferably
    prepaired prepared
    primative primitive
    princliple principle
    priorty priority
    proccesors processors
    proces process
    processessing processing
    processpr processor
    processsing processing
    progams programs
    programers programmers
    programm program
    programms programs
    promps prompts
    pronnounced pronounced
    prononciation pronunciation
    pronouce pronounce
    pronunce pronounce
    propery property
    prosess process
    protable portable
    protcol protocol
    protecion protection
    protocoll protocol
    psychadelic psychedelic
    quering querying
    recieved received
    recieve receive
    reciever receiver
    recognizeable recognizable
    recommanded recommended
    redircet redirect
    redirectrion redirection
    reenabled re-enabled
    reenable re-enable
    reencode re-encode
    refence reference
    registerd registered
    registraration registration
    regulamentations regulations
    remoote remote
    removeable removable
    repectively respectively
    replacments replacements
    requiere require
    requred required
    resizeable resizable
    ressize resize
    ressource resource
    retransmited retransmitted
    runnning running
    safly safely
    savable saveable
    searchs searches
    secund second
    separatly separately
    sepcify specify
    seperated separated
    seperately separately
    seperate separate
    seperatly separately
    seperator separator
    sequencial sequential
    serveral several
    setts sets
    similiar similar
    simliar similar
    speach speech
    speciefied specified
    specifed specified
    specificaton specification
    specifing specifying
    speficied specified
    speling spelling
    splitted split
    staically statically
    standart standard
    staticly statically
    subdirectoires subdirectories
    succesfully successfully
    succesful successful
    superceded superseded
    superflous superfluous
    superseeded superseded
    suplied supplied
    suport support
    suppored supported
    supportin supporting
    suppoted supported
    suppported supported
    suppport support
    suspicously suspiciously
    synax syntax
    synchonized synchronized
    syncronize synchronize
    syncronizing synchronizing
    syncronus synchronous
    syste system
    sythesis synthesis
    taht that
    throught through
    transfering transferring
    trasmission transmission
    treshold threshold
    trigerring triggering
    unexecpted unexpected
    unfortunatelly unfortunately
    unknonw unknown
    unuseful useless
    useable usable
    usefull useful
    usera users
    usetnet Usenet
    utilites utilities
    utillities utilities
    utilties utilities
    utiltity utility
    utitlty utility
    variantions variations
    varient variant
    verbse verbose
    verisons versions
    verison version
    verson version
    vicefersa vice-versa
    wheter whether
    wierd weird
    xwindows X
    yur your
);

# qw() splits on whitespace, so a correction that itself contains a space
# has to be added with an explicit assignment.
$CORRECTIONS{'alot'} = 'a lot';
476
477 # Picky corrections, applied before lowercasing the word.  These are only
478 # applied to things known to be entirely English text, such as package
479 # descriptions, and should not be applied to files that may contain
480 # configuration fragments or more informal files such as debian/copyright.
# Case-sensitive corrections used by spelling_check_picky().  Each row lists
# the preferred form first, followed by every variant that is reported and
# corrected to it; the rows are flattened into a variant => preferred hash.
# Writing the preferred form as an ordinary string also lets it contain
# spaces (e.g. 'Debian Edu').
our %CORRECTIONS_CASE = map {
    my ($preferred, @variants) = @$_;
    map { ($_, $preferred) } @variants;
} (
    [ 'Apache'      => qw(apache) ],
    [ 'API'         => qw(api Api) ],
    [ 'D-Bus'       => qw(D-BUS d-bus dbus) ],
    [ 'Debian'      => qw(debian) ],
    [ 'English'     => qw(english) ],
    [ 'French'      => qw(french) ],
    [ 'Emacs'       => qw(EMacs) ],
    [ 'GConf'       => qw(Gconf gconf) ],
    [ 'German'      => qw(german) ],
    [ 'GNOME'       => qw(Gnome gnome) ],
    [ 'GnomeVFS'    => qw(Gnome-VFS Gnome-Vfs GnomeVfs gnome-vfs gnomevfs) ],
    [ 'GNU'         => qw(gnu Gnu) ],
    [ 'GObject'     => qw(Gobject gobject) ],
    [ 'GStreamer'   => qw(Gstreamer gstreamer) ],
    [ 'GTK+'        => qw(GTK gtk+) ],
    [ 'HTTP'        => qw(Http) ],
    [ 'KDE'         => qw(kde) ],
    [ 'metapackage' => qw(meta-package) ],
    [ 'MySQL'       => qw(MYSQL Mysql mysql) ],
    [ 'Linux'       => qw(linux) ],
    [ 'LaTeX'       => qw(Latex latex) ],
    [ 'OCaml'       => qw(OCAML Ocaml ocaml) ],
    [ 'OpenLDAP'    => qw(OpenLdap Openldap openldap) ],
    [ 'PostgreSQL'  => qw(Postgresql postgresql) ],
    [ 'Python'      => qw(python) ],
    [ 'Russian'     => qw(russian) ],
    [ 'Skolelinux'  => qw(SkoleLinux skolelinux) ],
    [ 'S-Lang'      => qw(SLang S-lang s-lang) ],
    [ 'Spanish'     => qw(spanish) ],
    [ 'Subversion'  => qw(subversion) ],
    [ 'Tcl'         => qw(TCL tcl) ],
    [ 'TeX'         => qw(TEX Tex) ],
    [ 'teTeX'       => qw(TeTeX Tetex tetex) ],
    [ 'Tk'          => qw(TK tk) ],
    [ 'XEmacs'      => qw(Xemacs XEMacs) ],
    [ 'Xfce'        => qw(XFCE XFce xfce) ],
    [ 'Debian Edu'  => qw(Debian-Edu debian-edu) ],
    [ 'TeX Live'    => qw(TeXLive TeX-Live TeXlive TeX-live texlive tex-live) ],
);
561
562 # -----------------------------------
563
# Internal helper: pass the arguments on to tag(), leaving out any that are
# undefined.  This lets callers hand over an optional value (such as a
# filename) unconditionally; when it is undef it simply disappears from the
# argument list instead of reaching tag() as an undefined value.
sub _tag {
    my @present;
    for my $arg (@_) {
        push @present, $arg if defined $arg;
    }
    tag(@present);
}
568
# spelling_check($tag, $text, $filename)
#
# Look for known misspellings in $text and report each one under $tag.  The
# text is lower-cased and stripped of basic punctuation before being split
# into whitespace-separated words, and every word found in %CORRECTIONS is
# reported together with its correction.  $filename is optional: when
# defined it is passed along right after the tag, otherwise it is left out.
# Nothing is done when $text is empty or undefined.
sub spelling_check {
    my ($tag, $text, $filename) = @_;
    return if !$text;

    # Work on a normalized copy: lower case, basic punctuation removed.
    (my $normalized = lc $text) =~ s/[.,;:?!()[\]]//g;

    for my $candidate (split(/\s+/, $normalized)) {
        next unless exists $CORRECTIONS{$candidate};
        _tag($tag, $filename, $candidate, $CORRECTIONS{$candidate});
    }

    # A multi-word mistake cannot be found by the per-word lookup above, so
    # it is matched against the whole normalized text instead.
    _tag($tag, $filename, "Debian/GNU Linux", "Debian GNU/Linux")
        if $normalized =~ m,debian/gnu\s+linux,;
}
590
# spelling_check_picky($tag, $text, $filename)
#
# Check $text against the pickier, case-sensitive corrections in
# %CORRECTIONS_CASE (mostly capitalization of proper names) and report each
# hit under $tag.  This is kept separate from spelling_check since it isn't
# appropriate for some files (such as changelog).  $filename is optional:
# when defined it is passed along right after the tag, otherwise it is left
# out.  Nothing is done when $text is empty or undefined, matching
# spelling_check and avoiding "uninitialized value" warnings under -w.
sub spelling_check_picky {
    my ($tag, $text, $filename) = @_;
    return unless $text;

    # Check this first in case it's contained in square brackets and
    # removed below.
    if ($text =~ m,meta\s+package,) {
        _tag($tag, $filename, "meta package", "metapackage");
    }

    # Exclude text enclosed in square brackets as it could be a package list
    # or similar which may legitimately contain lower-cased versions of
    # the words.
    $text =~ s/\[.+?\]//sg;

    for my $word (split(/\s+/, $text)) {
        # Strip one leading "(" and any run of trailing punctuation so that
        # "(gnome)." is looked up as "gnome".  Case is deliberately kept.
        $word =~ s/^\(|[).,?!:;]+$//g;
        if (exists $CORRECTIONS_CASE{$word}) {
            _tag($tag, $filename, $word, $CORRECTIONS_CASE{$word});
        }
    }
}
618
619 1;
620
621 # Local Variables:
622 # indent-tabs-mode: nil
623 # cperl-indent-level: 4
624 # End:
625 # vim: syntax=perl sw=4 sts=4 ts=4 et shiftround