Added some code to peer into a data structure in Maemian/Schedule.pm. Also added the
[maemian] / nokia-lintian / lib / Spelling.pm
1 # -*- perl -*-
2 # Spelling -- check for common spelling errors
3
4 # Copyright (C) 1998 Richard Braakman
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program.  If not, you can find it on the World Wide
18 # Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
19 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
20 # MA 02110-1301, USA.
21
22 package Spelling;
23 use strict;
24 use Tags;
25
26 use Exporter;
27 our @ISA = qw(Exporter);
28 our @EXPORT = qw(spelling_check spelling_check_picky);
29
30 # All spelling errors that have been observed "in the wild" in package
31 # descriptions are added here, on the grounds that if they occurred once they
32 # are more likely to occur again.
33
34 # Misspellings of "compatibility", "separate", and "similar" are particularly
35 # common.
36
37 # Be careful with corrections that involve punctuation, since the check is a
38 # bit rough with punctuation.  For example, I had to delete the correction of
39 # "builtin" to "built-in".
40
# Map of lowercase misspelling => suggested correction.  Keys must be
# lowercase because spelling_check lowercases each word before the lookup.
# Only whitespace-free entries without apostrophes may live in the qw()
# list; see the explicit assignments after the list for the others.
our %CORRECTIONS = qw(
                      accesnt accent
                      accelleration acceleration
                      accessable accessible
                      accomodate accommodate
                      acess access
                      acording according
                      additionaly additionally
                      adress address
                      adresses addresses
                      adviced advised
                      afecting affecting
                      albumns albums
                      alegorical allegorical
                      algorith algorithm
                      allpication application
                      altough although
                      alows allows
                      amoung among
                      amout amount
                      analysator analyzer
                      ang and
                      appropiate appropriate
                      arraival arrival
                      artifical artificial
                      artillary artillery
                      attemps attempts
                      authentification authentication
                      automaticly automatically
                      automatize automate
                      automatized automated
                      automatizes automates
                      auxilliary auxiliary
                      availavility availability
                      availble available
                      avaliable available
                      availiable available
                      backgroud background
                      baloons balloons
                      becomming becoming
                      becuase because
                      calender calendar
                      cariage carriage
                      challanges challenges
                      changable changeable
                      charachters characters
                      charcter character
                      choosen chosen
                      colorfull colorful
                      comand command
                      commerical commercial
                      comminucation communication
                      commoditiy commodity
                      compability compatibility
                      compatability compatibility
                      compatable compatible
                      compatibiliy compatibility
                      compatibilty compatibility
                      compleatly completely
                      complient compliant
                      compres compress
                      containes contains
                      containts contains
                      contence contents
                      continous continuous
                      contraints constraints
                      convertor converter
                      convinient convenient
                      cryptocraphic cryptographic
                      deamon daemon
                      debain Debian
                      decompres decompress
                      definate definite
                      definately definitely
                      dependancies dependencies
                      dependancy dependency
                      dependant dependent
                      developement development
                      developped developed
                      deveolpment development
                      devided divided
                      dictionnary dictionary
                      diplay display
                      disapeared disappeared
                      dissapears disappears
                      documentaion documentation
                      docuentation documentation
                      documantation documentation
                      easilly easily
                      ecspecially especially
                      edditable editable
                      editting editing
                      eletronic electronic
                      enchanced enhanced
                      enhaced enhanced
                      encorporating incorporating
                      enlightnment enlightenment
                      enterily entirely
                      enviroiment environment
                      environement environment
                      excellant excellent
                      exlcude exclude
                      exprimental experimental
                      extention extension
                      failuer failure
                      familar familiar
                      fatser faster
                      fetaures features
                      forse force
                      fortan fortran
                      framwork framework
                      fuction function
                      fuctions functions
                      functionnality functionality
                      functonality functionality
                      functionaly functionally
                      futhermore furthermore
                      generiously generously
                      grahical graphical
                      grahpical graphical
                      grapic graphic
                      guage gauge
                      halfs halves
                      heirarchically hierarchically
                      helpfull helpful
                      hierachy hierarchy
                      hierarchie hierarchy
                      howver however
                      implemantation implementation
                      incomming incoming
                      incompatabilities incompatibilities
                      indended intended
                      indendation indentation
                      independant independent
                      informatiom information
                      initalize initialize
                      inofficial unofficial
                      integreated integrated
                      integrety integrity
                      integrey integrity
                      intendet intended
                      interchangable interchangeable
                      intermittant intermittent
                      jave java
                      langage language
                      langauage language
                      langugage language
                      lauch launch
                      lesstiff lesstif
                      libaries libraries
                      libary library
                      licenceing licencing
                      loggin login
                      logile logfile
                      loggging logging
                      maintainance maintenance
                      maintainence maintenance
                      makeing making
                      managable manageable
                      manoeuvering maneuvering
                      mathimatic mathematic
                      mathimatics mathematics
                      mathimatical mathematical
                      ment meant
                      modulues modules
                      monochromo monochrome
                      multidimensionnal multidimensional
                      navagating navigating
                      nead need
                      neccesary necessary
                      neccessary necessary
                      necesary necessary
                      nescessary necessary
                      noticable noticeable
                      optionnal optional
                      orientatied orientated
                      orientied oriented
                      pacakge package
                      pachage package
                      packacge package
                      packege package
                      packge package
                      pakage package
                      particularily particularly
                      persistant persistent
                      plattform platform
                      ploting plotting
                      protable portable
                      posible possible
                      postgressql PostgreSQL
                      powerfull powerful
                      prefered preferred
                      prefferably preferably
                      prepaired prepared
                      princliple principle
                      priorty priority
                      proccesors processors
                      proces process
                      processsing processing
                      processessing processing
                      progams programs
                      programers programmers
                      programm program
                      programms programs
                      promps prompts
                      pronnounced pronounced
                      prononciation pronunciation
                      pronouce pronounce
                      protcol protocol
                      protocoll protocol
                      recieve receive
                      recieved received
                      redircet redirect
                      refence reference
                      regulamentations regulations
                      remoote remote
                      repectively respectively
                      replacments replacements
                      requiere require
                      runnning running
                      safly safely
                      savable saveable
                      searchs searches
                      separatly separately
                      seperate separate
                      seperated separated
                      seperately separately
                      seperatly separately
                      serveral several
                      setts sets
                      similiar similar
                      simliar similar
                      speach speech
                      speling spelling
                      splitted split
                      standart standard
                      staically statically
                      staticly statically
                      succesful successful
                      succesfully successfully
                      suplied supplied
                      suport support
                      suppport support
                      supportin supporting
                      synax syntax
                      synchonized synchronized
                      syncronize synchronize
                      syncronizing synchronizing
                      syncronus synchronous
                      syste system
                      sythesis synthesis
                      taht that
                      throught through
                      useable usable
                      usefull useful
                      usera users
                      usetnet Usenet
                      utilites utilities
                      utillities utilities
                      utilties utilities
                      utiltity utility
                      utitlty utility
                      variantions variations
                      varient variant
                      verson version
                      vicefersa vice-versa
                      yur your
                      wheter whether
                      wierd weird
                      xwindows X
                     );

# The qw() format above doesn't allow spaces.  It also keeps a backslash
# placed before an apostrophe literally (qw() only unescapes its own
# delimiter and the backslash itself), so entries containing an apostrophe
# are added here as ordinary strings rather than as \' inside the list.
$CORRECTIONS{'alot'}    = 'a lot';
$CORRECTIONS{'debians'} = "Debian's";
$CORRECTIONS{'dont'}    = "don't";
$CORRECTIONS{"o'caml"}  = 'OCaml';
318
# Case-sensitive ("picky") corrections, looked up without lowercasing the
# word first.  They are meant only for text known to be plain English, such
# as package descriptions; do not use them on files that may hold
# configuration fragments or on informal files such as debian/copyright.
#
# The table is written as "correct form => list of wrong forms" and then
# flattened into %CORRECTIONS_CASE, which maps each wrong form to its
# correction.  Corrections containing a space need no special treatment
# in this layout.
our %CORRECTIONS_CASE = ();
{
    my %wrong_forms_of = (
        'D-Bus'       => [qw(D-BUS d-bus dbus)],
        'Debian'      => [qw(debian)],
        'Debian Edu'  => [qw(Debian-Edu debian-edu)],
        'Emacs'       => [qw(EMacs)],
        'English'     => [qw(english)],
        'French'      => [qw(french)],
        'GConf'       => [qw(Gconf gconf)],
        'German'      => [qw(german)],
        'GNOME'       => [qw(Gnome gnome)],
        'GnomeVFS'    => [qw(Gnome-VFS Gnome-Vfs GnomeVfs gnome-vfs gnomevfs)],
        'GObject'     => [qw(Gobject gobject)],
        'GStreamer'   => [qw(Gstreamer gstreamer)],
        'GTK+'        => [qw(GTK gtk+)],
        'KDE'         => [qw(kde)],
        'LaTeX'       => [qw(Latex latex)],
        'Linux'       => [qw(linux)],
        'metapackage' => [qw(meta-package)],
        'MySQL'       => [qw(MYSQL Mysql mysql)],
        'OCaml'       => [qw(OCAML Ocaml ocaml)],
        'OpenLDAP'    => [qw(OpenLdap Openldap openldap)],
        'PostgreSQL'  => [qw(Postgresql postgresql)],
        'Python'      => [qw(python)],
        'Russian'     => [qw(russian)],
        'S-Lang'      => [qw(SLang S-lang s-lang)],
        'Skolelinux'  => [qw(SkoleLinux skolelinux)],
        'Tcl'         => [qw(TCL tcl)],
        'teTeX'       => [qw(TeTeX Tetex tetex)],
        'TeX'         => [qw(TEX Tex)],
        'TeX Live'    => [qw(TeXLive TeX-Live TeXlive TeX-live texlive tex-live)],
        'Tk'          => [qw(TK tk)],
        'XEmacs'      => [qw(Xemacs XEMacs)],
        'Xfce'        => [qw(XFCE XFce xfce)],
    );

    # Every wrong form appears under exactly one correction, so the order
    # of insertion does not affect the resulting hash.
    for my $right (sort keys %wrong_forms_of) {
        my @wrong = @{ $wrong_forms_of{$right} };
        @CORRECTIONS_CASE{@wrong} = ($right) x @wrong;
    }
}
395
396 # -----------------------------------
397
# Internal helper: pass the arguments on to tag(), leaving out any that are
# undefined (the callers in this file pass a possibly-undefined $filename).
sub _tag {
    my @present = grep { defined } @_;
    return tag(@present);
}
402
# spelling_check($tag, $text, $filename)
#
# Look up every whitespace-separated word of $text in %CORRECTIONS and emit
# $tag for each known misspelling, passing the misspelled word and its
# correction.  $filename is optional; when defined it precedes the word in
# the tag arguments, otherwise it is left out.  Does nothing when $text is
# false (undefined, empty, or "0").
sub spelling_check {
    my ($tag, $text, $filename) = @_;
    return unless $text;

    foreach my $token (split(/\s+/, $text)) {
        my $candidate = lc $token;

        # Strip everything that is not a word character, an accented
        # ISO 8859-15 letter, a hyphen or an apostrophe.  An apostrophe is
        # also removed when it is the first or last character, so that
        # only apostrophes inside a word are kept.
        #
        # FIXME: Should do something that's aware of Unicode character
        # classes rather than only handling ISO 8859-15 characters.
        $candidate =~ s/(^\')|[^\w\xc0-\xd6\xd8-\xf6\xf8-\xff\'-]+|(\'\z)//g;

        next unless exists $CORRECTIONS{$candidate};
        _tag($tag, $filename, $candidate, $CORRECTIONS{$candidate});
    }

    # A multi-word mistake that the per-word lookup above cannot see.
    _tag($tag, $filename, "Debian/GNU Linux", "Debian GNU/Linux")
        if $text =~ m,Debian/GNU\s+Linux,;
}
430
# spelling_check_picky($tag, $text, $filename)
#
# Check $text against the pickier, case-sensitive corrections in
# %CORRECTIONS_CASE (mostly capitalization mistakes) and emit $tag for each
# hit, passing the offending word and its correction.  This is separate
# from spelling_check because it isn't appropriate for some files (such as
# changelog).  $filename is optional; when defined it precedes the word in
# the tag arguments, otherwise it is left out.  Does nothing when $text is
# false (undefined, empty, or "0"), matching spelling_check.
sub spelling_check_picky {
    my ($tag, $text, $filename) = @_;
    return unless $text;

    for my $word (split(/\s+/, $text)) {
        # Drop one leading "(" and any run of trailing punctuation so that
        # words next to punctuation are still looked up.  The word is not
        # lowercased: the table is case-sensitive.
        $word =~ s/^\(|[).,?!:;]+$//g;
        if (exists $CORRECTIONS_CASE{$word}) {
            _tag($tag, $filename, $word, $CORRECTIONS_CASE{$word});
        }
    }

    # A multi-word mistake that the per-word lookup above cannot see.
    if ($text =~ m,meta\s+package,) {
        _tag($tag, $filename, "meta package", "metapackage");
    }
}
451
452 1;
453
454 # Local Variables:
455 # indent-tabs-mode: nil
456 # cperl-indent-level: 4
457 # End:
458 # vim: syntax=perl sw=4 sts=4 ts=4 et shiftround