3 # Lintian HTML reporting tool -- Create Lintian web reports
5 # Copyright (C) 1998 Christian Schwarz and Richard Braakman
6 # Copyright (C) 2007 Russ Allbery
8 # This program is free software. It is distributed under the terms of
9 # the GNU General Public License as published by the Free Software
10 # Foundation; either version 2 of the License, or (at your option) any
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, you can find it on the World Wide
20 # Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
21 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
26 use File::Copy qw(copy);
28 use Text::Template ();
30 # ------------------------------
31 # Global variables and configuration
33 # Maximum number of identical tags per package to display. Any remaining tags
34 # will be compressed into a "... reported %d more times" line.
37 # These have no default and must be set in the configuration file.
38 # FIXME: $statistics_file should be in all caps as well.
39 our ($LINTIAN_ROOT, $LINTIAN_LAB, $LINTIAN_ARCHIVEDIR, $LINTIAN_DIST,
40 $LINTIAN_SECTION, $LINTIAN_ARCH, $HTML_TMP_DIR, $statistics_file,
43 # Read the configuration.
46 if (defined $LINTIAN_SECTION and not defined $LINTIAN_AREA) {
47 $LINTIAN_AREA = $LINTIAN_SECTION;
50 # The path to the mirror timestamp.
51 our $LINTIAN_TIMESTAMP
52 = "$LINTIAN_ARCHIVEDIR/project/trace/ftp-master.debian.org";
54 # FIXME: At least the lab should be a parameter to Read_pkglists rather
55 # than an environment variable.
56 $ENV{'LINTIAN_LAB'} = $LINTIAN_LAB;
57 $ENV{'LINTIAN_ROOT'} = $LINTIAN_ROOT;
59 # Import Lintian Perl libraries.
60 use lib "$ENV{LINTIAN_ROOT}/lib";
61 use Lintian::Tag::Info ();
66 # Global variables from Read_pkglists. Ugh.
67 # FIXME: Read_pkglists should return this information instead.
68 our (%binary_info, %source_info, %udeb_info, %bin_src_ref);
70 # Get additional tag information.
73 opendir(CHECKDIR, "$LINTIAN_ROOT/checks")
74 or fail("cannot read directory $LINTIAN_ROOT/checks");
76 for my $check (readdir CHECKDIR) {
77 next unless $check =~ /\.desc$/;
78 my @tags = read_dpkg_control("$LINTIAN_ROOT/checks/$check");
80 foreach my $tag (@tags) {
81 next unless $tag->{severity} and $tag->{certainty};
82 my $name = $tag->{tag};
83 $tag_extra{$name}{severity} = $tag->{severity};
84 $tag_extra{$name}{certainty} = $tag->{certainty};
90 # Set the Lintian version, current timestamp, and archive timestamp.
91 our $LINTIAN_VERSION = `$LINTIAN_ROOT/frontend/lintian --print-version`;
92 our $timestamp = `date -u --rfc-822`;
93 our $mirror_timestamp = slurp_entire_file($LINTIAN_TIMESTAMP);
94 chomp ($LINTIAN_VERSION, $timestamp);
95 $mirror_timestamp =~ s/\n.*//s;
98 # ------------------------------
99 # Initialize templates
101 # The path to our templates.
102 our $TEMPLATES = "$LINTIAN_ROOT/reporting/templates";
104 # This only has to be done once, so do it at the start and then reuse the same
105 # templates throughout.
107 for my $template (qw/head foot clean index maintainer maintainers packages tag
108 tags tags-severity/) {
109 my %options = (TYPE => 'FILE', SOURCE => "$TEMPLATES/$template.tmpl");
110 $templates{$template} = Text::Template->new (%options)
111 or die "cannot load template $template: $Text::Template::ERROR\n";
115 # ------------------------------
118 # Read the package lists.
120 # FIXME: get_bin_src_ref runs read_src_list unconditionally so we can't call
121 # it directly, which is confusing.
126 # Create output directories.
127 mkdir($HTML_TMP_DIR, 0777)
128 or die "cannot create output directory $HTML_TMP_DIR: $!\n";
129 mkdir("$HTML_TMP_DIR/full", 0777)
130 or die "cannot create output directory $HTML_TMP_DIR/full: $!\n";
131 mkdir("$HTML_TMP_DIR/maintainer", 0777)
132 or die "cannot create output directory $HTML_TMP_DIR/maintainer: $!\n";
133 mkdir("$HTML_TMP_DIR/tags", 0777)
134 or die "cannot create output directory $HTML_TMP_DIR/tags: $!\n";
135 symlink(".", "$HTML_TMP_DIR/reports")
136 or die "cannot create symlink $HTML_TMP_DIR/reports: $!\n";
137 symlink("$LINTIAN_ROOT/doc/lintian.html", "$HTML_TMP_DIR/manual")
138 or die "cannot create symlink $HTML_TMP_DIR/manual: $!\n";
140 symlink($ARGV[0], "$HTML_TMP_DIR/lintian.log")
141 or die "cannot create symlink $HTML_TMP_DIR/lintian.log: $!\n";
143 copy("$LINTIAN_ROOT/reporting/lintian.css", "$HTML_TMP_DIR/lintian.css")
144 or die "cannot copy lintian.css to $HTML_TMP_DIR: $!\n";
145 for my $image (qw/ico.png l.png logo-small.png/) {
146 copy("$LINTIAN_ROOT/reporting/images/$image", "$HTML_TMP_DIR/$image")
147 or die "cannot copy images/$image to $HTML_TMP_DIR: $!\n";
150 # This variable will accumulate statistics. For tags: errors, warnings,
151 # experimental, overridden, and info are the keys holding the count of tags of
152 # that sort. For packages: binary, udeb, and source are the number of
153 # packages of each type with Lintian errors or warnings. For maintainers:
154 # maintainers is the number of maintainers with Lintian errors or warnings.
157 # %by_maint holds a hash of maintainer names to packages and tags. Each
158 # maintainer is a key. The value is a hash of package names to hashes. Each
159 # package hash is in turn a hash of versions to an anonymous array of hashes,
160 # with each hash having keys code, package, version, type, tag, severity, certainty,
161 # extra, and xref. xref gets the partial URL of the maintainer page for that
164 # In other words, the lintian output line:
166 # W: gnubg source: substvar-source-version-is-deprecated gnubg-data
168 # for gnubg 0.15~20061120-1 maintained by Russ Allbery <rra@debian.org> is
169 # turned into the following structure:
172 # '0.15~20061120-1' => [
174 # package => 'gnubg',
176 # tag => 'substvar-source-version-is-deprecated',
177 # severity => 'normal',
178 # certainty => 'certain',
179 # extra => 'gnubg-data',
180 # xref => 'rra@debian.org.html#gnubg' } ] } }
182 # and then stored under the key 'Russ Allbery <rra@debian.org>'
184 # %by_uploader holds the same thing except for packages for which the person
185 # is only an uploader.
187 # %by_tag is a hash of tag names to an anonymous array of tag information
188 # hashes just like the inside-most data structure above.
189 my (%by_maint, %by_uploader, %by_tag);
191 # We take a lintian log file on either standard input or as the first
192 # argument. This log file contains all the tags lintian found, plus N: tags
193 # with informational messages. Ignore all the N: tags and load everything
194 # else into the hashes we use for all web page generation.
196 # We keep track of a hash from maintainer page URLs to maintainer values so
197 # that we don't have two maintainers who map to the same page and overwrite
198 # each other's pages. If we find two maintainers who map to the same URL,
199 # just assume that the second maintainer is the same as the first (but warn
201 my (%seen, %saw_maintainer);
204 next unless m/^([EWIXO]): (\S+)(?: (\S+))?: (\S+)(?:\s+(.*))?/;
205 my ($code, $package, $type, $tag, $extra) = ($1, $2, $3, $4, $5);
206 $type = 'binary' unless (defined $type);
207 next unless ($type eq 'source' || $type eq 'binary' || $type eq 'udeb');
218 unless ($seen{"$package $type"}) {
219 $statistics{"$type-packages"}++;
220 $seen{"$package $type"} = 1;
223 # Determine the source package for this package and warn if there appears
224 # to be no source package in the archive. Determine the maintainer and
225 # version. Work around a missing source package by pulling information
226 # from a binary package or udeb of the same name if there is any.
227 my ($source, $version, $source_version, $maintainer, $uploaders);
228 if ($type eq 'source') {
230 if (exists $source_info{$source}) {
231 $version = $source_info{$source}->{version};
232 $maintainer = $source_info{$source}->{maintainer};
233 $uploaders = $source_info{$source}->{uploaders};
235 warn "source package $package not listed!\n";
238 $source = $bin_src_ref{$package};
239 if ($source and exists $source_info{$source}) {
240 $maintainer = $source_info{$source}->{maintainer};
241 $uploaders = $source_info{$source}->{uploaders};
243 warn "source for package $package not found!\n";
245 if ($type eq 'binary') {
246 $maintainer = $binary_info{$package}->{maintainer};
247 } elsif ($type eq 'udeb') {
248 $maintainer = $udeb_info{$package}->{maintainer};
251 if ($type eq 'binary') {
252 $version = $binary_info{$package}->{version};
253 $source_version = $binary_info{$package}->{'source-version'};
254 } elsif ($type eq 'udeb') {
255 $version = $udeb_info{$package}->{version};
256 $source_version = $udeb_info{$package}->{'source-version'};
259 $maintainer ||= '(unknown)';
260 $version ||= 'unknown';
261 $source_version ||= $version;
263 # Check if we've seen the URL for this maintainer before and, if so, map
264 # them to the same person as the previous one.
265 $maintainer = map_maintainer ($maintainer);
266 $saw_maintainer{$maintainer} = 1;
268 # Update maintainer statistics.
269 $statistics{maintainers}++ unless defined $by_maint{$maintainer};
271 # Sanitize, just out of paranoia.
272 $source =~ s/[^a-zA-Z0-9.+-]/_/g;
273 $version =~ s/[^a-zA-Z0-9.+:~-]/_/g;
275 # Add the tag information to our hashes. Share the data between the
276 # hashes to save space (which means we can't later do destructive tricks
279 code => html_quote ($code),
280 package => html_quote ($package),
281 version => html_quote ($version),
282 type => html_quote ($type),
283 tag => html_quote ($tag),
284 severity => html_quote ($tag_extra{$tag}{severity}),
285 certainty => html_quote ($tag_extra{$tag}{certainty}),
286 extra => html_quote ($extra),
287 xref => maintainer_url ($maintainer) . "#$source"
289 $by_maint{$maintainer}{$source}{$source_version} ||= [];
290 push(@{ $by_maint{$maintainer}{$source}{$source_version} }, $info);
291 $by_tag{$tag} ||= [];
292 push(@{ $by_tag{$tag} }, $info);
294 # If the package had uploaders listed, also add the information to
295 # %by_uploader (still sharing the data between hashes).
297 my @uploaders = split (/\s*,\s*/, $uploaders);
299 my $uploader = map_maintainer ($_);
300 next if $uploader eq $maintainer;
301 $saw_maintainer{$uploader} = 1;
302 $by_uploader{$uploader}{$source}{$source_version} ||= [];
303 push(@{ $by_uploader{$uploader}{$source}{$source_version} }, $info);
308 # Build a hash of all maintainers, not just those with Lintian tags. We use
309 # this later to generate stub pages for maintainers whose packages are all
312 for my $source (keys %source_info) {
313 my $maintainer = $source_info{$source}->{maintainer};
314 my $id = maintainer_url ($maintainer);
315 $clean{$id} = $maintainer;
318 # Now, walk through the tags by source package (sorted by maintainer). Output
319 # a summary page of errors and warnings for each maintainer, output a full
320 # page that includes info, experimental, and overridden tags, and assemble the
321 # maintainer index and the QA package list as we go.
322 my (%qa, %maintainers, %packages);
327 sort grep { !$seen{$_}++ } keys (%by_maint), keys (%by_uploader);
329 for my $maintainer (@maintainers) {
330 my $id = maintainer_url ($maintainer);
333 # For each of this maintainer's packages, add statistical information
334 # about warnings and errors to the QA list and build the packages hash
335 # used for the package index. We only do this for the maintainer
336 # packages, not the uploader packages, to avoid double-counting.
337 for my $source (keys %{ $by_maint{$maintainer} }) {
338 my ($errors, $warnings) = (0, 0);
339 for my $version (keys %{ $by_maint{$maintainer}{$source} }) {
340 my $tags = $by_maint{$maintainer}{$source}{$version};
341 for my $tag (@$tags) {
342 $errors++ if $tag->{code} eq 'E';
343 $warnings++ if $tag->{code} eq 'W';
344 $packages{$tag->{package}} = $tag->{xref};
347 $qa{$source} = [ $errors, $warnings ];
350 # Determine if the maintainer's page is clean. Check all packages for
351 # which they're either maintainer or uploader and set $error_clean if
352 # they have no errors or warnings.
354 for my $source (keys %{ $by_maint{$maintainer} },
355 keys %{ $by_uploader{$maintainer} }) {
356 my $versions = $by_maint{$maintainer}{$source}
357 || $by_uploader{$maintainer}{$source};
358 for my $version (keys %$versions) {
359 my $tags = $versions->{$version};
360 for my $tag (@$tags) {
361 $error_clean = 0 if ($tag->{code} eq 'E');
362 $error_clean = 0 if ($tag->{code} eq 'W');
367 # Determine the parts of the maintainer and the file name for the
369 my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/);
370 $name = 'Unknown Maintainer' unless $name;
371 $email = 'unknown' unless $email;
372 my $regular = "maintainer/$id";
373 my $full = "full/$id";
375 # Create the regular maintainer page (only errors and warnings) and the
376 # full maintainer page (all tags, including overrides and info tags).
377 print "Generating page for $id\n";
379 email => html_quote (uri_escape ($email)),
382 maintainer => html_quote ($maintainer),
383 name => html_quote ($name),
384 packages => $by_maint{$maintainer},
385 uploads => $by_uploader{$maintainer},
389 $template = $templates{clean};
391 $template = $templates{maintainer};
393 output_template ($regular, $template, \%data);
394 $template = $templates{maintainer};
396 output_template ($full, $template, \%data);
398 # Add this maintainer to the hash of maintainer to URL mappings.
399 $maintainers{$maintainer} = $id;
402 # Write out the maintainer index.
404 maintainers => \%maintainers,
406 output_template ('maintainers.html', $templates{maintainers}, \%data);
408 # Write out the QA package list.
409 open (QA, '>', "$HTML_TMP_DIR/qa-list.txt")
410 or die "cannot create qa-list.txt: $!\n";
411 for my $source (sort keys %qa) {
412 print QA "$source $qa{$source}[0] $qa{$source}[1]\n";
414 close QA or die "cannot write to qa-list: $!\n";
416 # Now, generate stub pages for every maintainer who has only clean packages.
417 for my $id (keys %clean) {
418 my $maintainer = $clean{$id};
419 my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/);
420 $email = 'unknown' unless $email;
422 email => html_quote (uri_escape ($email)),
423 maintainer => html_quote ($maintainer),
424 name => html_quote ($name),
426 print "Generating clean page for $id\n";
427 output_template ("maintainer/$id", $templates{clean}, \%data);
428 output_template ("full/$id", $templates{clean}, \%data);
431 # Create the pages for each tag. Each page shows the extended description for
432 # the tag and all the packages for which that tag was issued.
433 for my $tag (sort keys %by_tag) {
434 my $info = Lintian::Tag::Info->new($tag);
437 $description = $info->description('html', ' ');
439 $description = " <p>Can't find description of tag $tag.</p>";
442 foreach (@{$by_tag{$tag}}) {
443 if ($_->{code} ne 'O') {
449 description => $description,
450 tag => html_quote ($tag),
452 tags => $by_tag{$tag},
454 output_template ("tags/$tag.html", $templates{tag}, \%data);
457 # Create the general tag indices.
461 output_template ('tags.html', $templates{tags}, \%data);
462 output_template ('tags-severity.html', $templates{'tags-severity'}, \%data);
464 # Generate the package lists. These are huge, so we break them into four
467 # FIXME: Does anyone actually use these pages? They're basically unreadable.
469 $list{'0-9, A-F'} = [];
473 for my $package (sort keys %packages) {
474 my $first = uc substr($package, 0, 1);
475 if ($first le 'F') { push(@{ $list{'0-9, A-F'} }, $package) }
476 elsif ($first le 'L') { push(@{ $list{'G-L'} }, $package) }
477 elsif ($first le 'R') { push(@{ $list{'M-R'} }, $package) }
478 else { push(@{ $list{'S-Z'} }, $package) }
481 packages => \%packages,
484 for my $area (sort keys %list) {
486 $data{list} = $list{$area};
487 output_template ("packages_$i.html", $templates{packages}, \%data);
491 # Finally, we can start creating the index page. First, read in the old
492 # statistics file so that we can calculate deltas for all of our statistics.
494 if (-f $statistics_file) {
495 ($old_statistics) = read_dpkg_control($statistics_file);
498 my @attrs = qw(maintainers source-packages binary-packages udeb-packages
499 errors warnings info experimental overridden);
500 for my $attr (@attrs) {
501 my $old = $old_statistics->{$attr} || 0;
502 $statistics{$attr} ||= 0;
503 $delta{$attr} = sprintf("%d (%+d)", $statistics{$attr},
504 $statistics{$attr} - $old);
507 # Update the statistics file.
508 open (STATS, '>', $statistics_file)
509 or die "cannot open $statistics_file for writing: $!\n";
510 print STATS "last-updated: $timestamp\n";
511 print STATS "mirror-timestamp: $mirror_timestamp\n";
512 for my $attr (@attrs) {
513 print STATS "$attr: $statistics{$attr}\n";
515 print STATS "lintian-version: $LINTIAN_VERSION\n";
516 close STATS or die "cannot write to $statistics_file: $!\n";
518 # Create the main page.
520 architecture => $LINTIAN_ARCH,
522 dist => $LINTIAN_DIST,
523 mirror => $mirror_timestamp,
524 previous => $old_statistics->{'last-updated'},
525 area => $LINTIAN_AREA,
527 output_template ('index.html', $templates{index}, \%data);
530 # ------------------------------
533 # Determine the file name for the maintainer page given a maintainer. It
534 # should be <email>.html where <email> is their email address with all
535 # characters other than a-z A-Z 0-9 - _ . @ = + replaced with _. Don't change
536 # this without coordinating with QA.
538 my ($maintainer) = @_;
539 my ($email) = ($maintainer =~ /<([^>]+)>/);
540 my ($regular, $full);
543 $id =~ tr/a-zA-Z0-9_.@=+-/_/c;
546 return 'unsorted.html';
550 # Deduplicate maintainers. Maintains a cache of the maintainers we've seen
551 # with a given e-mail address, issues a warning if two maintainers have the
552 # same e-mail address, and returns the maintainer string that we should use
553 # (which is whatever maintainer we saw first with that e-mail).
555 my (%urlmap, %warned);
557 my ($maintainer) = @_;
558 my $url = maintainer_url ($maintainer);
559 if ($urlmap{$url} && $urlmap{$url} ne $maintainer) {
560 warn "$maintainer has the same page as $urlmap{$url}\n"
561 unless ($warned{$maintainer}
562 || lc ($maintainer) eq lc ($urlmap{$url})
563 || $maintainer =~ /\@lists\.(alioth\.)?debian\.org>/);
564 $warned{$maintainer}++;
565 $maintainer = $urlmap{$url};
567 $urlmap{$url} = $maintainer;
573 # Quote special characters for HTML output.
# Escape the three HTML metacharacters as entities. The ampersand must be
# handled first so that the ampersands introduced by the < and > replacements
# are not escaped a second time.
577 $text =~ s/&/\&amp;/g;
578 $text =~ s/</\&lt;/g;
579 $text =~ s/>/\&gt;/g;
583 # Given a file name, a template, and a data hash, fill out the template with
584 # that data hash and output the results to the file.
585 sub output_template {
# Arguments: the output path relative to $HTML_TMP_DIR, a template object
# providing fill_in, and a reference to the data hash given to the template.
586 my ($file, $template, $data) = @_;
# Fill in defaults shared by every page. These assignments modify the
# caller's hash through the reference; ||= leaves any true value the caller
# already supplied untouched.
587 $data->{version} ||= $LINTIAN_VERSION;
588 $data->{timestamp} ||= $timestamp;
# head and foot are closures stored in the data hash that fill in the shared
# head and foot templates. head receives the page title as its first argument
# and an optional count as its second; the count (default 0) is the number of
# '../' segments used for path_prefix.
589 $data->{head} ||= sub { $templates{head}->fill_in (HASH => { page_title => $_[0],
590 path_prefix => '../' x ($_[1]||0),
592 $data->{foot} ||= sub { $templates{foot}->fill_in (HASH => $data) };
# Write the filled-in template directly to the output file; die if the file
# cannot be created or the template cannot be filled out.
593 open (OUTPUT, '>', "$HTML_TMP_DIR/$file")
594 or die "creating $HTML_TMP_DIR/$file failed: $!\n";
595 $template->fill_in (OUTPUT => \*OUTPUT, HASH => $data)
596 or die "filling out $file failed: $Text::Template::ERROR\n";
601 # indent-tabs-mode: nil
602 # cperl-indent-level: 4
604 # vim: syntax=perl sw=4 sts=4 ts=4 et shiftround