vcs.maemo.org Git - pkg-perl/blob - deb-src/libio-compress-zlib-perl/libio-compress-zlib-perl-2.012/pod/FAQ.pod

   1
   2 =head1 NAME
   3
   4 IO::Compress::Zlib::FAQ -- Frequently Asked Questions about IO::Compress::Zlib
   5
   6 =head1 DESCRIPTION
   7
   8 Common questions answered.
   9
  10 =head2 Compatibility with Unix compress/uncompress.
  11
  12 This module is not compatible with Unix C<compress>.
  13
  14 If you have the C<uncompress> program available, you can use this to read
  15 compressed files
  16
  17     open F, "uncompress -c $filename |";
  18     while (<F>)
  19     {
  20         ...
  21
  22 Alternatively, if you have the C<gunzip> program available, you can use
  23 this to read compressed files
  24
  25     open F, "gunzip -c $filename |";
  26     while (<F>)
  27     {
  28         ...
  29
  30 and this to write compressed files, if you have the C<compress> program
  31 available
  32
  33     open F, "| compress -c $filename ";
  34     print F "data";
  35     ...
  36     close F ;
  37
  38 =head2 Accessing .tar.Z files
  39
  40 See previous FAQ item.
  41
  42 If the C<Archive::Tar> module is installed and either the C<uncompress> or
  43 C<gunzip> programs are available, you can use one of these workarounds to
  44 read C<.tar.Z> files.
  45
  46 Firstly with C<uncompress>
  47
  48     use strict;
  49     use warnings;
  50     use Archive::Tar;
  51
  52     open F, "uncompress -c $filename |";
  53     my $tar = Archive::Tar->new(*F);
  54     ...
  55
  56 and this with C<gunzip>
  57
  58     use strict;
  59     use warnings;
  60     use Archive::Tar;
  61
  62     open F, "gunzip -c $filename |";
  63     my $tar = Archive::Tar->new(*F);
  64     ...
  65
  66 Similarly, if the C<compress> program is available, you can use this to
  67 write a C<.tar.Z> file
  68
  69     use strict;
  70     use warnings;
  71     use Archive::Tar;
  72     use IO::File;
  73
  74     my $fh = new IO::File "| compress -c >$filename";
  75     my $tar = Archive::Tar->new();
  76     ...
  77     $tar->write($fh);
  78     $fh->close ;
  79
  80 =head2 Accessing Zip Files
  81
  82 This module provides support for reading/writing zip files using the
  83 C<IO::Compress::Zip> and C<IO::Uncompress::Unzip> modules.
  84
  85 The primary focus of the C<IO::Compress::Zip> and C<IO::Uncompress::Unzip>
  86 modules is to provide an C<IO::File> compatible streaming read/write
  87 interface to zip files/buffers. They are not fully fledged archivers. If
  88 you are looking for an archiver check out the C<Archive::Zip> module. You
  89 can find it on CPAN at
  90
  91     http://www.cpan.org/modules/by-module/Archive/Archive-Zip-*.tar.gz
  92
  93 =head2 Compressed files and Net::FTP
  94
  95 The C<Net::FTP> module provides two low-level methods called C<stor> and
  96 C<retr> that both return filehandles. These filehandles can be used with the
  97 C<IO::Compress/Uncompress> modules to compress or uncompress files read
  98 from or written to an FTP Server on the fly, without having to create a
  99 temporary file.
 100
 101 Firstly, here is code that uses C<retr> to uncompress a file as it is
 102 read from the FTP Server.
 103
 104     use Net::FTP;
 105     use IO::Uncompress::Gunzip qw(:all);
 106
 107     my $ftp = new Net::FTP ...
 108
 109     my $retr_fh = $ftp->retr($compressed_filename);
 110     gunzip $retr_fh => $outFilename, AutoClose => 1
 111         or die "Cannot uncompress '$compressed_file': $GunzipError\n";
 112
 113 and this to compress a file as it is written to the FTP Server
 114
 115     use Net::FTP;
 116     use IO::Compress::Gzip qw(:all);
 117
 118     my $stor_fh = $ftp->stor($filename);
 119     gzip "filename" => $stor_fh, AutoClose => 1
 120         or die "Cannot compress '$filename': $GzipError\n";
 121
 122 =head2 How do I recompress using a different compression?
 123
 124 This is easier than you might expect if you realise that all the
 125 C<IO::Compress::*> objects are derived from C<IO::File> and that all the
 126 C<IO::Uncompress::*> modules can read from an C<IO::File> filehandle.
 127
 128 So, for example, say you have a file compressed with gzip that you want to
 129 recompress with bzip2. Here is all that is needed to carry out the
 130 recompression.
 131
 132     use IO::Uncompress::Gunzip ':all';
 133     use IO::Compress::Bzip2 ':all';
 134
 135     my $gzipFile = "somefile.gz";
 136     my $bzipFile = "somefile.bz2";
 137
 138     my $gunzip = new IO::Uncompress::Gunzip $gzipFile
 139         or die "Cannot gunzip $gzipFile: $GunzipError\n" ;
 140
 141     bzip2 $gunzip => $bzipFile
 142         or die "Cannot bzip2 to $bzipFile: $Bzip2Error\n" ;
 143
 144 Note, there is a limitation of this technique. Some compression file
 145 formats store extra information along with the compressed data payload. For
 146 example, gzip can optionally store the original filename and Zip stores a
 147 lot of information about the original file. If the original compressed file
 148 contains any of this extra information, it will not be transferred to the
 149 new compressed file using the technique above.
 150
 151 =head2 Apache::GZip Revisited
 152
 153 Below is a mod_perl Apache compression module, called C<Apache::GZip>,
 154 taken from
 155 F<http://perl.apache.org/docs/tutorials/tips/mod_perl_tricks/mod_perl_tricks.html#On_the_Fly_Compression>
 156
 157   package Apache::GZip;
 158   #File: Apache::GZip.pm
 159
 160   use strict vars;
 161   use Apache::Constants ':common';
 162   use Compress::Zlib;
 163   use IO::File;
 164   use constant GZIP_MAGIC => 0x1f8b;
 165   use constant OS_MAGIC => 0x03;
 166
 167   sub handler {
 168       my $r = shift;
 169       my ($fh,$gz);
 170       my $file = $r->filename;
 171       return DECLINED unless $fh=IO::File->new($file);
 172       $r->header_out('Content-Encoding'=>'gzip');
 173       $r->send_http_header;
 174       return OK if $r->header_only;
 175
 176       tie *STDOUT,'Apache::GZip',$r;
 177       print($_) while <$fh>;
 178       untie *STDOUT;
 179       return OK;
 180   }
 181
 182   sub TIEHANDLE {
 183       my($class,$r) = @_;
 184       # initialize a deflation stream
 185       my $d = deflateInit(-WindowBits=>-MAX_WBITS()) || return undef;
 186
 187       # gzip header -- don't ask how I found out
 188       $r->print(pack("nccVcc",GZIP_MAGIC,Z_DEFLATED,0,time(),0,OS_MAGIC));
 189
 190       return bless { r   => $r,
 191                      crc =>  crc32(undef),
 192                      d   => $d,
 193                      l   =>  0
 194                    },$class;
 195   }
 196
 197   sub PRINT {
 198       my $self = shift;
 199       foreach (@_) {
 200         # deflate the data
 201         my $data = $self->{d}->deflate($_);
 202         $self->{r}->print($data);
 203         # keep track of its length and crc
 204         $self->{l} += length($_);
 205         $self->{crc} = crc32($_,$self->{crc});
 206       }
 207   }
 208
 209   sub DESTROY {
 210      my $self = shift;
 211
 212      # flush the output buffers
 213      my $data = $self->{d}->flush;
 214      $self->{r}->print($data);
 215
 216      # print the CRC and the total length (uncompressed)
 217      $self->{r}->print(pack("LL",@{$self}{qw/crc l/}));
 218   }
 219
 220   1;
 221
 222 Here's the Apache configuration entry you'll need to make use of it.  Once
 223 set, it will result in everything in the /compressed directory being
 224 compressed automagically.
 225
 226   <Location /compressed>
 227      SetHandler  perl-script
 228      PerlHandler Apache::GZip
 229   </Location>
 230
 231 Although at first sight there seems to be quite a lot going on in
 232 C<Apache::GZip>, you could sum up what the code was doing as follows --
 233 read the contents of the file in C<< $r->filename >>, compress it and write
 234 the compressed data to standard output. That's all.
 235
 236 This code has to jump through a few hoops to achieve this because
 237
 238 =over
 239
 240 =item 1.
 241
 242 The gzip support in C<Compress::Zlib> version 1.x can only work with a real
 243 filesystem filehandle. The filehandles used by Apache modules are not
 244 associated with the filesystem.
 245
 246 =item 2.
 247
 248 That means all the gzip support has to be done by hand - in this case by
 249 creating a tied filehandle to deal with creating the gzip header and
 250 trailer.
 251
 252 =back
 253
 254 C<IO::Compress::Gzip> doesn't have that filehandle limitation (this was one
 255 of the reasons for writing it in the first place). So if
 256 C<IO::Compress::Gzip> is used instead of C<Compress::Zlib> the whole tied
 257 filehandle code can be removed. Here is the rewritten code.
 258
 259   package Apache::GZip;
 260
 261   use strict vars;
 262   use Apache::Constants ':common';
 263   use IO::Compress::Gzip;
 264   use IO::File;
 265
 266   sub handler {
 267       my $r = shift;
 268       my ($fh,$gz);
 269       my $file = $r->filename;
 270       return DECLINED unless $fh=IO::File->new($file);
 271       $r->header_out('Content-Encoding'=>'gzip');
 272       $r->send_http_header;
 273       return OK if $r->header_only;
 274
 275       my $gz = new IO::Compress::Gzip '-', Minimal => 1
 276           or return DECLINED ;
 277
 278       print $gz $_ while <$fh>;
 279
 280       return OK;
 281   }
 282
 283 or even more succinctly, like this, using a one-shot gzip
 284
 285   package Apache::GZip;
 286
 287   use strict vars;
 288   use Apache::Constants ':common';
 289   use IO::Compress::Gzip qw(gzip);
 290
 291   sub handler {
 292       my $r = shift;
 293       $r->header_out('Content-Encoding'=>'gzip');
 294       $r->send_http_header;
 295       return OK if $r->header_only;
 296
 297       gzip $r->filename => '-', Minimal => 1
 298         or return DECLINED ;
 299
 300       return OK;
 301   }
 302
 303   1;
 304
 305 The use of one-shot C<gzip> above just reads from C<< $r->filename >> and
 306 writes the compressed data to standard output.
 307
 308 Note the use of the C<Minimal> option in the code above. When using gzip
 309 for Content-Encoding you should I<always> use this option. In the example
 310 above it will prevent the filename being included in the gzip header and
 311 make the size of the gzip data stream a slight bit smaller.
 312
 313 =head2 Using C<InputLength> to uncompress data embedded in a larger file/buffer.
 314
 315 A fairly common use-case is where compressed data is embedded in a larger
 316 file/buffer and you want to read both.
 317
 318 As an example consider the structure of a zip file. This is a well-defined
 319 file format that mixes both compressed and uncompressed sections of data in
 320 a single file.
 321
 322 For the purposes of this discussion you can think of a zip file as a sequence
 323 of compressed data streams, each of which is prefixed by an uncompressed
 324 local header. The local header contains information about the compressed
 325 data stream, including the name of the compressed file and, in particular,
 326 the length of the compressed data stream.
 327
 328 To illustrate how to use C<InputLength> here is a script that walks a zip
 329 file and prints out how many lines are in each compressed file (if you
 330 intend to write code to walk through a zip file for real, see
 331 L<IO::Uncompress::Unzip/"Walking through a zip file"> )
 332
 333     use strict;
 334     use warnings;
 335
 336     use IO::File;
 337     use IO::Uncompress::RawInflate qw(:all);
 338
 339     use constant ZIP_LOCAL_HDR_SIG  => 0x04034b50;
 340     use constant ZIP_LOCAL_HDR_LENGTH => 30;
 341
 342     my $file = $ARGV[0] ;
 343
 344     my $fh = new IO::File "<$file"
 345                 or die "Cannot open '$file': $!\n";
 346
 347     while (1)
 348     {
 349         my $sig;
 350         my $buffer;
 351
 352         my $x ;
 353         ($x = $fh->read($buffer, ZIP_LOCAL_HDR_LENGTH)) == ZIP_LOCAL_HDR_LENGTH
 354             or die "Truncated file: $!\n";
 355
 356         my $signature = unpack ("V", substr($buffer, 0, 4));
 357
 358         last unless $signature == ZIP_LOCAL_HDR_SIG;
 359
 360         # Read Local Header
 361         my $gpFlag             = unpack ("v", substr($buffer, 6, 2));
 362         my $compressedMethod   = unpack ("v", substr($buffer, 8, 2));
 363         my $compressedLength   = unpack ("V", substr($buffer, 18, 4));
 364         my $uncompressedLength = unpack ("V", substr($buffer, 22, 4));
 365         my $filename_length    = unpack ("v", substr($buffer, 26, 2));
 366         my $extra_length       = unpack ("v", substr($buffer, 28, 2));
 367
 368         my $filename ;
 369         $fh->read($filename, $filename_length) == $filename_length
 370             or die "Truncated file\n";
 371
 372         $fh->read($buffer, $extra_length) == $extra_length
 373             or die "Truncated file\n";
 374
 375         if ($compressedMethod != 8 && $compressedMethod != 0)
 376         {
 377             warn "Skipping file '$filename' - not deflated $compressedMethod\n";
 378             $fh->read($buffer, $compressedLength) == $compressedLength
 379                 or die "Truncated file\n";
 380             next;
 381         }
 382
 383         if ($compressedMethod == 0 && $gpFlag & 8 == 8)
 384         {
 385             die "Streamed Stored not supported for '$filename'\n";
 386         }
 387
 388         next if $compressedLength == 0;
 389
 390         # Done reading the Local Header
 391
 392         my $inf = new IO::Uncompress::RawInflate $fh,
 393                             Transparent => 1,
 394                             InputLength => $compressedLength
 395           or die "Cannot uncompress $file [$filename]: $RawInflateError\n"  ;
 396
 397         my $line_count = 0;
 398
 399         while (<$inf>)
 400         {
 401             ++ $line_count;
 402         }
 403
 404         print "$filename: $line_count\n";
 405     }
 406
 407 The majority of the code above is concerned with reading the zip local
 408 header data. The code that I want to focus on is at the bottom.
 409
 410     while (1) {
 411
 412         # read local zip header data
 413         # get $filename
 414         # get $compressedLength
 415
 416         my $inf = new IO::Uncompress::RawInflate $fh,
 417                             Transparent => 1,
 418                             InputLength => $compressedLength
 419           or die "Cannot uncompress $file [$filename]: $RawInflateError\n"  ;
 420
 421         my $line_count = 0;
 422
 423         while (<$inf>)
 424         {
 425             ++ $line_count;
 426         }
 427
 428         print "$filename: $line_count\n";
 429     }
 430
 431 The call to C<IO::Uncompress::RawInflate> creates a new filehandle C<$inf>
 432 that can be used to read from the parent filehandle C<$fh>, uncompressing
 433 it as it goes. The use of the C<InputLength> option will guarantee that
 434 I<at most> C<$compressedLength> bytes of compressed data will be read from
 435 the C<$fh> filehandle (The only exception is for an error case like a
 436 truncated file or a corrupt data stream).
 437
 438 This means that once RawInflate is finished C<$fh> will be left at the
 439 byte directly after the compressed data stream.
 440
 441 Now consider what the code looks like without C<InputLength>
 442
 443     while (1) {
 444
 445         # read local zip header data
 446         # get $filename
 447         # get $compressedLength
 448
 449         # read all the compressed data into $data
 450         read($fh, $data, $compressedLength);
 451
 452         my $inf = new IO::Uncompress::RawInflate \$data,
 453                             Transparent => 1,
 454           or die "Cannot uncompress $file [$filename]: $RawInflateError\n"  ;
 455
 456         my $line_count = 0;
 457
 458         while (<$inf>)
 459         {
 460             ++ $line_count;
 461         }
 462
 463         print "$filename: $line_count\n";
 464     }
 465
 466 The difference here is the addition of the temporary variable C<$data>.
 467 This is used to store a copy of the compressed data while it is being
 468 uncompressed.
 469
 470 If you know that C<$compressedLength> isn't that big then using temporary
 471 storage won't be a problem. But if C<$compressedLength> is very large or
 472 you are writing an application that other people will use, and so have no
 473 idea how big C<$compressedLength> will be, it could be an issue.
 474
 475 Using C<InputLength> avoids the use of temporary storage and means the
 476 application can cope with large compressed data streams.
 477
 478 One final point -- obviously C<InputLength> can only be used whenever you
 479 know the length of the compressed data beforehand, like here with a zip
 480 file.
 481
 482 =head1 SEE ALSO
 483
 484 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
 485
 486 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
 487
 488 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
 489 L<Archive::Tar|Archive::Tar>,
 490 L<IO::Zlib|IO::Zlib>
 491
 492 =head1 AUTHOR
 493
 494 This module was written by Paul Marquess, F<pmqs@cpan.org>.
 495
 496 =head1 MODIFICATION HISTORY
 497
 498 See the Changes file.
 499
 500 =head1 COPYRIGHT AND LICENSE
 501
 502 Copyright (c) 2005-2008 Paul Marquess. All rights reserved.
 503
 504 This program is free software; you can redistribute it and/or
 505 modify it under the same terms as Perl itself.
 506