vcs.maemo.org Git - dh-make-perl/blob - dev/arm/libio-compress-base-perl/libio-compress-base-perl-2.012/pod/FAQ.pod

   1
   2 =head1 NAME
   3
   4 IO::Compress::Base::FAQ -- Frequently Asked Questions about IO::Compress::Base
   5
   6 =head1 DESCRIPTION
   7
   8 Common questions answered.
   9
  10 =head2 Compatibility with Unix compress/uncompress.
  11
  12 This module is not compatible with Unix C<compress>.
  13
  14 If you have the C<uncompress> program available, you can use this to read
  15 compressed files
  16
  17     open F, "uncompress -c $filename |";
  18     while (<F>)
  19     {
  20         ...
  21
  22 Alternatively, if you have the C<gunzip> program available, you can use
  23 this to read compressed files
  24
  25     open F, "gunzip -c $filename |";
  26     while (<F>)
  27     {
  28         ...
  29
  30 and this to write compressed files, if you have the C<compress> program
  31 available
  32
  33     open F, "| compress -c >$filename";
  34     print F "data";
  35     ...
  36     close F ;
  37
  38 =head2 Accessing .tar.Z files
  39
  40 See previous FAQ item.
  41
  42 If the C<Archive::Tar> module is installed and either the C<uncompress> or
  43 C<gunzip> programs are available, you can use one of these workarounds to
  44 read C<.tar.Z> files.
  45
  46 Firstly with C<uncompress>
  47
  48     use strict;
  49     use warnings;
  50     use Archive::Tar;
  51
  52     open F, "uncompress -c $filename |";
  53     my $tar = Archive::Tar->new(*F);
  54     ...
  55
  56 and this with C<gunzip>
  57
  58     use strict;
  59     use warnings;
  60     use Archive::Tar;
  61
  62     open F, "gunzip -c $filename |";
  63     my $tar = Archive::Tar->new(*F);
  64     ...
  65
  66 Similarly, if the C<compress> program is available, you can use this to
  67 write a C<.tar.Z> file
  68
  69     use strict;
  70     use warnings;
  71     use Archive::Tar;
  72     use IO::File;
  73
  74     my $fh = new IO::File "| compress -c >$filename";
  75     my $tar = Archive::Tar->new();
  76     ...
  77     $tar->write($fh);
  78     $fh->close ;
  79
  80 =head2 Accessing Zip Files
  81
  82 This module does not support reading/writing zip files.
  83
  84 Support for reading/writing zip files is included with the
  85 C<IO::Compress::Zip> and C<IO::Uncompress::Unzip> modules.
  86
  87 The primary focus of the C<IO::Compress::Zip> and C<IO::Uncompress::Unzip>
  88 modules is to provide an C<IO::File> compatible streaming read/write
  89 interface to zip files/buffers. They are not fully fledged archivers. If
  90 you are looking for an archiver check out the C<Archive::Zip> module. You
  91 can find it on CPAN at
  92
  93     http://www.cpan.org/modules/by-module/Archive/Archive-Zip-*.tar.gz
  94
  95 =head2 Compressed files and Net::FTP
  96
  97 The C<Net::FTP> module provides two low-level methods called C<stor> and
  98 C<retr> that both return filehandles. These filehandles can be used with the
  99 C<IO::Compress/Uncompress> modules to compress or uncompress files read
 100 from or written to an FTP Server on the fly, without having to create a
 101 temporary file.
 102
 103 Firstly, here is code that uses C<retr> to uncompress a file as it is
 104 read from the FTP Server.
 105
 106     use Net::FTP;
 107     use IO::Uncompress::Bunzip2 qw(:all);
 108
 109     my $ftp = new Net::FTP ...
 110
 111     my $retr_fh = $ftp->retr($compressed_filename);
 112     bunzip2 $retr_fh => $outFilename, AutoClose => 1
 113         or die "Cannot uncompress '$compressed_filename': $Bunzip2Error\n";
 114
 115 and this to compress a file as it is written to the FTP Server
 116
 117     use Net::FTP;
 118     use IO::Compress::Bzip2 qw(:all);
 119
 120     my $stor_fh = $ftp->stor($filename);
 121     bzip2 $filename => $stor_fh, AutoClose => 1
 122         or die "Cannot compress '$filename': $Bzip2Error\n";
 123
 124 =head2 How do I recompress using a different compression?
 125
 126 This is easier than you might expect if you realise that all the
 127 C<IO::Compress::*> objects are derived from C<IO::File> and that all the
 128 C<IO::Uncompress::*> modules can read from an C<IO::File> filehandle.
 129
 130 So, for example, say you have a file compressed with gzip that you want to
 131 recompress with bzip2. Here is all that is needed to carry out the
 132 recompression.
 133
 134     use IO::Uncompress::Gunzip ':all';
 135     use IO::Compress::Bzip2 ':all';
 136
 137     my $gzipFile = "somefile.gz";
 138     my $bzipFile = "somefile.bz2";
 139
 140     my $gunzip = new IO::Uncompress::Gunzip $gzipFile
 141         or die "Cannot gunzip $gzipFile: $GunzipError\n" ;
 142
 143     bzip2 $gunzip => $bzipFile
 144         or die "Cannot bzip2 to $bzipFile: $Bzip2Error\n" ;
 145
 146 Note, there is a limitation of this technique. Some compression file
 147 formats store extra information along with the compressed data payload. For
 148 example, gzip can optionally store the original filename and Zip stores a
 149 lot of information about the original file. If the original compressed file
 150 contains any of this extra information, it will not be transferred to the
 151 new compressed file using the technique above.
 152
 153 =head2 Using C<InputLength> to uncompress data embedded in a larger file/buffer.
 154
 155 A fairly common use-case is where compressed data is embedded in a larger
 156 file/buffer and you want to read both.
 157
 158 As an example consider the structure of a zip file. This is a well-defined
 159 file format that mixes both compressed and uncompressed sections of data in
 160 a single file.
 161
 162 For the purposes of this discussion you can think of a zip file as a sequence
 163 of compressed data streams, each of which is prefixed by an uncompressed
 164 local header. The local header contains information about the compressed
 165 data stream, including the name of the compressed file and, in particular,
 166 the length of the compressed data stream.
 167
 168 To illustrate how to use C<InputLength> here is a script that walks a zip
 169 file and prints out how many lines are in each compressed file (if you
 170 intend to write code to walk through a zip file for real see
 171 L<IO::Uncompress::Unzip/"Walking through a zip file"> )
 172
 173     use strict;
 174     use warnings;
 175
 176     use IO::File;
 177     use IO::Uncompress::RawInflate qw(:all);
 178
 179     use constant ZIP_LOCAL_HDR_SIG  => 0x04034b50;
 180     use constant ZIP_LOCAL_HDR_LENGTH => 30;
 181
 182     my $file = $ARGV[0] ;
 183
 184     my $fh = new IO::File "<$file"
 185                 or die "Cannot open '$file': $!\n";
 186
 187     while (1)
 188     {
 189         my $sig;
 190         my $buffer;
 191
 192         my $x ;
 193         ($x = $fh->read($buffer, ZIP_LOCAL_HDR_LENGTH)) == ZIP_LOCAL_HDR_LENGTH
 194             or die "Truncated file: $!\n";
 195
 196         my $signature = unpack ("V", substr($buffer, 0, 4));
 197
 198         last unless $signature == ZIP_LOCAL_HDR_SIG;
 199
 200         # Read Local Header
 201         my $gpFlag             = unpack ("v", substr($buffer, 6, 2));
 202         my $compressedMethod   = unpack ("v", substr($buffer, 8, 2));
 203         my $compressedLength   = unpack ("V", substr($buffer, 18, 4));
 204         my $uncompressedLength = unpack ("V", substr($buffer, 22, 4));
 205         my $filename_length    = unpack ("v", substr($buffer, 26, 2));
 206         my $extra_length       = unpack ("v", substr($buffer, 28, 2));
 207
 208         my $filename ;
 209         $fh->read($filename, $filename_length) == $filename_length
 210             or die "Truncated file\n";
 211
 212         $fh->read($buffer, $extra_length) == $extra_length
 213             or die "Truncated file\n";
 214
 215         if ($compressedMethod != 8 && $compressedMethod != 0)
 216         {
 217             warn "Skipping file '$filename' - not deflated $compressedMethod\n";
 218             $fh->read($buffer, $compressedLength) == $compressedLength
 219                 or die "Truncated file\n";
 220             next;
 221         }
 222
 223         if ($compressedMethod == 0 && ($gpFlag & 8) == 8)
 224         {
 225             die "Streamed Stored not supported for '$filename'\n";
 226         }
 227
 228         next if $compressedLength == 0;
 229
 230         # Done reading the Local Header
 231
 232         my $inf = new IO::Uncompress::RawInflate $fh,
 233                             Transparent => 1,
 234                             InputLength => $compressedLength
 235           or die "Cannot uncompress $file [$filename]: $RawInflateError\n"  ;
 236
 237         my $line_count = 0;
 238
 239         while (<$inf>)
 240         {
 241             ++ $line_count;
 242         }
 243
 244         print "$filename: $line_count\n";
 245     }
 246
 247 The majority of the code above is concerned with reading the zip local
 248 header data. The code that I want to focus on is at the bottom.
 249
 250     while (1) {
 251
 252         # read local zip header data
 253         # get $filename
 254         # get $compressedLength
 255
 256         my $inf = new IO::Uncompress::RawInflate $fh,
 257                             Transparent => 1,
 258                             InputLength => $compressedLength
 259           or die "Cannot uncompress $file [$filename]: $RawInflateError\n"  ;
 260
 261         my $line_count = 0;
 262
 263         while (<$inf>)
 264         {
 265             ++ $line_count;
 266         }
 267
 268         print "$filename: $line_count\n";
 269     }
 270
 271 The call to C<IO::Uncompress::RawInflate> creates a new filehandle C<$inf>
 272 that can be used to read from the parent filehandle C<$fh>, uncompressing
 273 it as it goes. The use of the C<InputLength> option will guarantee that
 274 I<at most> C<$compressedLength> bytes of compressed data will be read from
 275 the C<$fh> filehandle (The only exception is for an error case like a
 276 truncated file or a corrupt data stream).
 277
 278 This means that once RawInflate is finished C<$fh> will be left at the
 279 byte directly after the compressed data stream.
 280
 281 Now consider what the code looks like without C<InputLength>
 282
 283     while (1) {
 284
 285         # read local zip header data
 286         # get $filename
 287         # get $compressedLength
 288
 289         # read all the compressed data into $data
 290         read($fh, $data, $compressedLength);
 291
 292         my $inf = new IO::Uncompress::RawInflate \$data,
 293                             Transparent => 1,
 294           or die "Cannot uncompress $file [$filename]: $RawInflateError\n"  ;
 295
 296         my $line_count = 0;
 297
 298         while (<$inf>)
 299         {
 300             ++ $line_count;
 301         }
 302
 303         print "$filename: $line_count\n";
 304     }
 305
 306 The difference here is the addition of the temporary variable C<$data>.
 307 This is used to store a copy of the compressed data while it is being
 308 uncompressed.
 309
 310 If you know that C<$compressedLength> isn't that big then using temporary
 311 storage won't be a problem. But if C<$compressedLength> is very large or
 312 you are writing an application that other people will use, and so have no
 313 idea how big C<$compressedLength> will be, it could be an issue.
 314
 315 Using C<InputLength> avoids the use of temporary storage and means the
 316 application can cope with large compressed data streams.
 317
 318 One final point -- obviously C<InputLength> can only be used whenever you
 319 know the length of the compressed data beforehand, like here with a zip
 320 file.
 321
 322 =head1 SEE ALSO
 323
 324 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
 325
 326 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
 327
 328 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
 329 L<Archive::Tar|Archive::Tar>,
 330 L<IO::Zlib|IO::Zlib>
 331
 332 =head1 AUTHOR
 333
 334 This module was written by Paul Marquess, F<pmqs@cpan.org>.
 335
 336 =head1 MODIFICATION HISTORY
 337
 338 See the Changes file.
 339
 340 =head1 COPYRIGHT AND LICENSE
 341
 342 Copyright (c) 2005-2008 Paul Marquess. All rights reserved.
 343
 344 This program is free software; you can redistribute it and/or
 345 modify it under the same terms as Perl itself.
 346