1 package IO::Uncompress::Unzip;
11 use IO::Uncompress::RawInflate 2.012 ;
12 use IO::Compress::Base::Common 2.012 qw(:Status createSelfTiedObject);
13 use IO::Uncompress::Adapter::Inflate 2.012 ;
14 use IO::Uncompress::Adapter::Identity 2.012 ;
15 use IO::Compress::Zlib::Extra 2.012 ;
16 use IO::Compress::Zip::Constants 2.012 ;
18 use Compress::Raw::Zlib 2.012 qw(crc32) ;
22 eval { require IO::Uncompress::Adapter::Bunzip2 ;
23 import IO::Uncompress::Adapter::Bunzip2 } ;
29 our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);
34 @ISA = qw(Exporter IO::Uncompress::RawInflate);
35 @EXPORT_OK = qw( $UnzipError unzip );
36 %EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
37 push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
38 Exporter::export_ok_tags('all');
41 ZIP_CENTRAL_HDR_SIG, \&skipCentralDirectory,
42 ZIP_END_CENTRAL_HDR_SIG, \&skipEndCentralDirectory,
43 ZIP64_END_CENTRAL_REC_HDR_SIG, \&skipCentralDirectory64Rec,
44 ZIP64_END_CENTRAL_LOC_HDR_SIG, \&skipCentralDirectory64Loc,
45 ZIP64_ARCHIVE_EXTRA_SIG, \&skipArchiveExtra,
46 ZIP64_DIGITAL_SIGNATURE_SIG, \&skipDigitalSignature,
52 my $obj = createSelfTiedObject($class, \$UnzipError);
53 $obj->_create(undef, 0, @_);
58 my $obj = createSelfTiedObject(undef, \$UnzipError);
59 return $obj->_inf(@_) ;
64 use IO::Compress::Base::Common 2.012 qw(:Parse);
69 'Name' => [1, 1, Parse_any, undef],
71 # 'Streaming' => [1, 1, Parse_boolean, 1],
80 # unzip always needs crc32
81 $got->value('CRC32' => 1);
83 *$self->{UnzipData}{Name} = $got->value('Name');
93 my $magic = $self->ckMagic()
96 *$self->{Info} = $self->readHeader($magic)
108 $self->smartReadExact(\$magic, 4);
110 *$self->{HeaderPending} = $magic ;
112 return $self->HeaderError("Minimum header size is " .
114 if length $magic != 4 ;
116 return $self->HeaderError("Bad Magic")
117 if ! _isZipMagic($magic) ;
119 *$self->{Type} = 'zip';
131 my $name = *$self->{UnzipData}{Name} ;
132 my $hdr = $self->_readZipHeader($magic) ;
136 if (! defined $name || $hdr->{Name} eq $name)
143 if (*$self->{ZipData}{Streaming}) {
148 my $status = $self->smartRead(\$b, 1024 * 16);
154 $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);
156 return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
157 *$self->{Uncomp}{ErrorNo})
158 if $self->saveStatus($status) == STATUS_ERROR;
160 if ($status == STATUS_ENDSTREAM) {
161 *$self->{Uncomp}->reset();
162 $self->pushBack($b) ;
168 $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
169 or return $self->saveErrorString(undef, "Truncated file");
172 my $c = $hdr->{CompressedLength}->get32bit();
173 $self->smartReadExact(\$buffer, $c)
174 or return $self->saveErrorString(undef, "Truncated file");
178 $self->chkTrailer($buffer) == STATUS_OK
179 or return $self->saveErrorString(undef, "Truncated file");
181 $hdr = $self->_readFullZipHeader();
183 return $self->saveErrorString(undef, "Cannot find '$name'")
184 if $self->smartEof();
195 my ($sig, $CRC32, $cSize, $uSize) ;
196 my ($cSizeHi, $uSizeHi) = (0, 0);
197 if (*$self->{ZipData}{Streaming}) {
198 $sig = unpack ("V", substr($trailer, 0, 4));
199 $CRC32 = unpack ("V", substr($trailer, 4, 4));
201 if (*$self->{ZipData}{Zip64} ) {
202 $cSize = U64::newUnpack_V64 substr($trailer, 8, 8);
203 $uSize = U64::newUnpack_V64 substr($trailer, 16, 8);
206 $cSize = U64::newUnpack_V32 substr($trailer, 8, 4);
207 $uSize = U64::newUnpack_V32 substr($trailer, 12, 4);
210 return $self->TrailerError("Data Descriptor signature, got $sig")
211 if $sig != ZIP_DATA_HDR_SIG;
214 ($CRC32, $cSize, $uSize) =
215 (*$self->{ZipData}{Crc32},
216 *$self->{ZipData}{CompressedLen},
217 *$self->{ZipData}{UnCompressedLen});
220 if (*$self->{Strict}) {
221 return $self->TrailerError("CRC mismatch")
222 if $CRC32 != *$self->{ZipData}{CRC32} ;
224 return $self->TrailerError("CSIZE mismatch.")
225 if ! $cSize->equal(*$self->{CompSize});
227 return $self->TrailerError("USIZE mismatch.")
228 if ! $uSize->equal(*$self->{UnCompSize});
231 my $reachedEnd = STATUS_ERROR ;
232 # check for central directory or end of central directory
236 my $got = $self->smartRead(\$magic, 4);
238 return $self->saveErrorString(STATUS_ERROR, "Truncated file")
239 if $got != 4 && *$self->{Strict};
245 return STATUS_ERROR ;
248 $self->pushBack($magic) ;
252 my $sig = unpack("V", $magic) ;
255 if ($hdr = $headerLookup{$sig})
257 if (&$hdr($self, $magic) != STATUS_OK ) {
258 if (*$self->{Strict}) {
259 return STATUS_ERROR ;
267 if ($sig == ZIP_END_CENTRAL_HDR_SIG)
273 elsif ($sig == ZIP_LOCAL_HDR_SIG)
275 $self->pushBack($magic) ;
281 $self->pushBack($magic) ;
289 sub skipCentralDirectory
295 $self->smartReadExact(\$buffer, 46 - 4)
296 or return $self->TrailerError("Minimum header size is " .
299 my $keep = $magic . $buffer ;
300 *$self->{HeaderPending} = $keep ;
302 #my $versionMadeBy = unpack ("v", substr($buffer, 4-4, 2));
303 #my $extractVersion = unpack ("v", substr($buffer, 6-4, 2));
304 #my $gpFlag = unpack ("v", substr($buffer, 8-4, 2));
305 #my $compressedMethod = unpack ("v", substr($buffer, 10-4, 2));
306 #my $lastModTime = unpack ("V", substr($buffer, 12-4, 4));
307 #my $crc32 = unpack ("V", substr($buffer, 16-4, 4));
308 my $compressedLength = unpack ("V", substr($buffer, 20-4, 4));
309 my $uncompressedLength = unpack ("V", substr($buffer, 24-4, 4));
310 my $filename_length = unpack ("v", substr($buffer, 28-4, 2));
311 my $extra_length = unpack ("v", substr($buffer, 30-4, 2));
312 my $comment_length = unpack ("v", substr($buffer, 32-4, 2));
313 #my $disk_start = unpack ("v", substr($buffer, 34-4, 2));
314 #my $int_file_attrib = unpack ("v", substr($buffer, 36-4, 2));
315 #my $ext_file_attrib = unpack ("V", substr($buffer, 38-4, 4));
316 #my $lcl_hdr_offset = unpack ("V", substr($buffer, 42-4, 4));
322 if ($filename_length)
324 $self->smartReadExact(\$filename, $filename_length)
325 or return $self->TruncatedTrailer("filename");
331 $self->smartReadExact(\$extraField, $extra_length)
332 or return $self->TruncatedTrailer("extra");
333 $keep .= $extraField ;
338 $self->smartReadExact(\$comment, $comment_length)
339 or return $self->TruncatedTrailer("comment");
352 $self->smartReadExact(\$buffer, 4)
353 or return $self->TrailerError("Minimum header size is " .
356 my $keep = $magic . $buffer ;
358 my $size = unpack ("V", $buffer);
360 $self->smartReadExact(\$buffer, $size)
361 or return $self->TrailerError("Minimum header size is " .
365 *$self->{HeaderPending} = $keep ;
371 sub skipCentralDirectory64Rec
377 $self->smartReadExact(\$buffer, 8)
378 or return $self->TrailerError("Minimum header size is " .
381 my $keep = $magic . $buffer ;
383 my ($sizeLo, $sizeHi) = unpack ("V V", $buffer);
385 # TODO - take SizeHi into account
386 $self->smartReadExact(\$buffer, $sizeLo)
387 or return $self->TrailerError("Minimum header size is " .
388 $sizeLo . " bytes") ;
391 *$self->{HeaderPending} = $keep ;
393 #my $versionMadeBy = unpack ("v", substr($buffer, 0, 2));
394 #my $extractVersion = unpack ("v", substr($buffer, 2, 2));
395 #my $diskNumber = unpack ("V", substr($buffer, 4, 4));
396 #my $cntrlDirDiskNo = unpack ("V", substr($buffer, 8, 4));
397 #my $entriesInThisCD = unpack ("V V", substr($buffer, 12, 8));
398 #my $entriesInCD = unpack ("V V", substr($buffer, 20, 8));
399 #my $sizeOfCD = unpack ("V V", substr($buffer, 28, 8));
400 #my $offsetToCD = unpack ("V V", substr($buffer, 36, 8));
405 sub skipCentralDirectory64Loc
411 $self->smartReadExact(\$buffer, 20 - 4)
412 or return $self->TrailerError("Minimum header size is " .
415 my $keep = $magic . $buffer ;
416 *$self->{HeaderPending} = $keep ;
418 #my $startCdDisk = unpack ("V", substr($buffer, 4-4, 4));
419 #my $offsetToCD = unpack ("V V", substr($buffer, 8-4, 8));
420 #my $diskCount = unpack ("V", substr($buffer, 16-4, 4));
425 sub skipEndCentralDirectory
431 $self->smartReadExact(\$buffer, 22 - 4)
432 or return $self->TrailerError("Minimum header size is " .
435 my $keep = $magic . $buffer ;
436 *$self->{HeaderPending} = $keep ;
438 #my $diskNumber = unpack ("v", substr($buffer, 4-4, 2));
439 #my $cntrlDirDiskNo = unpack ("v", substr($buffer, 6-4, 2));
440 #my $entriesInThisCD = unpack ("v", substr($buffer, 8-4, 2));
441 #my $entriesInCD = unpack ("v", substr($buffer, 10-4, 2));
442 #my $sizeOfCD = unpack ("V", substr($buffer, 12-4, 4));
443 #my $offsetToCD = unpack ("V", substr($buffer, 16-4, 4));
444 my $comment_length = unpack ("v", substr($buffer, 20-4, 2));
450 $self->smartReadExact(\$comment, $comment_length)
451 or return $self->TruncatedTrailer("comment");
462 return 0 if length $buffer < 4 ;
463 my $sig = unpack("V", $buffer) ;
464 return $sig == ZIP_LOCAL_HDR_SIG ;
468 sub _readFullZipHeader($)
473 $self->smartReadExact(\$magic, 4);
475 *$self->{HeaderPending} = $magic ;
477 return $self->HeaderError("Minimum header size is " .
479 if length $magic != 4 ;
482 return $self->HeaderError("Bad Magic")
483 if ! _isZipMagic($magic) ;
485 my $status = $self->_readZipHeader($magic);
486 delete *$self->{Transparent} if ! defined $status ;
490 sub _readZipHeader($)
492 my ($self, $magic) = @_ ;
496 $self->smartReadExact(\$buffer, 30 - 4)
497 or return $self->HeaderError("Minimum header size is " .
500 my $keep = $magic . $buffer ;
501 *$self->{HeaderPending} = $keep ;
503 my $extractVersion = unpack ("v", substr($buffer, 4-4, 2));
504 my $gpFlag = unpack ("v", substr($buffer, 6-4, 2));
505 my $compressedMethod = unpack ("v", substr($buffer, 8-4, 2));
506 my $lastModTime = unpack ("V", substr($buffer, 10-4, 4));
507 my $crc32 = unpack ("V", substr($buffer, 14-4, 4));
508 my $compressedLength = new U64 unpack ("V", substr($buffer, 18-4, 4));
509 my $uncompressedLength = new U64 unpack ("V", substr($buffer, 22-4, 4));
510 my $filename_length = unpack ("v", substr($buffer, 26-4, 2));
511 my $extra_length = unpack ("v", substr($buffer, 28-4, 2));
516 my $streamingMode = ($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ? 1 : 0 ;
518 return $self->HeaderError("Streamed Stored content not supported")
519 if $streamingMode && $compressedMethod == 0 ;
521 return $self->HeaderError("Encrypted content not supported")
522 if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);
524 return $self->HeaderError("Patch content not supported")
525 if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;
527 *$self->{ZipData}{Streaming} = $streamingMode;
530 if ($filename_length)
532 $self->smartReadExact(\$filename, $filename_length)
533 or return $self->TruncatedHeader("Filename");
541 $self->smartReadExact(\$extraField, $extra_length)
542 or return $self->TruncatedHeader("Extra Field");
544 my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
546 return $self->HeaderError($bad)
549 $keep .= $extraField ;
554 $Extra{$_->[0]} = \$_->[1];
557 if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
561 my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };
563 # TODO - This code assumes that all the fields in the Zip64
564 # extra field aren't necessarily present. The spec says that
565 # they only exist if the equivalent local headers are -1.
566 # Need to check that info-zip fills out -1 in the local header
569 if (! $streamingMode) {
572 $uncompressedLength = U64::newUnpack_V64 substr($buff, 0, 8)
573 if $uncompressedLength->get32bit() == 0xFFFFFFFF ; # 32-bit header field is all ones when the real size is in the Zip64 extra field
577 $compressedLength = U64::newUnpack_V64 substr($buff, $offset, 8)
578 if $compressedLength->get32bit() == 0xFFFFFFFF ; # compare the low 32 bits, not the U64 object reference
582 #my $cheaderOffset = U64::newUnpack_V64 substr($buff, 16, 8);
583 #my $diskNumber = unpack ("V", substr($buff, 24, 4));
588 *$self->{ZipData}{Zip64} = $zip64;
590 if (! $streamingMode) {
591 *$self->{ZipData}{Streaming} = 0;
592 *$self->{ZipData}{Crc32} = $crc32;
593 *$self->{ZipData}{CompressedLen} = $compressedLength;
594 *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
595 *$self->{CompressedInputLengthRemaining} =
596 *$self->{CompressedInputLength} = $compressedLength->get32bit();
599 *$self->{ZipData}{Method} = $compressedMethod;
600 if ($compressedMethod == ZIP_CM_DEFLATE)
602 *$self->{Type} = 'zip-deflate';
603 my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);
605 *$self->{Uncomp} = $obj;
606 *$self->{ZipData}{CRC32} = crc32(undef);
608 elsif ($compressedMethod == ZIP_CM_BZIP2)
610 return $self->HeaderError("Unsupported Compression format $compressedMethod")
611 if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;
613 *$self->{Type} = 'zip-bzip2';
615 my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();
617 *$self->{Uncomp} = $obj;
618 *$self->{ZipData}{CRC32} = crc32(undef);
620 elsif ($compressedMethod == ZIP_CM_STORE)
622 # TODO -- add support for reading uncompressed
624 *$self->{Type} = 'zip-stored';
626 my $obj = IO::Uncompress::Adapter::Identity::mkUncompObject();
628 *$self->{Uncomp} = $obj;
632 return $self->HeaderError("Unsupported Compression format $compressedMethod");
637 'FingerprintLength' => 4,
638 #'HeaderLength' => $compressedMethod == 8 ? length $keep : 0,
639 'HeaderLength' => length $keep,
641 'TrailerLength' => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
643 'CompressedLength' => $compressedLength ,
644 'UncompressedLength' => $uncompressedLength ,
647 'Time' => _dosToUnixTime($lastModTime),
648 'Stream' => $streamingMode,
650 'MethodID' => $compressedMethod,
651 'MethodName' => $compressedMethod == ZIP_CM_DEFLATE
653 : $compressedMethod == ZIP_CM_BZIP2
655 : $compressedMethod == ZIP_CM_STORE
659 # 'TextFlag' => $flag & GZIP_FLG_FTEXT ? 1 : 0,
660 # 'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0,
661 # 'NameFlag' => $flag & GZIP_FLG_FNAME ? 1 : 0,
662 # 'CommentFlag' => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
663 # 'ExtraFlag' => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
664 # 'Comment' => $comment,
666 # 'OsName' => defined $GZIP_OS_Names{$os}
667 # ? $GZIP_OS_Names{$os} : "Unknown",
668 # 'HeaderCRC' => $HeaderCRC,
670 # 'ExtraFlags' => $xfl,
671 'ExtraFieldRaw' => $extraField,
672 'ExtraField' => [ @EXTRA ],
678 sub filterUncompressed
682 if (*$self->{ZipData}{Method} == 12) {
683 *$self->{ZipData}{CRC32} = crc32(${$_[0]}, *$self->{ZipData}{CRC32});
686 *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
694 #use Time::Local 'timelocal_nocheck';
695 use Time::Local 'timelocal';
699 my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
700 my $mon = ( ( $dt >> 21 ) & 0x0f ) - 1;
701 my $mday = ( ( $dt >> 16 ) & 0x1f );
703 my $hour = ( ( $dt >> 11 ) & 0x1f );
704 my $min = ( ( $dt >> 5 ) & 0x3f );
705 my $sec = ( ( $dt << 1 ) & 0x3e );
709 eval { timelocal( $sec, $min, $hour, $mday, $mon, $year ); };
723 IO::Uncompress::Unzip - Read zip files/buffers
727 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
729 my $status = unzip $input => $output [,OPTS]
730 or die "unzip failed: $UnzipError\n";
732 my $z = new IO::Uncompress::Unzip $input [OPTS]
733 or die "unzip failed: $UnzipError\n";
735 $status = $z->read($buffer)
736 $status = $z->read($buffer, $length)
737 $status = $z->read($buffer, $length, $offset)
738 $line = $z->getline()
743 $status = $z->inflateSync()
745 $data = $z->trailingData()
746 $status = $z->nextStream()
747 $data = $z->getHeaderInfo()
749 $z->seek($position, $whence)
761 read($z, $buffer, $length);
762 read($z, $buffer, $length, $offset);
764 seek($z, $position, $whence)
772 This module provides a Perl interface that allows the reading of
775 For writing zip files/buffers, see the companion module IO::Compress::Zip.
777 =head1 Functional Interface
779 A top-level function, C<unzip>, is provided to carry out
780 "one-shot" uncompression between buffers and/or files. For finer
781 control over the uncompression process, see the L</"OO Interface">
784 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
786 unzip $input => $output [,OPTS]
787 or die "unzip failed: $UnzipError\n";
789 The functional interface needs Perl5.005 or better.
791 =head2 unzip $input => $output [, OPTS]
793 C<unzip> expects at least two parameters, C<$input> and C<$output>.
795 =head3 The C<$input> parameter
797 The parameter, C<$input>, is used to define the source of
800 It can take one of the following forms:
806 If the C<$input> parameter is a simple scalar, it is assumed to be a
807 filename. This file will be opened for reading and the input data
808 will be read from it.
812 If the C<$input> parameter is a filehandle, the input data will be
814 The string '-' can be used as an alias for standard input.
816 =item A scalar reference
818 If C<$input> is a scalar reference, the input data will be read
821 =item An array reference
823 If C<$input> is an array reference, each element in the array must be a
826 The input data will be read from each file in turn.
828 The complete array will be walked to ensure that it only
829 contains valid filenames before any data is uncompressed.
831 =item An Input FileGlob string
833 If C<$input> is a string that is delimited by the characters "<" and ">"
834 C<unzip> will assume that it is an I<input fileglob string>. The
835 input is the list of files that match the fileglob.
837 If the fileglob does not match any files ...
839 See L<File::GlobMapper|File::GlobMapper> for more details.
843 If the C<$input> parameter is any other type, C<undef> will be returned.
845 =head3 The C<$output> parameter
847 The parameter C<$output> is used to control the destination of the
848 uncompressed data. This parameter can take one of these forms.
854 If the C<$output> parameter is a simple scalar, it is assumed to be a
855 filename. This file will be opened for writing and the uncompressed
856 data will be written to it.
860 If the C<$output> parameter is a filehandle, the uncompressed data
861 will be written to it.
862 The string '-' can be used as an alias for standard output.
864 =item A scalar reference
866 If C<$output> is a scalar reference, the uncompressed data will be
867 stored in C<$$output>.
869 =item An Array Reference
871 If C<$output> is an array reference, the uncompressed data will be
872 pushed onto the array.
874 =item An Output FileGlob
876 If C<$output> is a string that is delimited by the characters "<" and ">"
877 C<unzip> will assume that it is an I<output fileglob string>. The
878 output is the list of files that match the fileglob.
880 When C<$output> is a fileglob string, C<$input> must also be a fileglob
881 string. Anything else is an error.
885 If the C<$output> parameter is any other type, C<undef> will be returned.
889 When C<$input> maps to multiple compressed files/buffers and C<$output> is
890 a single file/buffer, after uncompression C<$output> will contain a
891 concatenation of all the uncompressed data from each of the input
894 =head2 Optional Parameters
896 Unless specified below, the optional parameters for C<unzip>,
897 C<OPTS>, are the same as those used with the OO interface defined in the
898 L</"Constructor Options"> section below.
902 =item C<< AutoClose => 0|1 >>
904 This option applies to any input or output data streams to
905 C<unzip> that are filehandles.
907 If C<AutoClose> is specified, and the value is true, it will result in all
908 input and/or output filehandles being closed once C<unzip> has
911 This parameter defaults to 0.
913 =item C<< BinModeOut => 0|1 >>
915 When writing to a file or filehandle, set C<binmode> before writing to the
920 =item C<< Append => 0|1 >>
924 =item C<< MultiStream => 0|1 >>
926 If the input file/buffer contains multiple compressed data streams, this
927 option will uncompress the whole lot as a single data stream.
931 =item C<< TrailingData => $scalar >>
933 Returns the data, if any, that is present immediately after the compressed
934 data stream once uncompression is complete.
936 This option can be used when there is useful information immediately
937 following the compressed data stream, and you don't know the length of the
938 compressed data stream.
940 If the input is a buffer, C<trailingData> will return everything from the
941 end of the compressed data stream to the end of the buffer.
943 If the input is a filehandle, C<trailingData> will return the data that is
944 left in the filehandle input buffer once the end of the compressed data
945 stream has been reached. You can then use the filehandle to read the rest
948 Don't bother using C<trailingData> if the input is a filename.
950 If you know the length of the compressed data stream before you start
951 uncompressing, you can avoid having to use C<trailingData> by setting the
952 C<InputLength> option.
958 To read the contents of the file C<file1.txt.zip> and write the
959 uncompressed data to the file C<file1.txt>.
963 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
965 my $input = "file1.txt.zip";
966 my $output = "file1.txt";
967 unzip $input => $output
968 or die "unzip failed: $UnzipError\n";
970 To read from an existing Perl filehandle, C<$input>, and write the
971 uncompressed data to a buffer, C<$buffer>.
975 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
978 my $input = new IO::File "<file1.txt.zip"
979 or die "Cannot open 'file1.txt.zip': $!\n" ;
981 unzip $input => \$buffer
982 or die "unzip failed: $UnzipError\n";
984 To uncompress all files in the directory "/my/home" that match "*.txt.zip" and store the uncompressed data in the same directory
988 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
990 unzip '</my/home/*.txt.zip>' => '</my/home/#1.txt>'
991 or die "unzip failed: $UnzipError\n";
993 and if you want to uncompress each file one at a time, this will do the trick
997 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
999 for my $input ( glob "/my/home/*.txt.zip" )
1001 my $output = $input;
1002 $output =~ s/\.zip$// ;
1003 unzip $input => $output
1004 or die "Error uncompressing '$input': $UnzipError\n";
1011 The format of the constructor for IO::Uncompress::Unzip is shown below
1013 my $z = new IO::Uncompress::Unzip $input [OPTS]
1014 or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1016 Returns an C<IO::Uncompress::Unzip> object on success and undef on failure.
1017 The variable C<$UnzipError> will contain an error message on failure.
1019 If you are running Perl 5.005 or better the object, C<$z>, returned from
1020 IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle.
1021 This means that all normal input file operations can be carried out with
1022 C<$z>. For example, to read a line from a compressed file/buffer you can
1023 use either of these forms
1025 $line = $z->getline();
1028 The mandatory parameter C<$input> is used to determine the source of the
1029 compressed data. This parameter can take one of three forms.
1035 If the C<$input> parameter is a scalar, it is assumed to be a filename. This
1036 file will be opened for reading and the compressed data will be read from it.
1040 If the C<$input> parameter is a filehandle, the compressed data will be
1042 The string '-' can be used as an alias for standard input.
1044 =item A scalar reference
1046 If C<$input> is a scalar reference, the compressed data will be read from
1051 =head2 Constructor Options
1053 The option names defined below are case insensitive and can be optionally
1054 prefixed by a '-'. So all of the following are valid
1061 OPTS is a combination of the following options:
1065 =item C<< AutoClose => 0|1 >>
1067 This option is only valid when the C<$input> parameter is a filehandle. If
1068 specified, and the value is true, it will result in the file being closed once
1069 either the C<close> method is called or the IO::Uncompress::Unzip object is
1072 This parameter defaults to 0.
1074 =item C<< MultiStream => 0|1 >>
1076 Treats the complete zip file/buffer as a single compressed data
1077 stream. When reading in multi-stream mode each member of the zip
1078 file/buffer will be uncompressed in turn until the end of the file/buffer
1081 This parameter defaults to 0.
1083 =item C<< Prime => $string >>
1085 This option will uncompress the contents of C<$string> before processing the
1088 This option can be useful when the compressed data is embedded in another
1089 file/data structure and it is not possible to work out where the compressed
1090 data begins without having to read the first few bytes. If this is the
1091 case, the uncompression can be I<primed> with these bytes using this
1094 =item C<< Transparent => 0|1 >>
1096 If this option is set and the input file/buffer is not compressed data,
1097 the module will allow reading of it anyway.
1099 In addition, if the input file/buffer does contain compressed data and
1100 there is non-compressed data immediately following it, setting this option
1101 will make this module treat the whole file/buffer as a single data stream.
1103 This option defaults to 1.
1105 =item C<< BlockSize => $num >>
1107 When reading the compressed input data, IO::Uncompress::Unzip will read it in
1108 blocks of C<$num> bytes.
1110 This option defaults to 4096.
1112 =item C<< InputLength => $size >>
1114 When present this option will limit the number of compressed bytes read
1115 from the input file/buffer to C<$size>. This option can be used in the
1116 situation where there is useful data directly after the compressed data
1117 stream and you know beforehand the exact length of the compressed data
1120 This option is mostly used when reading from a filehandle, in which case
1121 the file pointer will be left pointing to the first byte directly after the
1122 compressed data stream.
1124 This option defaults to off.
1126 =item C<< Append => 0|1 >>
1128 This option controls what the C<read> method does with uncompressed data.
1130 If set to 1, all uncompressed data will be appended to the output parameter
1131 of the C<read> method.
1133 If set to 0, the contents of the output parameter of the C<read> method
1134 will be overwritten by the uncompressed data.
1138 =item C<< Strict => 0|1 >>
1140 This option controls whether the extra checks defined below are used when
1141 carrying out the decompression. When Strict is on, the extra tests are
1142 carried out, when Strict is off they are not.
1144 The default for this option is off.
1158 $status = $z->read($buffer)
1160 Reads a block of compressed data (the size of the compressed block is
1161 determined by the C<BlockSize> option in the constructor), uncompresses it and
1162 writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
1163 set in the constructor, the uncompressed data will be appended to the
1164 C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
1166 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1167 or a negative number on error.
1173 $status = $z->read($buffer, $length)
1174 $status = $z->read($buffer, $length, $offset)
1176 $status = read($z, $buffer, $length)
1177 $status = read($z, $buffer, $length, $offset)
1179 Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
1181 The main difference between this form of the C<read> method and the
1182 previous one, is that this one will attempt to return I<exactly> C<$length>
1183 bytes. The only circumstances that this function will not is if end-of-file
1184 or an IO error is encountered.
1186 Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1187 or a negative number on error.
1193 $line = $z->getline()
1196 Reads a single line.
1198 This method fully supports the use of the variable C<$/> (or
1199 C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
1200 determine what constitutes an end of line. Paragraph mode, record mode and
1201 file slurp mode are all supported.
1209 Read a single character.
1215 $char = $z->ungetc($string)
1221 $status = $z->inflateSync()
1225 =head2 getHeaderInfo
1229 $hdr = $z->getHeaderInfo();
1230 @hdrs = $z->getHeaderInfo();
1232 This method returns either a hash reference (in scalar context) or a list
1233 of hash references (in array context) that contains information about each
1234 of the header fields in the compressed data stream(s).
1243 Returns the uncompressed file offset.
1252 Returns true if the end of the compressed input stream has been reached.
1256 $z->seek($position, $whence);
1257 seek($z, $position, $whence);
1259 Provides a sub-set of the C<seek> functionality, with the restriction
1260 that it is only legal to seek forward in the input file/buffer.
1261 It is a fatal error to attempt to seek backward.
1263 The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
1264 SEEK_CUR or SEEK_END.
1266 Returns 1 on success, 0 on failure.
1275 This is a noop provided for completeness.
1281 Returns true if the object currently refers to an opened file/buffer.
1285 my $prev = $z->autoflush()
1286 my $prev = $z->autoflush(EXPR)
1288 If the C<$z> object is associated with a file or a filehandle, this method
1289 returns the current autoflush setting for the underlying filehandle. If
1290 C<EXPR> is present, and is non-zero, it will enable flushing after every
1291 write/print operation.
1293 If C<$z> is associated with a buffer, this method has no effect and always
1296 B<Note> that the special variable C<$|> B<cannot> be used to set or
1297 retrieve the autoflush setting.
1299 =head2 input_line_number
1301 $z->input_line_number()
1302 $z->input_line_number(EXPR)
1304 Returns the current uncompressed line number. If C<EXPR> is present it has
1305 the effect of setting the line number. Note that setting the line number
1306 does not change the current position within the file/buffer being read.
1308 The contents of C<$/> are used to determine what constitutes a line
1316 If the C<$z> object is associated with a file or a filehandle, C<fileno>
1317 will return the underlying file descriptor. Once the C<close> method is
1318 called C<fileno> will return C<undef>.
1320 If the C<$z> object is associated with a buffer, this method will return
1328 Closes the input file/buffer.
1330 For most versions of Perl this method will be automatically invoked if
1331 the IO::Uncompress::Unzip object is destroyed (either explicitly or by the
1332 variable with the reference to the object going out of scope). The
1333 exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1334 these cases, the C<close> method will be called automatically, but
1335 not until global destruction of all live objects when the program is
1338 Therefore, if you want your scripts to be able to run on all versions
1339 of Perl, you should call C<close> explicitly and not rely on automatic
1342 Returns true on success, otherwise 0.
1344 If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip
1345 object was created, and the object is associated with a file, the
1346 underlying file will also be closed.
1352 my $status = $z->nextStream();
1354 Skips to the next compressed data stream in the input file/buffer. If a new
1355 compressed data stream is found, the eof marker will be cleared and C<$.>
1358 Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1359 error was encountered.
1365 my $data = $z->trailingData();
1367 Returns the data, if any, that is present immediately after the compressed
1368 data stream once uncompression is complete. It only makes sense to call
1369 this method once the end of the compressed data stream has been
1372 This method can be used when there is useful information immediately
1373 following the compressed data stream, and you don't know the length of the
1374 compressed data stream.
1376 If the input is a buffer, C<trailingData> will return everything from the
1377 end of the compressed data stream to the end of the buffer.
1379 If the input is a filehandle, C<trailingData> will return the data that is
1380 left in the filehandle input buffer once the end of the compressed data
1381 stream has been reached. You can then use the filehandle to read the rest
1384 Don't bother using C<trailingData> if the input is a filename.
1386 If you know the length of the compressed data stream before you start
1387 uncompressing, you can avoid having to use C<trailingData> by setting the
1388 C<InputLength> option in the constructor.
1392 No symbolic constants are required by this IO::Uncompress::Unzip at present.
1398 Imports C<unzip> and C<$UnzipError>.
1401 use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1407 =head2 Working with Net::FTP
1409 See L<IO::Uncompress::Unzip::FAQ|IO::Uncompress::Unzip::FAQ/"Compressed files and Net::FTP">
1411 =head2 Walking through a zip file
1413 The code below can be used to traverse a zip file, one compressed data
1416 use IO::Uncompress::Unzip qw($UnzipError);
1418 my $zipfile = "somefile.zip";
1419 my $u = new IO::Uncompress::Unzip $zipfile
1420 or die "Cannot open $zipfile: $UnzipError";
1423 for ($status = 1; ! $u->eof(); $status = $u->nextStream())
1426 my $name = $u->getHeaderInfo()->{Name};
1427 warn "Processing member $name\n" ;
1430 while (($status = $u->read($buff)) > 0) {
1434 last unless $status == 0;
1437 die "Error processing $zipfile: $!\n"
1440 Each individual compressed data stream is read until the logical
1441 end-of-file is reached. Then C<nextStream> is called. This will skip to the
1442 start of the next compressed data stream and clear the end-of-file flag.
1444 It is also worth noting that C<nextStream> can be called at any time -- you
1445 don't have to wait until you have exhausted a compressed data stream before
1446 skipping to the next one.
1450 L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
1452 L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1454 L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1455 L<Archive::Tar|Archive::Tar>,
1456 L<IO::Zlib|IO::Zlib>
1458 For RFC 1950, 1951 and 1952 see
1459 F<http://www.faqs.org/rfcs/rfc1950.html>,
1460 F<http://www.faqs.org/rfcs/rfc1951.html> and
1461 F<http://www.faqs.org/rfcs/rfc1952.html>
1463 The I<zlib> compression library was written by Jean-loup Gailly
1464 F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1466 The primary site for the I<zlib> compression library is
1467 F<http://www.zlib.org>.
1469 The primary site for gzip is F<http://www.gzip.org>.
1473 This module was written by Paul Marquess, F<pmqs@cpan.org>.
1475 =head1 MODIFICATION HISTORY
1477 See the Changes file.
1479 =head1 COPYRIGHT AND LICENSE
1481 Copyright (c) 2005-2008 Paul Marquess. All rights reserved.
1483 This program is free software; you can redistribute it and/or
1484 modify it under the same terms as Perl itself.