6 use vars qw($VERSION $TRANSLATE_UNDERSCORE);
9 # The $TRANSLATE_UNDERSCORE variable controls whether '_' can be used
10 # as a replacement for '-' in header field names.
11 $TRANSLATE_UNDERSCORE = 1 unless defined $TRANSLATE_UNDERSCORE;
13 # "Good Practice" order of HTTP message headers:
19 my @general_headers = qw(
20 Cache-Control Connection Date Pragma Trailer Transfer-Encoding Upgrade
24 my @request_headers = qw(
25 Accept Accept-Charset Accept-Encoding Accept-Language
26 Authorization Expect From Host
27 If-Match If-Modified-Since If-None-Match If-Range If-Unmodified-Since
28 Max-Forwards Proxy-Authorization Range Referer TE User-Agent
31 my @response_headers = qw(
32 Accept-Ranges Age ETag Location Proxy-Authenticate Retry-After Server
36 my @entity_headers = qw(
37 Allow Content-Encoding Content-Language Content-Length Content-Location
38 Content-MD5 Content-Range Content-Type Expires Last-Modified
41 my %entity_header = map { lc($_) => 1 } @entity_headers;
50 # Make alternative representations of @header_order. This is used
51 # for sorting and case matching.
59 $header_order{$lc} = ++$i;
60 $standard_case{$lc} = $_;
69 my $self = bless {}, $class;
70 $self->header(@_) if @_; # set up initial headers
78 Carp::croak('Usage: $h->header($field, ...)') unless @_;
83 my $op = @_ ? ($seen{lc($field)}++ ? 'PUSH' : 'SET') : 'GET';
84 @old = $self->_header($field, shift, $op);
86 return @old if wantarray;
87 return $old[0] if @old <= 1;
100 Carp::croak('Usage: $h->push_header($field, $val)') if @_ != 3;
101 shift->_header(@_, 'PUSH');
107 Carp::croak('Usage: $h->init_header($field, $val)') if @_ != 3;
108 shift->_header(@_, 'INIT');
114 my($self, @fields) = @_;
117 foreach $field (@fields) {
118 $field =~ tr/_/-/ if $field !~ /^:/ && $TRANSLATE_UNDERSCORE;
119 my $v = delete $self->{lc $field};
120 push(@values, ref($v) eq 'ARRAY' ? @$v : $v) if defined $v;
125 sub remove_content_headers
128 unless (defined(wantarray)) {
129 # fast branch that does not create return object
130 delete @$self{grep $entity_header{$_} || /^content-/, keys %$self};
134 my $c = ref($self)->new;
135 for my $f (grep $entity_header{$_} || /^content-/, keys %$self) {
136 $c->{$f} = delete $self->{$f};
144 my($self, $field, $val, $op) = @_;
146 # $push is only used interally sub push_header
147 Carp::croak('Need a field name') unless length($field);
149 unless ($field =~ /^:/) {
150 $field =~ tr/_/-/ if $TRANSLATE_UNDERSCORE;
153 unless(defined $standard_case{$field}) {
154 # generate a %standard_case entry for this field
155 $old =~ s/\b(\w)/\u$1/g;
156 $standard_case{$field} = $old;
160 my $h = $self->{$field};
161 my @old = ref($h) eq 'ARRAY' ? @$h : (defined($h) ? ($h) : ());
163 $op ||= defined($val) ? 'SET' : 'GET';
164 unless ($op eq 'GET' || ($op eq 'INIT' && @old)) {
166 my @new = ($op eq 'PUSH') ? @old : ();
167 if (ref($val) ne 'ARRAY') {
173 $self->{$field} = @new > 1 ? \@new : $new[0];
175 elsif ($op ne 'PUSH') {
176 delete $self->{$field};
183 sub _sorted_field_names
187 ($header_order{$a} || 999) <=> ($header_order{$b} || 999) ||
193 sub header_field_names {
195 return map $standard_case{$_} || $_, $self->_sorted_field_names
203 my($self, $sub) = @_;
205 foreach $key ($self->_sorted_field_names) {
206 next if $key =~ /^_/;
207 my $vals = $self->{$key};
208 if (ref($vals) eq 'ARRAY') {
211 &$sub($standard_case{$key} || $key, $val);
215 &$sub($standard_case{$key} || $key, $vals);
223 my($self, $endl) = @_;
224 $endl = "\n" unless defined $endl;
228 my($field, $val) = @_;
231 # must handle header values with embedded newlines with care
232 $val =~ s/\s+$//; # trailing newlines and space must go
233 $val =~ s/\n\n+/\n/g; # no empty lines
234 $val =~ s/\n([^\040\t])/\n $1/g; # intial space for continuation
235 $val =~ s/\n/$endl/g; # substitute with requested line ending
237 push(@result, "$field: $val");
240 join($endl, @result, '');
247 my $clone = new HTTP::Headers;
248 $self->scan(sub { $clone->push_header(@_);} );
256 my($self, $header, $time) = @_;
257 my($old) = $self->_header($header);
259 $self->_header($header, HTTP::Date::time2str($time));
261 $old =~ s/;.*// if defined($old);
262 HTTP::Date::str2time($old);
266 sub date { shift->_date_header('Date', @_); }
267 sub expires { shift->_date_header('Expires', @_); }
268 sub if_modified_since { shift->_date_header('If-Modified-Since', @_); }
269 sub if_unmodified_since { shift->_date_header('If-Unmodified-Since', @_); }
270 sub last_modified { shift->_date_header('Last-Modified', @_); }
272 # This is used as a private LWP extension. The Client-Date header is
273 # added as a timestamp to a response when it has been received.
274 sub client_date { shift->_date_header('Client-Date', @_); }
276 # The retry_after field is dual format (can also be a expressed as
277 # number of seconds from now), so we don't provide an easy way to
278 # access it until we have know how both these interfaces can be
279 # addressed. One possibility is to return a negative value for
280 # relative seconds and a positive value for epoch based time values.
281 #sub retry_after { shift->_date_header('Retry-After', @_); }
284 my $ct = (shift->_header('Content-Type', @_))[0];
285 return '' unless defined($ct) && length($ct);
286 my @ct = split(/;\s*/, $ct, 2);
291 wantarray ? @ct : $ct[0];
296 return $self->content_type eq 'text/html' || $self->_is_xhtml;
300 my $ct = shift->content_type;
301 for (qw(application/xhtml+xml application/vnd.wap.xhtml+xml)) {
302 return 1 if $_ eq $ct;
309 if (@_ && $_[0] =~ /#/) {
310 # Strip fragment per RFC 2616, section 14.36.
314 $uri->fragment(undef);
321 ($self->_header('Referer', @_))[0];
323 *referrer = \&referer; # on tchrist's request
325 sub title { (shift->_header('Title', @_))[0] }
326 sub content_encoding { (shift->_header('Content-Encoding', @_))[0] }
327 sub content_language { (shift->_header('Content-Language', @_))[0] }
328 sub content_length { (shift->_header('Content-Length', @_))[0] }
330 sub user_agent { (shift->_header('User-Agent', @_))[0] }
331 sub server { (shift->_header('Server', @_))[0] }
333 sub from { (shift->_header('From', @_))[0] }
334 sub warning { (shift->_header('Warning', @_))[0] }
336 sub www_authenticate { (shift->_header('WWW-Authenticate', @_))[0] }
337 sub authorization { (shift->_header('Authorization', @_))[0] }
339 sub proxy_authenticate { (shift->_header('Proxy-Authenticate', @_))[0] }
340 sub proxy_authorization { (shift->_header('Proxy-Authorization', @_))[0] }
342 sub authorization_basic { shift->_basic_auth("Authorization", @_) }
343 sub proxy_authorization_basic { shift->_basic_auth("Proxy-Authorization", @_) }
346 require MIME::Base64;
347 my($self, $h, $user, $passwd) = @_;
348 my($old) = $self->_header($h);
350 Carp::croak("Basic authorization user name can't contain ':'")
352 $passwd = '' unless defined $passwd;
353 $self->_header($h => 'Basic ' .
354 MIME::Base64::encode("$user:$passwd", ''));
356 if (defined $old && $old =~ s/^\s*Basic\s+//) {
357 my $val = MIME::Base64::decode($old);
358 return $val unless wantarray;
359 return split(/:/, $val, 2);
371 HTTP::Headers - Class encapsulating HTTP Message headers
375 require HTTP::Headers;
376 $h = HTTP::Headers->new;
378 $h->header('Content-Type' => 'text/plain'); # set
379 $ct = $h->header('Content-Type'); # get
380 $h->remove_header('Content-Type'); # delete
384 The C<HTTP::Headers> class encapsulates HTTP-style message headers.
385 The headers consist of attribute-value pairs also called fields, which
386 may be repeated, and which are printed in a particular order. The
387 field names are cases insensitive.
389 Instances of this class are usually created as member variables of the
390 C<HTTP::Request> and C<HTTP::Response> classes, internal to the
393 The following methods are available:
397 =item $h = HTTP::Headers->new
399 Constructs a new C<HTTP::Headers> object. You might pass some initial
400 attribute-value pairs as parameters to the constructor. I<E.g.>:
402 $h = HTTP::Headers->new(
403 Date => 'Thu, 03 Feb 1994 00:00:00 GMT',
404 Content_Type => 'text/html; version=3.2',
405 Content_Base => 'http://www.perl.org/');
407 The constructor arguments are passed to the C<header> method which is
412 Returns a copy of this C<HTTP::Headers> object.
414 =item $h->header( $field )
416 =item $h->header( $field => $value, ... )
418 Get or set the value of one or more header fields. The header field
419 name ($field) is not case sensitive. To make the life easier for perl
420 users who wants to avoid quoting before the => operator, you can use
421 '_' as a replacement for '-' in header names.
423 The header() method accepts multiple ($field => $value) pairs, which
424 means that you can update several fields with a single invocation.
426 The $value argument may be a plain string or a reference to an array
427 of strings for a multi-valued field. If the $value is provided as
428 C<undef> then the field is removed. If the $value is not given, then
429 that header field will remain unchanged.
431 The old value (or values) of the last of the header fields is returned.
432 If no such field exists C<undef> will be returned.
434 A multi-valued field will be returned as separate values in list
435 context and will be concatenated with ", " as separator in scalar
436 context. The HTTP spec (RFC 2616) promise that joining multiple
437 values in this way will not change the semantic of a header field, but
438 in practice there are cases like old-style Netscape cookies (see
439 L<HTTP::Cookies>) where "," is used as part of the syntax of a single
444 $header->header(MIME_Version => '1.0',
445 User_Agent => 'My-Web-Client/0.01');
446 $header->header(Accept => "text/html, text/plain, image/*");
447 $header->header(Accept => [qw(text/html text/plain image/*)]);
448 @accepts = $header->header('Accept'); # get multiple values
449 $accepts = $header->header('Accept'); # get values as a single string
451 =item $h->push_header( $field => $value )
453 Add a new field value for the specified header field. Previous values
454 for the same field are retained.
456 As for the header() method, the field name ($field) is not case
457 sensitive and '_' can be used as a replacement for '-'.
459 The $value argument may be a scalar or a reference to a list of
462 $header->push_header(Accept => 'image/jpeg');
463 $header->push_header(Accept => [map "image/$_", qw(gif png tiff)]);
465 =item $h->init_header( $field => $value )
467 Set the specified header to the given value, but only if no previous
468 value for that field is set.
470 The header field name ($field) is not case sensitive and '_'
471 can be used as a replacement for '-'.
473 The $value argument may be a scalar or a reference to a list of
476 =item $h->remove_header( $field, ... )
478 This function removes the header fields with the specified names.
480 The header field names ($field) are not case sensitive and '_'
481 can be used as a replacement for '-'.
483 The return value is the values of the fields removed. In scalar
484 context the number of fields removed is returned.
486 Note that if you pass in multiple field names then it is generally not
487 possible to tell which of the returned values belonged to which field.
489 =item $h->remove_content_headers
491 This will remove all the header fields used to describe the content of
492 a message. All header field names prefixed with C<Content-> falls
493 into this category, as well as C<Allow>, C<Expires> and
494 C<Last-Modified>. RFC 2616 denote these fields as I<Entity Header
497 The return value is a new C<HTTP::Headers> object that contains the
498 removed headers only.
502 This will remove all header fields.
504 =item $h->header_field_names
506 Returns the list of distinct names for the fields present in the
507 header. The field names have case as suggested by HTTP spec, and the
508 names are returned in the recommended "Good Practice" order.
510 In scalar context return the number of distinct field names.
512 =item $h->scan( \&process_header_field )
514 Apply a subroutine to each header field in turn. The callback routine
515 is called with two parameters; the name of the field and a single
516 value (a string). If a header field is multi-valued, then the
517 routine is called once for each value. The field name passed to the
518 callback routine has case as suggested by HTTP spec, and the headers
519 will be visited in the recommended "Good Practice" order.
521 Any return values of the callback routine are ignored. The loop can
522 be broken by raising an exception (C<die>), but the caller of scan()
523 would have to trap the exception itself.
527 =item $h->as_string( $eol )
529 Return the header fields as a formatted MIME header. Since it
530 internally uses the C<scan> method to build the string, the result
531 will use case as suggested by HTTP spec, and it will follow
532 recommended "Good Practice" of ordering the header fields. Long header
533 values are not folded.
535 The optional $eol parameter specifies the line ending sequence to
536 use. The default is "\n". Embedded "\n" characters in header field
537 values will be substituted with this line ending sequence.
541 =head1 CONVENIENCE METHODS
543 The most frequently used headers can also be accessed through the
544 following convenience Methods. These methods can both be used to read
545 and to set the value of a header. The header value is set if you pass
546 an argument to the method. The old header value is always returned.
547 If the given header did not exist then C<undef> is returned.
549 Methods that deal with dates/times always convert their value to system
550 time (seconds since Jan 1, 1970) and they also expect this kind of
551 value when the header value is set.
557 This header represents the date and time at which the message was
560 $h->date(time); # set current date
564 This header gives the date and time after which the entity should be
567 =item $h->if_modified_since
569 =item $h->if_unmodified_since
571 These header fields are used to make a request conditional. If the requested
572 resource has (or has not) been modified since the time specified in this field,
573 then the server will return a C<304 Not Modified> response instead of
576 =item $h->last_modified
578 This header indicates the date and time at which the resource was last
581 # check if document is more than 1 hour old
582 if (my $last_mod = $h->last_modified) {
583 if ($last_mod < time - 60*60) {
588 =item $h->content_type
590 The Content-Type header field indicates the media type of the message
593 $h->content_type('text/html');
595 The value returned will be converted to lower case, and potential
596 parameters will be chopped off and returned as a separate value if in
597 an array context. If there is no such header field, then the empty
598 string is returned. This makes it safe to do the following:
600 if ($h->content_type eq 'text/html') {
601 # we enter this place even if the real header value happens to
602 # be 'TEXT/HTML; version=3.0'
606 =item $h->content_encoding
608 The Content-Encoding header field is used as a modifier to the
609 media type. When present, its value indicates what additional
610 encoding mechanism has been applied to the resource.
612 =item $h->content_length
614 A decimal number indicating the size in bytes of the message content.
616 =item $h->content_language
618 The natural language(s) of the intended audience for the message
619 content. The value is one or more language tags as defined by RFC
620 1766. Eg. "no" for some kind of Norwegian and "en-US" for English the
621 way it is written in the US.
625 The title of the document. In libwww-perl this header will be
626 initialized automatically from the E<lt>TITLE>...E<lt>/TITLE> element
627 of HTML documents. I<This header is no longer part of the HTTP
632 This header field is used in request messages and contains information
633 about the user agent originating the request. I<E.g.>:
635 $h->user_agent('Mozilla/1.2');
639 The server header field contains information about the software being
640 used by the originating server program handling the request.
644 This header should contain an Internet e-mail address for the human
645 user who controls the requesting user agent. The address should be
646 machine-usable, as defined by RFC822. E.g.:
648 $h->from('King Kong <king@kong.com>');
650 I<This header is no longer part of the HTTP standard.>
654 Used to specify the address (URI) of the document from which the
655 requested resource address was obtained.
657 The "Free On-line Dictionary of Computing" as this to say about the
660 <World-Wide Web> A misspelling of "referrer" which
661 somehow made it into the {HTTP} standard. A given {web
662 page}'s referer (sic) is the {URL} of whatever web page
663 contains the link that the user followed to the current
664 page. Most browsers pass this information as part of a
669 By popular demand C<referrer> exists as an alias for this method so you
670 can avoid this misspelling in your programs and still send the right
673 When setting the referrer, this method removes the fragment from the
674 given URI if it is present, as mandated by RFC2616. Note that
675 the removal does I<not> happen automatically if using the header(),
676 push_header() or init_header() methods to set the referrer.
678 =item $h->www_authenticate
680 This header must be included as part of a C<401 Unauthorized> response.
681 The field value consist of a challenge that indicates the
682 authentication scheme and parameters applicable to the requested URI.
684 =item $h->proxy_authenticate
686 This header must be included in a C<407 Proxy Authentication Required>
689 =item $h->authorization
691 =item $h->proxy_authorization
693 A user agent that wishes to authenticate itself with a server or a
694 proxy, may do so by including these headers.
696 =item $h->authorization_basic
698 This method is used to get or set an authorization header that use the
699 "Basic Authentication Scheme". In array context it will return two
700 values; the user name and the password. In scalar context it will
701 return I<"uname:password"> as a single string value.
703 When used to set the header value, it expects two arguments. I<E.g.>:
705 $h->authorization_basic($uname, $password);
707 The method will croak if the $uname contains a colon ':'.
709 =item $h->proxy_authorization_basic
711 Same as authorization_basic() but will set the "Proxy-Authorization"
716 =head1 NON-CANONICALIZED FIELD NAMES
718 The header field name spelling is normally canonicalized including the
719 '_' to '-' translation. There are some application where this is not
720 appropriate. Prefixing field names with ':' allow you to force a
721 specific spelling. For example if you really want a header field name
722 to show up as C<foo_bar> instead of "Foo-Bar", you might set it like
725 $h->header(":foo_bar" => 1);
727 These field names are returned with the ':' intact for
728 $h->header_field_names and the $h->scan callback, but the colons do
729 not show in $h->as_string.
733 Copyright 1995-2005 Gisle Aas.
735 This library is free software; you can redistribute it and/or
736 modify it under the same terms as Perl itself.