1 package HTTP::Negotiate;
4 sub Version { $VERSION; }
11 require HTTP::Headers;
17 my($variants, $request) = @_;
20 unless (defined $request) {
21 # Create a request object from the CGI environment variables
22 $request = new HTTP::Headers;
23 $request->header('Accept', $ENV{HTTP_ACCEPT})
25 $request->header('Accept-Charset', $ENV{HTTP_ACCEPT_CHARSET})
26 if $ENV{HTTP_ACCEPT_CHARSET};
27 $request->header('Accept-Encoding', $ENV{HTTP_ACCEPT_ENCODING})
28 if $ENV{HTTP_ACCEPT_ENCODING};
29 $request->header('Accept-Language', $ENV{HTTP_ACCEPT_LANGUAGE})
30 if $ENV{HTTP_ACCEPT_LANGUAGE};
33 # Get all Accept values from the request. Build a hash initialized
36 # %accept = ( type => { 'audio/*' => { q => 0.2, mbx => 20000 },
37 # 'audio/basic' => { q => 1 },
39 # language => { 'no' => { q => 1 },
47 if ($key =~ s/^Accept-//) {
50 elsif ($key eq "Accept") {
59 for my $name (split(/,/, $val)) {
61 if ($name =~ s/;(.*)//) {
62 for $param (split(/;/, $1)) {
63 my ($pk, $pv) = split(/=/, $param, 2);
68 if (defined $param{'q'}) {
69 $param{'q'} = 1 if $param{'q'} > 1;
70 $param{'q'} = 0 if $param{'q'} < 0;
73 $param{'q'} = $default_q;
75 # This makes sure that the first ones are slightly better off
76 # and therefore more likely to be chosen.
79 $accept{$type}{$name} = \%param;
83 # Check if any of the variants specify a language. We do this
84 # because it influences how we treat those without (they default to
87 for $var (@$variants) {
95 print "Negotiation parameters in the request\n";
96 for $type (keys %accept) {
98 for $name (keys %{$accept{$type}}) {
100 for $pv (keys %{$accept{$type}{$name}}) {
101 print " $pv = $accept{$type}{$name}{$pv}\n";
107 my @Q = (); # This is where we collect the results of the
108 # quality calculations
110 # Calculate quality for all the variants that are available.
112 my($id, $qs, $ct, $enc, $cs, $lang, $bs) = @$_;
113 $qs = 1 unless defined $qs;
114 $ct = '' unless defined $ct;
115 $bs = 0 unless defined $bs;
116 $lang = lc($lang) if $lang; # lg tags are always case-insensitive
118 print "\nEvaluating $id (ct='$ct')\n";
119 printf " qs = %.3f\n", $qs;
120 print " enc = $enc\n" if $enc && !ref($enc);
121 print " enc = @$enc\n" if $enc && ref($enc);
122 print " cs = $cs\n" if $cs;
123 print " lang = $lang\n" if $lang;
124 print " bs = $bs\n" if $bs;
127 # Calculate encoding quality
129 # If the variant has no assigned Content-Encoding, or if no
130 # Accept-Encoding field is present, then the value assigned
131 # is "qe=1". If *all* of the variant's content encodings
132 # are listed in the Accept-Encoding field, then the value
133 # assigned is "qw=1". If *any* of the variant's content
134 # encodings are not listed in the provided Accept-Encoding
135 # field, then the value assigned is "qe=0"
136 if (exists $accept{'encoding'} && $enc) {
137 my @enc = ref($enc) ? @$enc : ($enc);
139 print "Is encoding $_ accepted? " if $DEBUG;
140 unless(exists $accept{'encoding'}{$_}) {
141 print "no\n" if $DEBUG;
146 print "yes\n" if $DEBUG;
151 # Calculate charset quality
153 # If the variant's media-type has no charset parameter,
154 # or the variant's charset is US-ASCII, or if no Accept-Charset
155 # field is present, then the value assigned is "qc=1". If the
156 # variant's charset is listed in the Accept-Charset field,
157 # then the value assigned is "qc=1. Otherwise, if the variant's
158 # charset is not listed in the provided Accept-Encoding field,
159 # then the value assigned is "qc=0".
160 if (exists $accept{'charset'} && $cs && $cs ne 'us-ascii' ) {
161 $qc = 0 unless $accept{'charset'}{$cs};
164 # Calculate language quality
166 if ($lang && exists $accept{'language'}) {
167 my @lang = ref($lang) ? @$lang : ($lang);
168 # If any of the variant's content languages are listed
169 # in the Accept-Language field, the the value assigned is
170 # the largest of the "q" parameter values for those language
174 next unless exists $accept{'language'}{$_};
175 my $this_q = $accept{'language'}{$_}{'q'};
176 $q = $this_q unless defined $q;
177 $q = $this_q if $this_q > $q;
180 $DEBUG and print " -- Exact language match at q=$q\n";
183 # If there was no exact match and at least one of
184 # the Accept-Language field values is a complete
185 # subtag prefix of the content language tag(s), then
186 # the "q" parameter value of the largest matching
188 $DEBUG and print " -- No exact language match\n";
189 my $selected = undef;
190 for $al (keys %{ $accept{'language'} }) {
191 if (index($al, "$lang-") == 0) {
192 # $lang starting with $al isn't enough, or else
193 # Accept-Language: hu (Hungarian) would seem
194 # to accept a document in hup (Hupa)
195 $DEBUG and print " -- $al ISA $lang\n";
196 $selected = $al unless defined $selected;
197 $selected = $al if length($al) > length($selected);
200 $DEBUG and print " -- $lang isn't a $al\n";
203 $q = $accept{'language'}{$selected}{'q'} if $selected;
205 # If none of the variant's content language tags or
206 # tag prefixes are listed in the provided
207 # Accept-Language field, then the value assigned
209 $q = 0.001 unless defined $q;
214 $ql = 0.5 if $any_lang && exists $accept{'language'};
219 # If no Accept field is given, then the value assigned is "q=1".
220 # If at least one listed media range matches the variant's media
221 # type, then the "q" parameter value assigned to the most specific
222 # of those matched is used (e.g. "text/html;version=3.0" is more
223 # specific than "text/html", which is more specific than "text/*",
224 # which in turn is more specific than "*/*"). If not media range
225 # in the provided Accept field matches the variant's media type,
226 # then the value assigned is "q=0".
227 if (exists $accept{'type'} && $ct) {
228 # First we clean up our content-type
231 $params = $1 if $ct =~ s/;(.*)//;
232 my($type, $subtype) = split("/", $ct, 2);
234 for $param (split(/;/, $params)) {
235 my($pk,$pv) = split(/=/, $param, 2);
241 my $sel_specificness = 0;
244 for $at (keys %{ $accept{'type'} }) {
245 print "Consider $at...\n" if $DEBUG;
246 my($at_type, $at_subtype) = split("/", $at, 2);
247 # Is it a match on the type
248 next if $at_type ne '*' && $at_type ne $type;
249 next if $at_subtype ne '*' && $at_subtype ne $subtype;
250 my $specificness = 0;
251 $specificness++ if $at_type ne '*';
252 $specificness++ if $at_subtype ne '*';
253 # Let's see if content-type parameters also match
254 while (($pk, $pv) = each %param) {
255 print "Check if $pk = $pv is true\n" if $DEBUG;
256 next unless exists $accept{'type'}{$at}{$pk};
258 unless $accept{'type'}{$at}{$pk} eq $pv;
259 print "yes it is!!\n" if $DEBUG;
262 print "Hurray, type match with specificness = $specificness\n"
265 if (!defined($sel_q) || $sel_specificness < $specificness) {
266 $sel_q = $accept{'type'}{$at}{'q'};
267 $sel_mbx = $accept{'type'}{$at}{'mbx'};
268 $sel_specificness = $specificness;
276 if (!defined($mbx) || $mbx >= $bs) {
277 $Q = $qs * $qe * $qc * $ql * $q;
281 print "Variant's size is too large ==> Q=0\n" if $DEBUG;
285 $mbx = "undef" unless defined $mbx;
287 print " (q=$q, mbx=$mbx, qe=$qe, qc=$qc, ql=$ql, qs=$qs)\n";
290 push(@Q, [$id, $Q, $bs]);
294 @Q = sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @Q;
296 return @Q if wantarray;
297 return undef unless @Q;
298 return undef if $Q[0][1] == 0;
309 HTTP::Negotiate - choose a variant to serve
313 use HTTP::Negotiate qw(choose);
315 # ID QS Content-Type Encoding Char-Set Lang Size
317 [['var1', 1.000, 'text/html', undef, 'iso-8859-1', 'en', 3000],
318 ['var2', 0.950, 'text/plain', 'gzip', 'us-ascii', 'no', 400],
319 ['var3', 0.3, 'image/gif', undef, undef, undef, 43555],
322 @preferred = choose($variants, $request_headers);
323 $the_one = choose($variants);
327 This module provides a complete implementation of the HTTP content
328 negotiation algorithm specified in F<draft-ietf-http-v11-spec-00.ps>
329 chapter 12. Content negotiation allows for the selection of a
330 preferred content representation based upon attributes of the
331 negotiable variants and the value of the various Accept* header fields
334 The variants are ordered by preference by calling the function
337 The first parameter is reference to an array of the variants to
339 Each element in this array is an array with the values [$id, $qs,
340 $content_type, $content_encoding, $charset, $content_language,
341 $content_length] whose meanings are described
342 below. The $content_encoding and $content_language can be either a
343 single scalar value or an array reference if there are several values.
345 The second optional parameter is either a HTTP::Headers or a HTTP::Request
346 object which is searched for "Accept*" headers. If this
347 parameter is missing, then the accept specification is initialized
348 from the CGI environment variables HTTP_ACCEPT, HTTP_ACCEPT_CHARSET,
349 HTTP_ACCEPT_ENCODING and HTTP_ACCEPT_LANGUAGE.
351 In an array context, choose() returns a list of [variant
352 identifier, calculated quality, size] tuples. The values are sorted by
353 quality, highest quality first. If the calculated quality is the same
354 for two variants, then they are sorted by size (smallest first). I<E.g.>:
356 (['var1', 1, 2000], ['var2', 0.3, 512], ['var3', 0.3, 1024]);
358 Note that also zero quality variants are included in the return list
359 even if these should never be served to the client.
361 In a scalar context, it returns the identifier of the variant with the
362 highest score or C<undef> if none have non-zero quality.
364 If the $HTTP::Negotiate::DEBUG variable is set to TRUE, then a lot of
365 noise is generated on STDOUT during evaluation of choose().
369 A variant is described by a list of the following values. If the
370 attribute does not make sense or is unknown for a variant, then use
377 This is a string that you use as the name for the variant. This
378 identifier for the preferred variants returned by choose().
382 This is a number between 0.000 and 1.000 that describes the "source
383 quality". This is what F<draft-ietf-http-v11-spec-00.ps> says about this
386 Source quality is measured by the content provider as representing the
387 amount of degradation from the original source. For example, a
388 picture in JPEG form would have a lower qs when translated to the XBM
389 format, and much lower qs when translated to an ASCII-art
390 representation. Note, however, that this is a function of the source
391 - an original piece of ASCII-art may degrade in quality if it is
392 captured in JPEG form. The qs values should be assigned to each
393 variant by the content provider; if no qs value has been assigned, the
394 default is generally "qs=1".
398 This is the media type of the variant. The media type does not
399 include a charset attribute, but might contain other parameters.
403 text/html;version=2.0
408 =item content-encoding
410 This is one or more content encodings that has been applied to the
411 variant. The content encoding is generally used as a modifier to the
412 content media type. The most common content encodings are:
417 =item content-charset
419 This is the character set used when the variant contains text.
420 The charset value should generally be C<undef> or one of these:
423 iso-8859-1 ... iso-8859-9
431 =item content-language
433 This describes one or more languages that are used in the variant.
434 Language is described like this in F<draft-ietf-http-v11-spec-00.ps>: A
435 language is in this context a natural language spoken, written, or
436 otherwise conveyed by human beings for communication of information to
437 other human beings. Computer languages are explicitly excluded.
439 The language tags are defined by RFC 3066. Examples
443 en International English
449 This is the number of bytes used to represent the content.
453 =head1 ACCEPT HEADERS
455 The following Accept* headers can be used for describing content
456 preferences in a request (This description is an edited extract from
457 F<draft-ietf-http-v11-spec-00.ps>):
463 This header can be used to indicate a list of media ranges which are
464 acceptable as a response to the request. The "*" character is used to
465 group media types into ranges, with "*/*" indicating all media types
466 and "type/*" indicating all subtypes of that type.
468 The parameter q is used to indicate the quality factor, which
469 represents the user's preference for that range of media types. The
470 parameter mbx gives the maximum acceptable size of the response
471 content. The default values are: q=1 and mbx=infinity. If no Accept
472 header is present, then the client accepts all media types with q=1.
476 Accept: audio/*;q=0.2;mbx=200000, audio/basic
478 would mean: "I prefer audio/basic (of any size), but send me any audio
479 type if it is the best available after an 80% mark-down in quality and
480 its size is less than 200000 bytes"
485 Used to indicate what character sets are acceptable for the response.
486 The "us-ascii" character set is assumed to be acceptable for all user
487 agents. If no Accept-Charset field is given, the default is that any
488 charset is acceptable. Example:
490 Accept-Charset: iso-8859-1, unicode-1-1
493 =item Accept-Encoding
495 Restricts the Content-Encoding values which are acceptable in the
496 response. If no Accept-Encoding field is present, the server may
497 assume that the client will accept any content encoding. An empty
498 Accept-Encoding means that no content encoding is acceptable. Example:
500 Accept-Encoding: compress, gzip
503 =item Accept-Language
505 This field is similar to Accept, but restricts the set of natural
506 languages that are preferred in a response. Each language may be
507 given an associated quality value which represents an estimate of the
508 user's comprehension of that language. For example:
510 Accept-Language: no, en-gb;q=0.8, de;q=0.55
512 would mean: "I prefer Norwegian, but will accept British English (with
513 80% comprehension) or German (with 55% comprehension).
520 Copyright 1996,2001 Gisle Aas.
522 This library is free software; you can redistribute it and/or
523 modify it under the same terms as Perl itself.
527 Gisle Aas <gisle@aas.no>