X-Git-Url: http://vcs.maemo.org/git/?a=blobdiff_plain;f=dev%2Farm%2Flibhtml-parser-perl%2Flibhtml-parser-perl-3.56%2Feg%2Fhrefsub;fp=dev%2Farm%2Flibhtml-parser-perl%2Flibhtml-parser-perl-3.56%2Feg%2Fhrefsub;h=fe14159a827870e808afb972584de186973af31b;hb=f477fa73365d491991707e7ed9217b48d6994551;hp=0000000000000000000000000000000000000000;hpb=da95c414033799c3a62606f299c3c00b5c77ca11;p=dh-make-perl diff --git a/dev/arm/libhtml-parser-perl/libhtml-parser-perl-3.56/eg/hrefsub b/dev/arm/libhtml-parser-perl/libhtml-parser-perl-3.56/eg/hrefsub new file mode 100755 index 0000000..fe14159 --- /dev/null +++ b/dev/arm/libhtml-parser-perl/libhtml-parser-perl-3.56/eg/hrefsub @@ -0,0 +1,93 @@ +#!/usr/bin/perl + +# Perform transformations on link attributes in an HTML document. +# Examples: +# +# $ hrefsub 's/foo/bar/g' index.html +# $ hrefsub '$_=URI->new_abs($_, "http://foo")' index.html +# +# The first argument is a perl expression that might modify $_. +# It is called for each link in the document with $_ set to +# the original value of the link URI. The variables $tag and +# $attr can be used to access the tagname and attributename +# within the tag where the current link is found. +# +# The second argument is the name of a file to process. + +use strict; +use HTML::Parser (); +use URI; + +# Construct a hash of tag names that may have links. +my %link_attr; +{ + # To simplify things, reformat the %HTML::Tagset::linkElements + # hash so that it is always a hash of hashes. + require HTML::Tagset; + while (my($k,$v) = each %HTML::Tagset::linkElements) { + if (ref($v)) { + $v = { map {$_ => 1} @$v }; + } + else { + $v = { $v => 1}; + } + $link_attr{$k} = $v; + } + # Uncomment this to see what HTML::Tagset::linkElements thinks are + # the tags with link attributes + #use Data::Dump; Data::Dump::dump(\%link_attr); exit; +} + +# Create a subroutine named 'edit' to perform the operation +# passed in from the command line. The code should modify $_ +# to change things. +my $code = shift; +my $code = 'sub edit { local $_ = shift; my($attr, $tag) = @_; no strict; ' . + $code . + '; $_; }'; +#print $code; +eval $code; +die $@ if $@; + +# Set up the parser. +my $p = HTML::Parser->new(api_version => 3); + +# The default is to print everything as is. +$p->handler(default => sub { print @_ }, "text"); + +# All links are found in start tags. This handler will evaluate +# &edit for each link attribute found. +$p->handler(start => sub { + my($tagname, $pos, $text) = @_; + if (my $link_attr = $link_attr{$tagname}) { + while (4 <= @$pos) { + # use attribute sets from right to left + # to avoid invalidating the offsets + # when replacing the values + my($k_offset, $k_len, $v_offset, $v_len) = + splice(@$pos, -4); + my $attrname = lc(substr($text, $k_offset, $k_len)); + next unless $link_attr->{$attrname}; + next unless $v_offset; # 0 v_offset means no value + my $v = substr($text, $v_offset, $v_len); + $v =~ s/^([\'\"])(.*)\1$/$2/; + my $new_v = edit($v, $attrname, $tagname); + next if $new_v eq $v; + $new_v =~ s/\"/"/g; # since we quote with "" + substr($text, $v_offset, $v_len) = qq("$new_v"); + } + } + print $text; + }, + "tagname, tokenpos, text"); + +# Parse the file passed in from the command line +my $file = shift || usage(); +$p->parse_file($file) || die "Can't open file $file: $!\n"; + +sub usage +{ + my $progname = $0; + $progname =~ s,^.*/,,; + die "Usage: $progname \n"; +}