vcs.maemo.org Git - pkg-perl/blob - deb-src/libhtml-tree-perl/libhtml-tree-perl-3.23/t/split.t

   1 #!perl -Tw
   2
   3 # Testing of the incremental parsing.  Try to split a HTML document at
   4 # every possible position and make sure that the result is the same as
   5 # when parsing everything in one chunk.
   6
   7 # Now we use a shorter document, because we don't have all day on
   8 # this.
   9
  my ($HTML, $notests);
  BEGIN {
      # Fixture document.  It is assigned at compile time so that the test
      # plan below can be computed before Test::More is loaded.  Every byte
      # of the heredoc matters: the number of tests is derived from its length.
      $HTML = <<'EOT';

<Title>Tittel
</title>

<H1>Overskrift</H1>

<!-- Comment -->

Text <b>bold</b>
<a href="..." name=foo bar>italic</a>
some &#101;ntities (&aring)
EOT

      # Three tests are run for each split position (one per character of the
      # document): the halves re-join correctly, the parser object is created,
      # and the resulting HTML matches the one-chunk parse.
      my $tests_per_position = 3;

      # Four tests run once: use_ok, the reference parser's isa_ok, the
      # per-character parser's isa_ok, and the per-character comparison.
      my $fixed_tests = 4;

      $notests = $tests_per_position * length($HTML) + $fixed_tests;
  }
  30 use strict;
  31
  32 use Test::More tests=>$notests; # Tests
  33
  34 BEGIN {
  35     use_ok( 'HTML::TreeBuilder');
  36 }
  37
  # Reference result: parse the whole document in a single chunk and keep the
  # serialised HTML.  Every incremental parse below is compared against $html.
  # The constructor is called with arrow syntax rather than the indirect
  # object form ("new Class"), which perlobj discourages.
  my $h = HTML::TreeBuilder->new;
  isa_ok($h, 'HTML::TreeBuilder');
  $h->parse($HTML)->eof;
  my $html = $h->as_HTML;
  $h->delete;    # explicitly dispose of the tree once it has been serialised
  43
  # Each iteration splits the document in two at $pos, feeds both halves to a
  # fresh parser, and checks that the serialised result equals the one-chunk
  # parse held in $html.
  #
  # Exactly three tests are emitted per position whatever happens, so the
  # plan stays in sync: the re-join check, isa_ok on the parser, and the
  # comparison (reported through fail() when parsing died).  Failures go
  # through Test::More instead of hand-printed "not ok" lines, which would
  # carry a wrong test number and bypass the test counter.
  for my $pos (0 .. length($HTML) - 1) {
      my $first = substr($HTML, 0, $pos);
      my $last  = substr($HTML, $pos);
      is($first . $last, $HTML, "File split okay");

      # Up to 10 characters on either side of the split point, shown in
      # diagnostics so a failure can be located in the document.
      my $before  = $pos < 10 ? $pos : 10;
      my $context = join '',
          "«", substr($HTML, $pos - $before, $before), "»\n«",
          substr($HTML, $pos, 10), "»\n";

      # Construct outside the parsing eval so isa_ok always runs; if the
      # constructor itself dies, $h1 is undef and isa_ok reports a failure.
      my $h1 = eval { HTML::TreeBuilder->new };
      isa_ok($h1, 'HTML::TreeBuilder');

      my $new_html;
      my $parsed = eval {
          $h1->parse($first);
          $h1->parse($last);
          $h1->eof;
          $new_html = $h1->as_HTML;
          1;
      };
      my $error = $@;    # capture at once; $@ is easily clobbered

      if ($parsed) {
          is($new_html, $html, "Still Parsing as the same after split at $pos")
              or diag("Something is different when splitting at position $pos:\n",
                      $context, "\n$html$new_html\n");
      }
      else {
          fail("Still Parsing as the same after split at $pos");
          diag("Died when splitting at position $pos:\n", $context, $error);
      }

      # Only dispose of the tree if a parser was actually created.
      $h1->delete if defined $h1;
  }    # for
  79
  # Also feed the document to a parser one character at a time and check that
  # the result still matches the one-chunk parse held in $html.
  my $perChar = HTML::TreeBuilder->new;
  isa_ok($perChar, 'HTML::TreeBuilder');

  # split with an empty pattern yields every character, newlines included.
  for my $char (split //, $HTML) {
      $perChar->parse($char);
  }
  $perChar->eof;

  my $new_html = $perChar->as_HTML;
  is($new_html, $html, "Testing per Char parsing");
  $perChar->delete;