1 use Test::More tests => 7;
9 Various entities. The parser must never break them in the middle:
19 <li><a href="foo 'bar' baz>" id=33>This is a link</a>
20 <li><a href='foo "bar" baz> å' id=34>This is another one</a>
23 <p><div align="center"><img src="http://www.perl.com/perl.gif"
27 a comment --> and this is not.
29 <!-- this is the kind of >comment< -- --> that Netscape hates -->
31 < this > was not a tag. <this is/not either>
37 #-------------------------------------------------------------------
42 @ISA=qw(HTML::Parser);
49 my $self = $class->SUPER::new;
51 die "Can only have one" if $COUNT++;
58 eval { $self->SUPER::DESTROY; };
64 my($self, $decl) = @_;
70 my($self, $tag, $attr) = @_;
71 $attr = join("/", map "$_=$attr->{$_}", sort keys %$attr);
72 $attr = "/$attr" if length $attr;
73 $OUT .= "<<$tag$attr>>|";
84 my($self, $comment) = @_;
85 $OUT .= "##$comment##|";
90 my($self, $text) = @_;
103 for $chunksize (64*1024, 64, 13, 3, 1, "file", "filehandle") {
104 #for $chunksize (1) {
105 if ($chunksize =~ /^file/) {
106 #print "Parsing from $chunksize";
108 #print "Parsing using $chunksize byte chunks";
112 if ($chunksize =~ /^file/) {
113 # First we must create the file
114 my $tmpfile = "tmp-$$.html";
116 die "$file already exists" if -e $file;
117 open(FILE, ">$file") or die "Can't create $file: $!";
122 if ($chunksize eq "filehandle") {
124 my $fh = FileHandle->new($file) || die "Can't open $file: $!";
128 # then we can parse it.
129 $p->parse_file($file);
130 close $file if $chunksize eq "filehandle";
131 unlink($tmpfile) || warn "Can't unlink $tmpfile: $!";
134 while (length $copy) {
135 my $chunk = substr($copy, 0, $chunksize);
136 substr($copy, 0, $chunksize) = '';
142 my $res = $p->result;
145 # Then we start looking for things that should not happen
146 if ($res =~ /\s\|\s/) {
151 # Make sure entities are not broken
152 '/', '/', 'È', '௖', '', 'å', 'Å',
154 # Some elements that should be produced
155 "|[[DOCTYPE HTML]]|",
156 "|## this is\na comment ##|",
157 "|<<ul>>|\n|<<li>>|<<a/href=foo 'bar' baz>/id=33>>|",
158 '|<<li>>|<<a/href=foo "bar" baz> å/id=34>>',
159 "|>>ul<<|", "|>>body<<|\n\n|",
162 if (index($res, $_) < 0) {
163 diag "Can't find '$_' in parsed document";
168 diag $res if $bad || $ENV{PRINT_RESULTS};
170 # And we check that we get the same result all the time
171 $res =~ s/\|//g; # remove all break marks
172 if ($last_res && $res ne $last_res) {
173 diag "The result is not the same as last time";
178 unless ($res =~ /Various entities/) {
179 diag "Some text must be missing";