1 # Test Unicode entities
5 use Test::More tests => 27;
# Skip guard for the 27 planned tests: skip() is invoked when the running perl
# is older than 5.008 ($] < 5.008) or HTML::Entities::UNICODE_SUPPORT returns
# false.  The count 27 matches the plan declared with "tests => 27".
# NOTE(review): skip() needs an enclosing block labelled SKIP:; that label is
# not visible in this listing -- confirm it exists in the full file.
8 skip "This perl does not support Unicode or Unicode entities not selected",
9 27 if $] < 5.008 || !&HTML::Entities::UNICODE_SUPPORT;
# Basic decoding checks.  Each is() compares the return value of
# decode_entities() with the expected string given as the second argument.
# NOTE(review): apart from "&euro", the inputs below are literal characters,
# not entity references; the listing this came from may have expanded them.
# Confirm the intended inputs before relying on these assertions.
# "&euro" (no terminating ';') is expected to be returned unchanged.
11 is(decode_entities("&euro"), "&euro");
# Expected result: U+20AC.
12 is(decode_entities("€"), "\x{20AC}");
# Expected result: "å" for both inputs.
14 is(decode_entities("å"), "å");
15 is(decode_entities("å"), "å");
# Expected result: the character with code point 500000.
17 is(decode_entities("񺄠"), chr(500000));
# Expected result: U+10FFFD.
19 is(decode_entities("􏿽"), "\x{10FFFD}");
# Inputs expected to decode to U+FFFC, to the replacement character U+FFFD,
# or to NUL.
# NOTE(review): in this listing the string arguments on the first seven lines
# appear empty and the remaining ones show a single replacement character, so
# the entity exercised by each line cannot be read from here -- recover the
# inputs from the original test file.
# Expected: U+FFFC.
21 is(decode_entities(""), "\x{FFFC}");
# Expected: U+FFFD, written as "\x{FFFD}".
24 is(decode_entities(""), "\x{FFFD}");
25 is(decode_entities(""), "\x{FFFD}");
26 is(decode_entities(""), "\x{FFFD}");
27 is(decode_entities(""), "\x{FFFD}");
28 is(decode_entities(""), "\x{FFFD}");
29 is(decode_entities(""), "\x{FFFD}");
# Expected: U+FFFD, written as chr(0xFFFD).
30 is(decode_entities("�"), chr(0xFFFD));
31 is(decode_entities("�"), chr(0xFFFD));
# Expected: a single NUL character ("\0").
33 is(decode_entities("�"), "\0");
34 is(decode_entities("�"), "\0");
35 is(decode_entities("�"), "\0");
36 is(decode_entities("�"), "\0");
# The expected output keeps a leading literal "&#", followed by three "å"
# characters and U+0FFF.
38 is(decode_entities("&#ååå࿿"), "&#ååå\x{FFF}");
40 # This might fail when we get more than 64 bit UVs
# Both assertions expect decode_entities() to hand the input back unchanged.
# NOTE(review): input and expected value both display as one replacement
# character here; presumably these were numeric references too large for a
# 64-bit UV -- verify against the original file.
41 is(decode_entities("�"), "�");
42 is(decode_entities("�"), "�");
# Round-trip check over several code point ranges: for each [first, last]
# pair, build a string of every character in the range, run it through
# encode_entities() and then decode_entities().
# NOTE(review): the loop body is incomplete in this listing -- the condition
# guarding the diag() call and the closing brace are not visible.
45 for ([32, 48], [120, 169], [240, 250], [250, 260], [965, 975], [3000, 3005]) {
# $a is chr() of every code point from $_->[0] to $_->[1], concatenated.
46 my $a = join("", map chr, $_->[0] .. $_->[1]);
# $e is the entity-encoded form of $a; $d is $e decoded again.
48 my $e = encode_entities($a);
49 my $d = decode_entities($e);
# Diagnostic naming the range whose decoding was wrong.
52 diag "Wrong decoding in range $_->[0] .. $_->[1]";
53 # use Devel::Peek; Dump($a); Dump($d);
# Expected: the single character U+100085 from a two-character input.
# NOTE(review): the input shows as two replacement characters here;
# presumably a surrogate pair (0xDBC0 and 0xDC85 combine to 0x100085) --
# confirm against the original file.
60 is(decode_entities("��"), chr(0x100085));
62 is(decode_entities("��"), chr(0x100085));
# Expected: U+FFFD.
64 is(decode_entities("�"), chr(0xFFFD));
# "\260" is an octal escape for code point 0xB0; the expected string has
# U+00B0 on either side of U+2019.
66 is(decode_entities("\260’\260"), "\x{b0}\x{2019}\x{b0}");