
perlbug-followup at perl
May 24, 2012, 6:41 PM
Post #1 of 1
(42 views)
Permalink
|
|
[perl #33027] encode_entities, unicode and why =~ /\s/ == false
|
|
On Mon Dec 13 06:23:27 2004, tieben wrote: > > This is a bug report for perl from btietze [at] epo, > generated with the help of perlbug 1.35 running under perl v5.8.5. > > ./utest.pl > #!/usr/bin/perl -w > # Fr Dez 3 14:30:04 CET 2004 > # utest.pl > # used for: demonstration of perl wierd behavior > # why is the return value of decode_entities("&nbsp;") somehow > unicode but not unicode? > # or why does /\s/ not recognize that magic char? > use strict; > use HTML::Entities; > # main > ### > open THISFILE, $0; print "$0\n", <THISFILE>, "\n"; > my @cells; > format STDOUT = > | @<<<<<<<<< | @>>>>>> | @>>>>>> | @>>>>>> | @>>>>>> | @>>>>>> || > @>>>>>> | @>>>>>> | @>>>>>> | @>>>>>> | @>>>>>> > @cells > . > @cells = ("STRING", "utf8", "bool", "empty", "ws", "non ws", "utf8", > "bool", "empty", "ws", "non ws"); > write; > foreach ( > decode_entities("&nbsp;"), > decode_entities("/&nbsp;/"), > decode_entities(" "), > decode_entities(""), > "\n", > ){ > @cells = pt($_); > write; > } > # SUBS > ### > sub pt{ > my $str = shift; > my @ret; > push @ret, '"' . encode_entities($str) . '"', # STRING > utf8::is_utf8($str) ? "true": "false", # utf8 > $str ? "true": "false", # bool > $str=~/^$/ ? "true": "false", # empty > $str=~/\s+/ ? "true": "false", # ws > $str=~/\S+/ ? "true": "false"; # non ws > utf8::upgrade($str); > push @ret, utf8::is_utf8($str) ? "true": "false", #utf8 > $str ? "true": "false", # bool > $str=~/^$/ ? "true": "false", # empty > $str=~/\s+/ ? "true": "false", # ws > $str=~/\S+/ ? 
"true": "false"; # non ws > return @ret; > } > # vim: sw=4:ts=8:ai: > > | STRING | utf8 | bool | empty | ws | non ws || > utf8 | bool | empty | ws | non ws > | "&nbsp;" | false | true | false | false | true || > true | true | false | true | false > | "/&nbsp;/" | false | true | false | false | true || > true | true | false | true | true > | " " | false | true | false | true | false || > true | true | false | true | false > | "" | false | false | true | false | false || > true | false | true | false | false > | " | false | true | true | true | false || > true | true | true | true | false > --- > Flags: > category=core > severity=medium > --- > This perlbug was built using Perl v5.8.5 - Fri Oct 1 23:29:33 UTC > 2004 > It is being executed now by Perl v5.8.5 - Fri Oct 1 23:24:00 UTC > 2004. > > Site configuration information for perl v5.8.5: > > Configured by abuild at Fri Oct 1 23:24:00 UTC 2004. > > Summary of my perl5 (revision 5 version 8 subversion 5) configuration: > Platform: > osname=linux, osvers=2.6.8.1, archname=i586-linux-thread-multi > uname='linux g168 2.6.8.1 #1 smp thu jul 1 15:23:45 utc 2004 i686 > i686 i386 gnulinux ' > config_args='-ds -e -Dprefix=/usr -Dvendorprefix=/usr > -Dinstallusrbinperl -Dusethreads -Di_db -Di_dbm -Di_ndbm -Di_gdbm > -Duseshrplib=true -Doptimize=-O2 -march=i586 -mcpu=i686 > -fmessage-length=0 -Wall -Wall -pipe' > hint=recommended, useposix=true, d_sigaction=define > usethreads=define use5005threads=undef useithreads=define > usemultiplicity=define > useperlio=define d_sfio=undef uselargefiles=define usesocks=undef > use64bitint=undef use64bitall=undef uselongdouble=undef > usemymalloc=n, bincompat5005=undef > Compiler: > cc='cc', ccflags ='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS > -fno-strict-aliasing -pipe -D_LARGEFILE_SOURCE > -D_FILE_OFFSET_BITS=64', > optimize='-O2 -march=i586 -mcpu=i686 -fmessage-length=0 -Wall > -Wall -pipe', > cppflags='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS > -fno-strict-aliasing -pipe' > 
ccversion='', gccversion='3.3.4 (pre 3.3.5 20040809)', > gccosandvers='' > intsize=4, longsize=4, ptrsize=4, doublesize=8, byteorder=1234 > d_longlong=define, longlongsize=8, d_longdbl=define, > longdblsize=12 > ivtype='long', ivsize=4, nvtype='double', nvsize=8, Off_t='off_t', > lseeksize=8 > alignbytes=4, prototype=define > Linker and Libraries: > ld='cc', ldflags ='' > libpth=/lib /usr/lib /usr/local/lib > libs=-lnsl -ldl -lm -lcrypt -lutil -lpthread -lc > perllibs=-lnsl -ldl -lm -lcrypt -lutil -lpthread -lc > libc=, so=so, useshrplib=true, libperl=libperl.so > gnulibc_version='2.3.3' > Dynamic Linking: > dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-Wl,-E > -Wl,-rpath,/usr/lib/perl5/5.8.5/i586-linux-thread-multi/CORE' > cccdlflags='-fPIC', lddlflags='-shared' > > Locally applied patches: > > > --- > @INC for perl v5.8.5: > /usr/lib/perl5/5.8.5/i586-linux-thread-multi > /usr/lib/perl5/5.8.5 > /usr/lib/perl5/site_perl/5.8.5/i586-linux-thread-multi > /usr/lib/perl5/site_perl/5.8.5 > /usr/lib/perl5/site_perl > /usr/lib/perl5/vendor_perl/5.8.5/i586-linux-thread-multi > /usr/lib/perl5/vendor_perl/5.8.5 > /usr/lib/perl5/vendor_perl > . > > --- > Environment for perl v5.8.5: > HOME=/home/ben > LANG=de_DE.UTF-8 > LANGUAGE (unset) > LD_LIBRARY_PATH (unset) > LOGDIR (unset) > PATH=/home/ben/bin:/usr/local/bin:/usr/bin:/usr/X11R6/bin:/bin:/usr/games:/opt/gnome/bin:/opt/kde3/bin:/usr/lib/jvm/jre/bin > PERL_BADLANG (unset) > SHELL=/bin/bash --- Reply: This is an instance of the Unicode Bug: while the string is not flagged as UTF-8, the non-breaking space (U+00A0) is matched with non-Unicode rules, so /\s/ does not match it until the string is upgraded. In recent releases of Perl this is fixed by the unicode_strings feature (or the /u flag on regexes), so I'm marking this as resolved. --- via perlbug: queue: perl5 status: new https://rt.perl.org:443/rt3/Ticket/Display.html?id=33027
|