
richter at apache
Nov 8, 2005, 9:11 PM
Post #1 of 1
(957 views)
Permalink
|
|
svn commit: r331953 - in /perl/embperl/trunk: ./ Embperl/ podsrc/ test/cmp/ test/html/ xsbuilder/maps/
|
|
Author: richter Date: Tue Nov 8 21:11:19 2005 New Revision: 331953 URL: http://svn.apache.org/viewcvs?rev=331953&view=rev Log: - Added better UTF-8 support: - Output escaping now takes Perl's UTF-8 flags into account and will not escape UTF-8 chars anymore - Directive Embperl_Output_Esc_Charset allows to generally switch between Latin1, Latin2 and UTF-8. (For UTF-8 this is not necessary when Perl's UTF-8 Flag is correctly set, but this is not the case for all datasources) Added: perl/embperl/trunk/test/cmp/escutf8.htm perl/embperl/trunk/test/html/escutf8.htm (with props) Modified: perl/embperl/trunk/Changes.pod perl/embperl/trunk/DOM.xs perl/embperl/trunk/Embperl/Inline.pm perl/embperl/trunk/MANIFEST perl/embperl/trunk/Old.xs perl/embperl/trunk/embperl.h perl/embperl/trunk/ep.h perl/embperl/trunk/epcfg.h perl/embperl/trunk/epchar.c perl/embperl/trunk/epdat2.h perl/embperl/trunk/epdefault.c perl/embperl/trunk/epdom.c perl/embperl/trunk/epdom.h perl/embperl/trunk/epinit.c perl/embperl/trunk/epmain.c perl/embperl/trunk/eppriv.h perl/embperl/trunk/eputil.c perl/embperl/trunk/podsrc/Config.spod perl/embperl/trunk/test.pl perl/embperl/trunk/xsbuilder/maps/ep_structure.map Modified: perl/embperl/trunk/Changes.pod URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/Changes.pod?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/Changes.pod (original) +++ perl/embperl/trunk/Changes.pod Tue Nov 8 21:11:19 2005 @@ -2,6 +2,14 @@ =head1 2.0.2_dev (Not yet released, only in the L<"SVN"|SVN.pod>) + - Added better UTF-8 support: + - Output escaping now takes Perl's UTF-8 flags into + account and will not escape UTF-8 chars anymore + - Diretive Embperl_Output_Esc_Charset allows to + generaly switch between Latin1, Latin2 and UTF-8. 
+ (For UTF-8 this is not necessary when Perl's UTF-8 + Flag is correctly set, but this is not the case + for all datasources) - Added Module Embperl::Inline, which allows to embed Embperl code in an ordinary Perl module. This makes is easy to install system wide libraries, which need Modified: perl/embperl/trunk/DOM.xs URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/DOM.xs?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/DOM.xs (original) +++ perl/embperl/trunk/DOM.xs Tue Nov 8 21:11:19 2005 @@ -65,7 +65,7 @@ RETVAL = NULL ; /* avoid warning */ SvGETMAGIC_P4(sText) ; s = SV2String (sText, l) ; - Node_replaceChildWithCDATA (CurrApp, DomTree_self(pDomNode -> xDomTree), pDomNode -> xNode, r -> Component.nCurrRepeatLevel, s, l, (r -> Component.nCurrEscMode & 11)== 3?1 + (r -> Component.nCurrEscMode & 4):r -> Component.nCurrEscMode, 0) ; + Node_replaceChildWithCDATA (CurrApp, DomTree_self(pDomNode -> xDomTree), pDomNode -> xNode, r -> Component.nCurrRepeatLevel, s, l, (SvUTF8(sText)?nflgEscUTF8:0) + ((r -> Component.nCurrEscMode & 11)== 3?1 + (r -> Component.nCurrEscMode & 4):r -> Component.nCurrEscMode), 0) ; r -> Component.nCurrEscMode = r -> Component.Config.nEscMode ; r -> Component.bEscModeSet = -1 ; /*SvREFCNT_inc (sText) ;*/ @@ -86,7 +86,7 @@ RETVAL = NULL ; /* avoid warning */ SvGETMAGIC_P4(sText) ; s = SV2String (sText, l) ; - Node_replaceChildWithCDATA (CurrApp, DomTree_self(xDomTree), xOldChild, r -> Component.nCurrRepeatLevel, s, l, (r -> Component.nCurrEscMode & 11)== 3?1 + (r -> Component.nCurrEscMode & 4):r -> Component.nCurrEscMode, 0) ; + Node_replaceChildWithCDATA (CurrApp, DomTree_self(xDomTree), xOldChild, r -> Component.nCurrRepeatLevel, s, l, (SvUTF8(sText)?nflgEscUTF8:0) + ((r -> Component.nCurrEscMode & 11)== 3?1 + (r -> Component.nCurrEscMode & 4):r -> Component.nCurrEscMode), 0) ; r -> Component.nCurrEscMode = r -> Component.Config.nEscMode ; r 
-> Component.bEscModeSet = -1 ; /*SvREFCNT_inc (sText) ;*/ @@ -107,7 +107,7 @@ r -> Component.bSubNotEmpty = 1 ; SvGETMAGIC_P4(sText) ; s = SV2String (sText, l) ; - Node_replaceChildWithCDATA (r -> pApp, DomTree_self(r -> Component.xCurrDomTree), xOldChild, r -> Component.nCurrRepeatLevel, s, l, (r -> Component.nCurrEscMode & 11)== 3?1 + (r -> Component.nCurrEscMode & 4):r -> Component.nCurrEscMode, 0) ; + Node_replaceChildWithCDATA (r -> pApp, DomTree_self(r -> Component.xCurrDomTree), xOldChild, r -> Component.nCurrRepeatLevel, s, l, (SvUTF8(sText)?nflgEscUTF8:0) + ((r -> Component.nCurrEscMode & 11)== 3?1 + (r -> Component.nCurrEscMode & 4):r -> Component.nCurrEscMode), 0) ; r -> Component.nCurrEscMode = r -> Component.Config.nEscMode ; r -> Component.bEscModeSet = -1 ; /*SvREFCNT_inc (sText) ;*/ @@ -205,10 +205,17 @@ tReq * r = CurrReq ; CODE: STRLEN nText ; + tNodeData * pNode ; + tNode xNode ; + int nEscMode = (SvUTF8(sText)?escHtmlUtf8:0) + ((r -> Component.nCurrEscMode & 11)== 3?1 + (r -> Component.nCurrEscMode & 4):r -> Component.nCurrEscMode) ; char * sT = SV2String (sText, nText) ; tDomTree * pDomTree = DomTree_self(xDomTree) ; - Node_appendChild (r -> pApp, pDomTree, xParent, r -> Component.nCurrRepeatLevel, (tNodeType)nType, 0, sT, nText, 0, 0, NULL) ; - + xNode = Node_appendChild (r -> pApp, pDomTree, xParent, r -> Component.nCurrRepeatLevel, (tNodeType)nType, 0, sT, nText, 0, 0, NULL) ; + pNode = Node_self(pDomTree,xNode) ; + pNode -> nType = (nEscMode & 8)?ntypText:((nEscMode & 3)?ntypTextHTML:ntypCDATA) ; + pNode -> bFlags &= ~(nflgEscUTF8 + nflgEscUrl + nflgEscChar) ; + pNode -> bFlags |= (nEscMode ^ nflgEscChar) & (nflgEscUTF8 + nflgEscUrl + nflgEscChar) ; + char * embperl_Node_iChildsText (xDomTree, xChild, bDeep=0) @@ -296,7 +303,7 @@ sT = SV2String (sText, nText) ; sA = SV2String (sAttr, nAttr) ; - sEscapedText = Escape (r, sT, nText, r -> Component.nCurrEscMode, NULL, '\0') ; + sEscapedText = Escape (r, sT, nText, 
(SvUTF8(sText)?escHtmlUtf8:0) + r -> Component.nCurrEscMode, NULL, '\0') ; sT = SV2String (sEscapedText, nText) ; pDomTree = DomTree_self (pDomNode -> xDomTree) ; @@ -320,7 +327,7 @@ STRLEN nText ; char * sT = SV2String (sText, nText) ; char * sA = SV2String (sAttr, nAttr) ; - sEscapedText = Escape (r, sT, nText, r -> Component.nCurrEscMode, NULL, '\0') ; + sEscapedText = Escape (r, sT, nText, (SvUTF8(sText)?escHtmlUtf8:0) + r -> Component.nCurrEscMode, NULL, '\0') ; sT = SV2String (sEscapedText, nText) ; pDomTree = DomTree_self (xDomTree) ; Modified: perl/embperl/trunk/Embperl/Inline.pm URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/Embperl/Inline.pm?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/Embperl/Inline.pm (original) +++ perl/embperl/trunk/Embperl/Inline.pm Tue Nov 8 21:11:19 2005 @@ -1,90 +1,99 @@ - -################################################################################### -# -# Embperl - Copyright (c) 1997-2005 Gerald Richter / ecos gmbh www.ecos.de -# -# You may distribute under the terms of either the GNU General Public -# License or the Artistic License, as specified in the Perl README file. -# -# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED -# WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
-# -# $Id$ -# -################################################################################### - - -package Embperl::Inline ; - -sub compile - - { - my ($code, $line, $file, $package) = @_ ; - - Embperl::Execute ({ 'inputfile' => $file, - 'input' => $code, - 'mtime' => -M $file, - 'import' => 0, - 'firstline' => $line, - 'package' => $package, - 'use_env' => 1}) ; - } - - -use Filter::Simple ; -use Embperl ; - -FILTER - { - s/\n__EMBPERL__(.+)$/\nBEGIN { my \$line = __LINE__ - 2 ; my \$code = q{$1}; Embperl::Inline::compile (\\\$code, \$line, __FILE__, __PACKAGE__)}/s ; - } ; - - -1 ; - -=pod - -=head1 NAME - -Embperl::Inline - Inline Embperl code in Perl modules - -=head1 SYNOPSIS - - package MyTest ; - - use Embperl::Inline ; - - __EMBPERL__ - - [$ sub foo $] - - [- - $a = 99 ; - -] - - <p>a=[+ $a +]</p> - [$endsub$] - - -=head1 DESCRIPTION - -Embperl::Inline allow to inline Embperl code in Perl modules. -The benfit is that you are able to install it like a normal -Perl module and it's available site wide, without the need -for any programm to know where it resides. - -Also it allows to add markup sections to Perl objects and -calling (and overriding it) like normal Perl methods. - -The only thing that needs to be done for using it, is to -use Embperl::Inline and to place your Embperl code after -the C<__EMBPERL__> keyword. - -=head1 Author - -G. Richter (richter [at] dev) - -=head1 See Also - -perl(1), Embperl + +################################################################################### +# +# Embperl - Copyright (c) 1997-2005 Gerald Richter / ecos gmbh www.ecos.de +# +# You may distribute under the terms of either the GNU General Public +# License or the Artistic License, as specified in the Perl README file. +# +# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED +# WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+# +# $Id$ +# +################################################################################### + + +package Embperl::Inline ; + +sub compile + + { + my ($code, $line, $file, $package) = @_ ; + + my $debug = 0 ; #0x7fffffff ; + if ($Embperl::req) + { + #$debug = $Embperl::req -> config -> debug ; + } + + #print STDERR "compile: $file, code = $code\n" ; + + Embperl::Execute ({ 'inputfile' => $file, + 'input' => $code, + 'mtime' => -M $file, + 'import' => 0, + 'firstline' => $line, + 'package' => $package, + #'debug' => $debug, + 'use_env' => 1}) ; + } + + +use Filter::Simple ; +use Embperl ; + +FILTER + { + s/\n__EMBPERL__(.+)$/\nBEGIN { my \$line = __LINE__ - 2 ; my \$code = q{$1}; Embperl::Inline::compile (\\\$code, \$line, __FILE__, __PACKAGE__)}/s ; + } ; + + +1 ; + +=pod + +=head1 NAME + +Embperl::Inline - Inline Embperl code in Perl modules + +=head1 SYNOPSIS + + package MyTest ; + + use Embperl::Inline ; + + __EMBPERL__ + + [$ sub foo $] + + [- + $a = 99 ; + -] + + <p>a=[+ $a +]</p> + [$endsub$] + + +=head1 DESCRIPTION + +Embperl::Inline allow to inline Embperl code in Perl modules. +The benfit is that you are able to install it like a normal +Perl module and it's available site wide, without the need +for any programm to know where it resides. + +Also it allows to add markup sections to Perl objects and +calling (and overriding it) like normal Perl methods. + +The only thing that needs to be done for using it, is to +use Embperl::Inline and to place your Embperl code after +the C<__EMBPERL__> keyword. + +=head1 Author + +G. 
Richter (richter [at] dev) + +=head1 See Also + +perl(1), Embperl Modified: perl/embperl/trunk/MANIFEST URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/MANIFEST?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/MANIFEST (original) +++ perl/embperl/trunk/MANIFEST Tue Nov 8 21:11:19 2005 @@ -26,7 +26,6 @@ Embperl/Inline.pm Embperl/Log.pm Embperl/Mail.pm -Embperl/Module.pm Embperl/Object.pm Embperl/Out.pm Embperl/Recipe.pm @@ -251,6 +250,7 @@ test/cmp/errpage.htm test/cmp/escape.htm test/cmp/escraw.htm +test/cmp/escutf8.htm test/cmp/execfirst.htm test/cmp/execgetsess.htm test/cmp/execnotfound.htm @@ -452,6 +452,7 @@ test/html/error.htm test/html/escape.htm test/html/escraw.htm +test/html/escutf8.htm test/html/execfirst.htm test/html/execgetsess.htm test/html/execnotfound.htm Modified: perl/embperl/trunk/Old.xs URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/Old.xs?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/Old.xs (original) +++ perl/embperl/trunk/Old.xs Tue Nov 8 21:11:19 2005 @@ -99,7 +99,7 @@ { char * p = SvPV (sText, l) ; r -> Component.bSubNotEmpty = 1 ; - r -> Component.xCurrNode = Node_insertAfter_CDATA (r->pApp, p, l, (r -> Component.nCurrEscMode & 3)== 3?1 + (r -> Component.nCurrEscMode & 4):r -> Component.nCurrEscMode, DomTree_self (r -> Component.xCurrDomTree), r -> Component.xCurrNode, r -> Component.nCurrRepeatLevel) ; + r -> Component.xCurrNode = Node_insertAfter_CDATA (r->pApp, p, l, (SvUTF8(sText)?nflgEscUTF8:0) + ((r -> Component.nCurrEscMode & 3)== 3?1 + (r -> Component.nCurrEscMode & 4):r -> Component.nCurrEscMode), DomTree_self (r -> Component.xCurrDomTree), r -> Component.xCurrNode, r -> Component.nCurrRepeatLevel) ; r -> Component.bEscModeSet = 0 ; } Modified: perl/embperl/trunk/embperl.h URL: 
http://svn.apache.org/viewcvs/perl/embperl/trunk/embperl.h?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/embperl.h (original) +++ perl/embperl/trunk/embperl.h Tue Nov 8 21:11:19 2005 @@ -189,7 +189,9 @@ escEscape = 4, escXML = 8, - escStd = 7 + escStd = 7, + + escHtmlUtf8 = 128 } ; /* --- output mode --- */ @@ -198,6 +200,15 @@ { omodeHtml = 0, omodeXml = 1, + } ; + +/* --- output esc charset --- */ + +enum tOutputEscChareset + { + ocharsetUtf8 = 0, + ocharsetLatin1 = 1, + ocharsetLatin2 = 2, } ; /* --- input escaping --- */ Modified: perl/embperl/trunk/ep.h URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/ep.h?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/ep.h (original) +++ perl/embperl/trunk/ep.h Tue Nov 8 21:11:19 2005 @@ -353,6 +353,8 @@ extern struct tCharTrans Char2Html [] ; +extern struct tCharTrans Char2HtmlLatin2 [] ; +extern struct tCharTrans Char2HtmlMin [] ; extern struct tCharTrans Char2Url [] ; extern struct tCharTrans Char2XML [] ; extern struct tCharTrans Html2Char [] ; Modified: perl/embperl/trunk/epcfg.h URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/epcfg.h?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/epcfg.h (original) +++ perl/embperl/trunk/epcfg.h Tue Nov 8 21:11:19 2005 @@ -41,6 +41,7 @@ EPCFG_INTOPT (ReqConfig, unsigned, bOptions, OPTIONS) EPCFG_INTOPT (ReqConfig, int , nSessionMode, SESSION_MODE) EPCFG_INTOPT (ReqConfig, int , nOutputMode, OUTPUT_MODE) +EPCFG_INTOPT (ReqConfig, int , nOutputEscCharset, OUTPUT_ESC_CHARSET) #endif Modified: perl/embperl/trunk/epchar.c URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/epchar.c?rev=331953&r1=331952&r2=331953&view=diff 
============================================================================== --- perl/embperl/trunk/epchar.c (original) +++ perl/embperl/trunk/epchar.c Tue Nov 8 21:11:19 2005 @@ -288,6 +288,535 @@ { 'þ' , "þ" }, /* Small thorn, Icelandic */ { '\255', "ÿ" }, /* Small y, diæresis / umlaut */ } ; + + + +struct tCharTrans Char2HtmlMin [] = + + { + { ' ' , "" }, /* � Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* 	 Horizontal tab */ + { ' ' , "" }, /* Line feed */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* Carriage Return */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*   Space */ + { '!' , "" }, /* ! Exclamation mark */ + { '"' , """ }, /* Quotation mark */ + { '#' , "" }, /* # Number sign */ + { '$' , "" }, /* $ Dollar sign */ + { '%' , "" }, /* % Percent sign */ + { '&' , "&" }, /* Ampersand */ + { '\'' , "" }, /* ' Apostrophe */ + { '(' , "" }, /* ( Left parenthesis */ + { ')' , "" }, /* ) Right parenthesis */ + { '*' , "" }, /* * Asterisk */ + { '+' , "" }, /* + Plus sign */ + { ',' , "" }, /* , Comma */ + { '-' , "" }, /* - Hyphen */ + { '.' , "" }, /* . 
Period (fullstop) */ + { '/' , "" }, /* / Solidus (slash) */ + { '0' , "" }, /* 0 Digit 0 */ + { '1' , "" }, /* 1 Digit 1 */ + { '2' , "" }, /* 2 Digit 2 */ + { '3' , "" }, /* 3 Digit 3 */ + { '4' , "" }, /* 4 Digit 4 */ + { '5' , "" }, /* 5 Digit 5 */ + { '6' , "" }, /* 6 Digit 6 */ + { '7' , "" }, /* 7 Digit 7 */ + { '8' , "" }, /* 8 Digit 8 */ + { '9' , "" }, /* 9 Digit 9 */ + { ':' , "" }, /* : Colon */ + { ';' , "" }, /* ; Semicolon */ + { '<' , "<" }, /* Less than */ + { '=' , "" }, /* = Equals sign */ + { '>' , ">" }, /* Greater than */ + { '?' , "" }, /* ? Question mark */ + { '@' , "" }, /* @ Commercial at */ + { 'A' , "" }, /* A Capital A */ + { 'B' , "" }, /* B Capital B */ + { 'C' , "" }, /* C Capital C */ + { 'D' , "" }, /* D Capital D */ + { 'E' , "" }, /* E Capital E */ + { 'F' , "" }, /* F Capital F */ + { 'G' , "" }, /* G Capital G */ + { 'H' , "" }, /* H Capital H */ + { 'I' , "" }, /* I Capital I */ + { 'J' , "" }, /* J Capital J */ + { 'K' , "" }, /* K Capital K */ + { 'L' , "" }, /* L Capital L */ + { 'M' , "" }, /* M Capital M */ + { 'N' , "" }, /* N Capital N */ + { 'O' , "" }, /* O Capital O */ + { 'P' , "" }, /* P Capital P */ + { 'Q' , "" }, /* Q Capital Q */ + { 'R' , "" }, /* R Capital R */ + { 'S' , "" }, /* S Capital S */ + { 'T' , "" }, /* T Capital T */ + { 'U' , "" }, /* U Capital U */ + { 'V' , "" }, /* V Capital V */ + { 'W' , "" }, /* W Capital W */ + { 'X' , "" }, /* X Capital X */ + { 'Y' , "" }, /* Y Capital Y */ + { 'Z' , "" }, /* Z Capital Z */ + { '[.' 
, "" }, /* [ Left square bracket */ + { '\\' , "" }, /* \ Reverse solidus (backslash) */ + { ']' , "" }, /* ] Right square bracket */ + { '^' , "" }, /* ^ Caret */ + { '_' , "" }, /* _ Horizontal bar (underscore) */ + { '`' , "" }, /* ` Acute accent */ + { 'a' , "" }, /* a Small a */ + { 'b' , "" }, /* b Small b */ + { 'c' , "" }, /* c Small c */ + { 'd' , "" }, /* d Small d */ + { 'e' , "" }, /* e Small e */ + { 'f' , "" }, /* f Small f */ + { 'g' , "" }, /* g Small g */ + { 'h' , "" }, /* h Small h */ + { 'i' , "" }, /* i Small i */ + { 'j' , "" }, /* j Small j */ + { 'k' , "" }, /* k Small k */ + { 'l' , "" }, /* l Small l */ + { 'm' , "" }, /* m Small m */ + { 'n' , "" }, /* n Small n */ + { 'o' , "" }, /* o Small o */ + { 'p' , "" }, /* p Small p */ + { 'q' , "" }, /* q Small q */ + { 'r' , "" }, /* r Small r */ + { 's' , "" }, /* s Small s */ + { 't' , "" }, /* t Small t */ + { 'u' , "" }, /* u Small u */ + { 'v' , "" }, /* v Small v */ + { 'w' , "" }, /* w Small w */ + { 'x' , "" }, /* x Small x */ + { 'y' , "" }, /* y Small y */ + { 'z' , "" }, /* z Small z */ + { '{' , "" }, /* { Left curly brace */ + { '|' , "" }, /* | Vertical bar */ + { '}' , "" }, /* } Right curly brace */ + { '~' , "" }, /* ~ Tilde */ + { '' , "" }, /*  Unused */ + { '' , "" }, /* € Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* ‚ Unused */ + { ' ' , "" }, /* ƒ Unused */ + { ' ' , "" }, /* „ Unused */ + { ' ' , "" }, /* … Unused */ + { ' ' , "" }, /* † Unused */ + { ' ' , "" }, /* ‡ Unused */ + { ' ' , "" }, /* ˆ Unused */ + { ' ' , "" }, /* ‰ Unused */ + { ' ' , "" }, /* Š Horizontal tab */ + { ' ' , "" }, /* ‹ Line feed */ + { ' ' , "" }, /* Œ Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* Ž Carriage Return */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* ‘ Unused */ + { ' ' , "" }, /* ’ Unused */ + { ' ' , "" }, /* “ Unused */ + { ' ' , "" }, /* ” Unused */ + { ' ' , "" }, /* • Unused */ + { ' ' , "" }, /* – Unused */ + { 
' ' , "" }, /* — Unused */ + { ' ' , "" }, /* ˜ Unused */ + { ' ' , "" }, /* ™ Unused */ + { ' ' , "" }, /* š Unused */ + { ' ' , "" }, /* › Unused */ + { ' ' , "" }, /* œ Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* ž Unused */ + { ' ' , "" }, /* Ÿ Unused */ + { ' ' , "" }, /* Non-breaking Space */ + { '¡' , "" }, /* Inverted exclamation */ + { '¢' , "" }, /* Cent sign */ + { '£' , "" }, /* Pound sterling */ + { '¤' , "" }, /* General currency sign */ + { '¥' , "" }, /* Yen sign */ +/* { '¦' , "" }, / * Broken vertical bar */ + { '¦' , "" }, /* Broken vertical bar */ + { '§' , "" }, /* Section sign */ +/* { '¨' , "" }, / * Diæresis / Umlaut */ + { '¨' , "" }, /* Diæresis / Umlaut */ + { '©' , "" }, /* Copyright */ + { 'ª' , "" }, /* Feminine ordinal */ + { '«' , "" }, /* Left angle quote, guillemot left */ + { '¬' , "" }, /* Not sign */ + { '' , "" }, /* Soft hyphen */ + { '®' , "" }, /* Registered trademark */ +/* { '¯' , "" }, / * Macron accent */ + { '¯' , "" }, /* Macron accent */ + { '°' , "" }, /* Degree sign */ + { '±' , "" }, /* Plus or minus */ + { '²' , "" }, /* Superscript two */ + { '³' , "" }, /* Superscript three */ + { '´' , "" }, /* Acute accent */ + { 'µ' , "" }, /* Micro sign */ + { '¶' , "" }, /* Paragraph sign */ + { '·' , "" }, /* Middle dot */ + { '¸' , "" }, /* Cedilla */ + { '¹' , "" }, /* Superscript one */ + { 'º' , "" }, /* Masculine ordinal */ + { '»' , "" }, /* Right angle quote, guillemot right */ + { '¼' , "" }, /* Fraction one-fourth */ + { '½' , "" }, /* Fraction one-half */ + { '¾' , "" }, /* Fraction three-fourths */ + { '¿' , "" }, /* Inverted question mark */ + { 'À' , "" }, /* Capital A, grave accent */ + { 'Á' , "" }, /* Capital A, acute accent */ + { 'Â' , "" }, /* Capital A, circumflex */ + { 'Ã' , "" }, /* Capital A, tilde */ + { 'Ä' , "" }, /* Capital A, diæresis / umlaut */ + { 'Å' , "" }, /* Capital A, ring */ + { 'Æ' , "" }, /* Capital AE ligature */ + { 'Ç' , "" }, /* Capital C, cedilla */ + { 'È' , "" 
}, /* Capital E, grave accent */ + { 'É' , "" }, /* Capital E, acute accent */ + { 'Ê' , "" }, /* Capital E, circumflex */ + { 'Ë' , "" }, /* Capital E, diæresis / umlaut */ + { 'Ì' , "" }, /* Capital I, grave accent */ + { 'Í' , "" }, /* Capital I, acute accent */ + { 'Î' , "" }, /* Capital I, circumflex */ + { 'Ï' , "" }, /* Capital I, diæresis / umlaut */ + { 'Ð' , "" }, /* Capital Eth, Icelandic */ + { 'Ñ' , "" }, /* Capital N, tilde */ + { 'Ò' , "" }, /* Capital O, grave accent */ + { 'Ó' , "" }, /* Capital O, acute accent */ + { 'Ô' , "" }, /* Capital O, circumflex */ + { 'Õ' , "" }, /* Capital O, tilde */ + { 'Ö' , "" }, /* Capital O, diæresis / umlaut */ + { '×' , "" }, /* Multiply sign */ + { 'Ø' , "" }, /* Capital O, slash */ + { 'Ù' , "" }, /* Capital U, grave accent */ + { 'Ú' , "" }, /* Capital U, acute accent */ + { 'Û' , "" }, /* Capital U, circumflex */ + { 'Ü' , "" }, /* Capital U, diæresis / umlaut */ + { 'Ý' , "" }, /* Capital Y, acute accent */ + { 'Þ' , "" }, /* Capital Thorn, Icelandic */ + { 'ß' , "" }, /* Small sharp s, German sz */ + { 'à' , "" }, /* Small a, grave accent */ + { 'ß' , "" }, /* Small a, acute accent */ + { 'â' , "" }, /* Small a, circumflex */ + { 'ã' , "" }, /* Small a, tilde */ + { 'ä' , "" }, /* Small a, diæresis / umlaut */ + { 'å' , "" }, /* Small a, ring */ + { 'æ' , "" }, /* Small ae ligature */ + { 'ç' , "" }, /* Small c, cedilla */ + { 'è' , "" }, /* Small e, grave accent */ + { 'é' , "" }, /* Small e, acute accent */ + { 'ê' , "" }, /* Small e, circumflex */ + { 'ë' , "" }, /* Small e, diæresis / umlaut */ + { 'ì' , "" }, /* Small i, grave accent */ + { 'í' , "" }, /* Small i, acute accent */ + { 'î' , "" }, /* Small i, circumflex */ + { 'ï' , "" }, /* Small i, diæresis / umlaut */ + { 'ð' , "" }, /* Small eth, Icelandic */ + { 'ñ' , "" }, /* Small n, tilde */ + { 'ò' , "" }, /* Small o, grave accent */ + { 'ó' , "" }, /* Small o, acute accent */ + { 'ô' , "" }, /* Small o, circumflex */ + { 'õ' , "" }, /* Small o, 
tilde */ + { 'ö' , "" }, /* Small o, diæresis / umlaut */ + { '÷' , "" }, /* Division sign */ + { 'ø' , "" }, /* Small o, slash */ + { 'ù' , "" }, /* Small u, grave accent */ + { 'ú' , "" }, /* Small u, acute accent */ + { 'û' , "" }, /* Small u, circumflex */ + { 'ü' , "" }, /* Small u, diæresis / umlaut */ + { 'ý' , "" }, /* Small y, acute accent */ + { 'þ' , "" }, /* Small thorn, Icelandic */ + { '\255', "" }, /* Small y, diæresis / umlaut */ + } ; + +struct tCharTrans Char2HtmlLatin2 [] = + + { + { ' ' , "" }, /* � Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* 	 Horizontal tab */ + { ' ' , "" }, /* Line feed */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* Carriage Return */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*   Space */ + { '!' , "" }, /* ! Exclamation mark */ + { '"' , """ }, /* Quotation mark */ + { '#' , "" }, /* # Number sign */ + { '$' , "" }, /* $ Dollar sign */ + { '%' , "" }, /* % Percent sign */ + { '&' , "&" }, /* Ampersand */ + { '\'' , "" }, /* ' Apostrophe */ + { '(' , "" }, /* ( Left parenthesis */ + { ')' , "" }, /* ) Right parenthesis */ + { '*' , "" }, /* * Asterisk */ + { '+' , "" }, /* + Plus sign */ + { ',' , "" }, /* , Comma */ + { '-' , "" }, /* - Hyphen */ + { '.' 
, "" }, /* . Period (fullstop) */ + { '/' , "" }, /* / Solidus (slash) */ + { '0' , "" }, /* 0 Digit 0 */ + { '1' , "" }, /* 1 Digit 1 */ + { '2' , "" }, /* 2 Digit 2 */ + { '3' , "" }, /* 3 Digit 3 */ + { '4' , "" }, /* 4 Digit 4 */ + { '5' , "" }, /* 5 Digit 5 */ + { '6' , "" }, /* 6 Digit 6 */ + { '7' , "" }, /* 7 Digit 7 */ + { '8' , "" }, /* 8 Digit 8 */ + { '9' , "" }, /* 9 Digit 9 */ + { ':' , "" }, /* : Colon */ + { ';' , "" }, /* ; Semicolon */ + { '<' , "<" }, /* Less than */ + { '=' , "" }, /* = Equals sign */ + { '>' , ">" }, /* Greater than */ + { '?' , "" }, /* ? Question mark */ + { '@' , "" }, /* @ Commercial at */ + { 'A' , "" }, /* A Capital A */ + { 'B' , "" }, /* B Capital B */ + { 'C' , "" }, /* C Capital C */ + { 'D' , "" }, /* D Capital D */ + { 'E' , "" }, /* E Capital E */ + { 'F' , "" }, /* F Capital F */ + { 'G' , "" }, /* G Capital G */ + { 'H' , "" }, /* H Capital H */ + { 'I' , "" }, /* I Capital I */ + { 'J' , "" }, /* J Capital J */ + { 'K' , "" }, /* K Capital K */ + { 'L' , "" }, /* L Capital L */ + { 'M' , "" }, /* M Capital M */ + { 'N' , "" }, /* N Capital N */ + { 'O' , "" }, /* O Capital O */ + { 'P' , "" }, /* P Capital P */ + { 'Q' , "" }, /* Q Capital Q */ + { 'R' , "" }, /* R Capital R */ + { 'S' , "" }, /* S Capital S */ + { 'T' , "" }, /* T Capital T */ + { 'U' , "" }, /* U Capital U */ + { 'V' , "" }, /* V Capital V */ + { 'W' , "" }, /* W Capital W */ + { 'X' , "" }, /* X Capital X */ + { 'Y' , "" }, /* Y Capital Y */ + { 'Z' , "" }, /* Z Capital Z */ + { '[.' 
, "" }, /* [ Left square bracket */ + { '\\' , "" }, /* \ Reverse solidus (backslash) */ + { ']' , "" }, /* ] Right square bracket */ + { '^' , "" }, /* ^ Caret */ + { '_' , "" }, /* _ Horizontal bar (underscore) */ + { '`' , "" }, /* ` Acute accent */ + { 'a' , "" }, /* a Small a */ + { 'b' , "" }, /* b Small b */ + { 'c' , "" }, /* c Small c */ + { 'd' , "" }, /* d Small d */ + { 'e' , "" }, /* e Small e */ + { 'f' , "" }, /* f Small f */ + { 'g' , "" }, /* g Small g */ + { 'h' , "" }, /* h Small h */ + { 'i' , "" }, /* i Small i */ + { 'j' , "" }, /* j Small j */ + { 'k' , "" }, /* k Small k */ + { 'l' , "" }, /* l Small l */ + { 'm' , "" }, /* m Small m */ + { 'n' , "" }, /* n Small n */ + { 'o' , "" }, /* o Small o */ + { 'p' , "" }, /* p Small p */ + { 'q' , "" }, /* q Small q */ + { 'r' , "" }, /* r Small r */ + { 's' , "" }, /* s Small s */ + { 't' , "" }, /* t Small t */ + { 'u' , "" }, /* u Small u */ + { 'v' , "" }, /* v Small v */ + { 'w' , "" }, /* w Small w */ + { 'x' , "" }, /* x Small x */ + { 'y' , "" }, /* y Small y */ + { 'z' , "" }, /* z Small z */ + { '{' , "" }, /* { Left curly brace */ + { '|' , "" }, /* | Vertical bar */ + { '}' , "" }, /* } Right curly brace */ + { '~' , "" }, /* ~ Tilde */ + { '' , "" }, /*  Unused */ + { '' , "" }, /* € Unused */ + + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* ‚ Unused */ + { ' ' , "" }, /* ƒ Unused */ + { ' ' , "" }, /* „ Unused */ + { ' ' , "" }, /* … Unused */ + { ' ' , "" }, /* † Unused */ + { ' ' , "" }, /* ‡ Unused */ + { ' ' , "" }, /* ˆ Unused */ + { ' ' , "" }, /* ‰ Unused */ + { ' ' , "" }, /* Š Horizontal tab */ + { ' ' , "" }, /* ‹ Line feed */ + { ' ' , "" }, /* Œ Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* Ž Carriage Return */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* ‘ Unused */ + { ' ' , "" }, /* ’ Unused */ + { ' ' , "" }, /* “ Unused */ + { ' ' , "" }, /* ” Unused */ + { ' ' , "" }, /* • Unused */ + { ' ' , "" }, /* – Unused */ + 
{ ' ' , "" }, /* — Unused */ + { ' ' , "" }, /* ˜ Unused */ + { ' ' , "" }, /* ™ Unused */ + { ' ' , "" }, /* š Unused */ + { ' ' , "" }, /* › Unused */ + { ' ' , "" }, /* œ Unused */ + { ' ' , "" }, /*  Unused */ + { ' ' , "" }, /* ž Unused */ + { ' ' , "" }, /* Ÿ Unused */ + { ' ', " " }, /*   - nobreakspace */ + { '¡', "" }, /* ¡ - Aogonek */ + { '¢', "" }, /* ¢ - breve */ + { '£', "" }, /* £ - Lstroke */ + { '¤', "¤" }, /* ¤ - currency */ + { '¥', "" }, /* ¥ - Lcaron */ + { '¦', "" }, /* ¦ - Sacute */ + { '§', "§" }, /* § - section */ + { '¨', "¨" }, /* ¨ - diaeresis */ + { '©', "" }, /* © - Scaron */ + { 'ª', "" }, /* ª - Scedilla */ + { '«', "" }, /* « - Tcaron */ + { '¬', "" }, /* ¬ - Zacute */ + { '', "­" }, /* ­ - hyphen */ + { '®', "" }, /* ® - Zcaron */ + { '¯', "" }, /* ¯ - Zabovedot */ + { '°', "°ree;" }, /* ° - degree */ + { '±', "" }, /* ± - aogonek */ + { '²', "" }, /* ² - ogonek */ + { '³', "" }, /* ³ - lstroke */ + { '´', "´" }, /* ´ - acute */ + { 'µ', "" }, /* µ - lcaron */ + { '¶', "" }, /* ¶ - sacute */ + { '·', "" }, /* · - caron */ + { '¸', "¸" }, /* ¸ - cedilla */ + { '¹', "" }, /* ¹ - scaron */ + { 'º', "" }, /* º - scedilla */ + { '»', "" }, /* » - tcaron */ + { '¼', "" }, /* ¼ - zacute */ + { '½', "" }, /* ½ - doubleacute */ + { '¾', "" }, /* ¾ - zcaron */ + { '¿', "" }, /* ¿ - zabovedot */ + { 'À', "" }, /* À - Racute */ + { 'Á', "Á" }, /* Á - Aacute */ + { 'Â', "Â" }, /*  - Acircumflex */ + { 'Ã', "" }, /* à - Abreve */ + { 'Ä', "Ä" }, /* Ä - Adiaeresis */ + { 'Å', "" }, /* Å - Lacute */ + { 'Æ', "" }, /* Æ - Cacute */ + { 'Ç', "Ç" }, /* Ç - Ccedilla */ + { 'È', "" }, /* È - Ccaron */ + { 'É', "É" }, /* É - Eacute */ + { 'Ê', "" }, /* Ê - Eogonek */ + { 'Ë', "Ë" }, /* Ë - Ediaeresis */ + { 'Ì', "" }, /* Ì - Ecaron */ + { 'Í', "Í" }, /* Í - Iacute */ + { 'Î', "Î" }, /* Î - Icircumflex */ + { 'Ï', "" }, /* Ï - Dcaron */ + { 'Ð', "Ð" }, /* Ð - Eth */ + { 'Ñ', "" }, /* Ñ - Nacute */ + { 'Ò', "" }, /* Ò - Ncaron */ + { 'Ó', "Ó" }, /* Ó - 
Oacute */ + { 'Ô', "Ô" }, /* Ô - Ocircumflex */ + { 'Õ', "" }, /* Õ - Odoubleacute */ + { 'Ö', "Ö" }, /* Ö - Odiaeresis */ + { '×', "×" }, /* × - multiply */ + { 'Ø', "" }, /* Ø - Rcaron */ + { 'Ù', "" }, /* Ù - Uring */ + { 'Ú', "Ú" }, /* Ú - Uacute */ + { 'Û', "" }, /* Û - Udoubleacute */ + { 'Ü', "Ü" }, /* Ü - Udiaeresis */ + { 'Ý', "Ý" }, /* Ý - Yacute */ + { 'Þ', "" }, /* Þ - Tcedilla */ + { 'ß', "ß" }, /* ß - ssharp */ + { 'à', "" }, /* à - racute */ + { 'ß', "á" }, /* á - aacute */ + { 'â', "â" }, /* â - acircumflex */ + { 'ã', "" }, /* ã - abreve */ + { 'ä', "ä" }, /* ä - adiaeresis */ + { 'å', "" }, /* å - lacute */ + { 'æ', "" }, /* æ - cacute */ + { 'ç', "ç" }, /* ç - ccedilla */ + { 'è', "" }, /* è - ccaron */ + { 'é', "é" }, /* é - eacute */ + { 'ê', "" }, /* ê - eogonek */ + { 'ë', "ë" }, /* ë - ediaeresis */ + { 'ì', "" }, /* ì - ecaron */ + { 'í', "í" }, /* í - iacute */ + { 'î', "î" }, /* î - icircumflex */ + { 'ï', "" }, /* ï - dcaron */ + { 'ð', "ð" }, /* ð - eth */ + { 'ñ', "" }, /* ñ - nacute */ + { 'ò', "" }, /* ò - ncaron */ + { 'ó', "ó" }, /* ó - oacute */ + { 'ô', "ô" }, /* ô - ocircumflex */ + { 'õ', "" }, /* õ - odoubleacute */ + { 'ö', "ö" }, /* ö - odiaeresis */ + { '÷', "÷" }, /* ÷ - division */ + { 'ø', "" }, /* ø - rcaron */ + { 'ù', "" }, /* ù - uring */ + { 'ú', "ú" }, /* ú - uacute */ + { 'û', "" }, /* û - udoubleacute */ + { 'ü', "ü" }, /* ü - udiaeresis */ + { 'ý', "ý" }, /* ý - yacute */ + { 'þ', "" }, /* þ - tcedilla */ + { '\255', "" }, /* ÿ - abovedot */ + } ; + struct tCharTrans Char2Url [] = Modified: perl/embperl/trunk/epdat2.h URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/epdat2.h?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/epdat2.h (original) +++ perl/embperl/trunk/epdat2.h Tue Nov 8 21:11:19 2005 @@ -108,6 +108,7 @@ char cMultFieldSep ; AV * pPathAV ; int nOutputMode ; + int nOutputEscCharset ; /**< 0 = utf-8 
(min) 1 = latin1 2 = latin2 */ unsigned bDebug ; unsigned bOptions ; int nSessionMode ; /**< sets how to pass the session id, see smodeXXX constants */ Modified: perl/embperl/trunk/epdefault.c URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/epdefault.c?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/epdefault.c (original) +++ perl/embperl/trunk/epdefault.c Tue Nov 8 21:11:19 2005 @@ -72,6 +72,7 @@ { pCfg -> cMultFieldSep = '\t' ; pCfg -> nSessionMode = smodeUDatCookie ; + pCfg -> nOutputEscCharset = ocharsetLatin1 ; } Modified: perl/embperl/trunk/epdom.c URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/epdom.c?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/epdom.c (original) +++ perl/embperl/trunk/epdom.c Tue Nov 8 21:11:19 2005 @@ -3127,8 +3127,8 @@ if (nEscMode != -1) { pNew -> nType = (nEscMode & 8)?ntypText:((nEscMode & 3)?ntypTextHTML:ntypCDATA) ; - pNew -> bFlags &= ~(nflgEscXML + nflgEscUrl + nflgEscChar) ; - pNew -> bFlags |= (nEscMode ^ nflgEscChar) & (nflgEscXML + nflgEscUrl + nflgEscChar) ; + pNew -> bFlags &= ~(nflgEscUTF8 + nflgEscUrl + nflgEscChar) ; + pNew -> bFlags |= (nEscMode ^ nflgEscChar) & (nflgEscUTF8 + nflgEscUrl + nflgEscChar) ; } else pNew -> nType = ntypCDATA ; @@ -3286,8 +3286,8 @@ if (nEscMode != -1) { pNew -> nType = (nEscMode & 8)?ntypText:((nEscMode & 3)?ntypTextHTML:ntypCDATA) ; - pNew -> bFlags &= ~(nflgEscXML + nflgEscUrl + nflgEscChar) ; - pNew -> bFlags |= (nEscMode ^ nflgEscChar) & (nflgEscXML + nflgEscUrl + nflgEscChar) ; + pNew -> bFlags &= ~(nflgEscUTF8 + nflgEscUrl + nflgEscChar) ; + pNew -> bFlags |= (nEscMode ^ nflgEscChar) & (nflgEscUTF8 + nflgEscUrl + nflgEscChar) ; } else pNew -> nType = ntypCDATA ; @@ -3362,8 +3362,8 @@ if (nEscMode != -1) { pOldChild -> nType = (nEscMode & 8)?ntypText:((nEscMode & 
3)?ntypTextHTML:ntypCDATA) ; - pOldChild -> bFlags &= ~(nflgEscXML + nflgEscUrl + nflgEscChar) ; - pOldChild -> bFlags |= (nEscMode ^ nflgEscChar) & (nflgEscXML + nflgEscUrl + nflgEscChar) ; + pOldChild -> bFlags &= ~(nflgEscUTF8 + nflgEscUrl + nflgEscChar) ; + pOldChild -> bFlags |= (nEscMode ^ nflgEscChar) & (nflgEscUTF8 + nflgEscUrl + nflgEscChar) ; } else pOldChild -> nType = ntypCDATA ; @@ -3655,6 +3655,14 @@ tRepeatLevel nRepeatLevel = *pRepeatLevel ; tNodeData * pNode = Node_self (pDomTree, xNode) ; tNodeData * pLast ; + struct tCharTrans * pChar2Html ; + + if (r -> Config.nOutputEscCharset == ocharsetLatin1) + pChar2Html = Char2Html ; + else if (r -> Config.nOutputEscCharset == ocharsetLatin2) + pChar2Html = Char2HtmlLatin2 ; + else + pChar2Html = Char2HtmlMin ; if (pNode -> nType == ntypDocumentFraq) @@ -3846,7 +3854,7 @@ char * s ; int l ; Ndx2StringLen (pNode -> nText,s,l) ; - OutputEscape (r, s, l, (pNode -> bFlags & nflgEscXML)?Char2XML:(pNode -> bFlags & nflgEscUrl)?Char2Url:Char2Html, (char)((pNode -> bFlags & nflgEscChar)?'\\':0)) ; + OutputEscape (r, s, l, (pNode -> bFlags & nflgEscUTF8)?(pNode -> bFlags & nflgEscUrl)?Char2Url:Char2HtmlMin:(pNode -> bFlags & nflgEscUrl)?Char2Url:pChar2Html, (char)((pNode -> bFlags & nflgEscChar)?'\\':0)) ; } else { Modified: perl/embperl/trunk/epdom.h URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/epdom.h?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/epdom.h (original) +++ perl/embperl/trunk/epdom.h Tue Nov 8 21:11:19 2005 @@ -141,11 +141,11 @@ nflgOK = 1, nflgEscUrl = 2, nflgEscChar = 4, - nflgEscXML = 8, nflgIgnore = 16, /**< Ignore this node */ nflgNewLevelNext = 32, /**< Next sibling has new RepeatLevel */ nflgNewLevelPrev = 64, /**< Previous sibling has new RepeatLevel */ - nflgStopOutput = 128 /**< Do not make any further output after this node */ + nflgStopOutput = 8, /**< Do not make any further output 
after this node */ + nflgEscUTF8 = 128 } ; enum tAttrFlags Modified: perl/embperl/trunk/epinit.c URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/epinit.c?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/epinit.c (original) +++ perl/embperl/trunk/epinit.c Tue Nov 8 21:11:19 2005 @@ -175,6 +175,15 @@ OPTION_OMODE(Xml) } ; +#define OPTION_OCHARSET(a) OPTION(ocharset##a, a) + +tOptionEntry OptionsOUTPUT_ESC_CHARSET[] = + { + OPTION_OCHARSET(Utf8) + OPTION_OCHARSET(Latin1) + OPTION_OCHARSET(Latin2) + } ; + #define OPTION_SMODE(a) OPTION(smode##a, a) tOptionEntry OptionsSESSION_MODE[] = Modified: perl/embperl/trunk/epmain.c URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/epmain.c?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/epmain.c (original) +++ perl/embperl/trunk/epmain.c Tue Nov 8 21:11:19 2005 @@ -380,7 +380,18 @@ if (r -> Component.Config.nEscMode & escXML && !r -> Component.bEscInUrl) r -> Component.pNextEscape = Char2XML ; else if (r -> Component.Config.nEscMode & escHtml && !r -> Component.bEscInUrl) - r -> Component.pNextEscape = Char2Html ; + { + struct tCharTrans * pChar2Html ; + + if (r -> Config.nOutputEscCharset == ocharsetLatin1) + pChar2Html = Char2Html ; + else if (r -> Config.nOutputEscCharset == ocharsetLatin2) + pChar2Html = Char2HtmlLatin2 ; + else + pChar2Html = Char2HtmlMin ; + + r -> Component.pNextEscape = pChar2Html ; + } else if (r -> Component.Config.nEscMode & escUrl) r -> Component.pNextEscape = Char2Url ; else Modified: perl/embperl/trunk/eppriv.h URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/eppriv.h?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/eppriv.h (original) +++ perl/embperl/trunk/eppriv.h Tue Nov 8 21:11:19 2005 @@ -223,6 
+223,7 @@ extern tOptionEntry OptionsESCMODE[] ; extern tOptionEntry OptionsINPUT_ESCMODE[] ; extern tOptionEntry OptionsOUTPUT_MODE[] ; +extern tOptionEntry OptionsOUTPUT_ESC_CHARSET[] ; extern tOptionEntry OptionsSESSION_MODE[] ; Modified: perl/embperl/trunk/eputil.c URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/eputil.c?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/eputil.c (original) +++ perl/embperl/trunk/eputil.c Tue Nov 8 21:11:19 2005 @@ -161,7 +161,19 @@ if ((nEscMode & escXML) && !r -> Component.bEscInUrl) pEscTab = Char2XML ; else if ((nEscMode & escHtml) && !r -> Component.bEscInUrl) - pEscTab = Char2Html ; + { + struct tCharTrans * pChar2Html ; + + if (nEscMode & escHtmlUtf8) + pChar2Html = Char2HtmlMin ; + else if (r -> Config.nOutputEscCharset == ocharsetLatin1) + pChar2Html = Char2Html ; + else if (r -> Config.nOutputEscCharset == ocharsetLatin2) + pChar2Html = Char2HtmlLatin2 ; + else + pChar2Html = Char2HtmlMin ; + pEscTab = pChar2Html ; + } else if (nEscMode & escUrl) pEscTab = Char2Url ; else Modified: perl/embperl/trunk/podsrc/Config.spod URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/podsrc/Config.spod?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/podsrc/Config.spod (original) +++ perl/embperl/trunk/podsrc/Config.spod Tue Nov 8 21:11:19 2005 @@ -841,6 +841,31 @@ B<NOTE:> If you set output_mode to XML you should also change L<escmode> to XML escaping. +=head2 *CFG $request / Embperl_Output_Esc_Charset / output_esc_charset / no / 2.0.2 / ocharsetLatin1 = 1 + +Set the charset to assume when escaping. This can only be set +before the request starts (e.g. httpd.conf or top of the page). +Setting it inside the page has undefined results. + +=over + +=item ocharsetUtf8 = 0 + +UTF-8 or any unknown charset. 
Characters with codes above 128 will not be escaped +at all + +=item ocharsetLatin1 = 1 + +ISO-8859-1, the default. When a Perl string has it's utf-8 bit set, this mode +will behave the same as mode 0, i.e. will not escape anything above 128. + +=item ocharsetLatin2 = 2 + +ISO-8859-2. When a Perl string has it's utf-8 bit set, this mode +will behave the same as mode 0, i.e. will not escape anything above 128. + +=back + =head2 *CFG $request / Embperl_SESSION_MODE / session_mode / no / 2.0b6 / smodeUDatCookie = 1 Specifies how the id for the session data is passed between requests. Modified: perl/embperl/trunk/test.pl URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/test.pl?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/test.pl (original) +++ perl/embperl/trunk/test.pl Tue Nov 8 21:11:19 2005 @@ -200,6 +200,8 @@ 'escraw.htm' => { 'version' => 1, }, + 'escutf8.htm' => { + }, 'spaces.htm' => { 'version' => 1, }, Added: perl/embperl/trunk/test/cmp/escutf8.htm URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/test/cmp/escutf8.htm?rev=331953&view=auto ============================================================================== --- perl/embperl/trunk/test/cmp/escutf8.htm (added) +++ perl/embperl/trunk/test/cmp/escutf8.htm Tue Nov 8 21:11:19 2005 @@ -0,0 +1,101 @@ + +<html> +<head> +<title>Some tests for Embperl UTF8 escaping</title> +</head> + +<body> +------------------------------------------------ +$epreq -> config -> output_esc_charset = 1 +$escmode = $escmodestd ; +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÄÖÜß© +<input name="test" value="abc!'"0&()<>=äöüÄÖÜß©"> +<textarea name="test">abc!'"0&()<>=äöüÄÖÜß©</textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÃÃÃé +<input name="utf8" value="abc!'"0&()<>=äöüÃÃÃé"> +<textarea name="utf8">abc!'"0&()<>=äöüÃÃÃé</textarea> + +$escmode = 8 ; +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÄÖÜß© +<input name="test" 
value="abc!'"0&()<>=äöüÄÖÜß©"> +<textarea name="test">abc!'"0&()<>=äöüÄÖÜß©</textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÃÃÃé +<input name="utf8" value="abc!'"0&()<>=äöüÃÃÃé"> +<textarea name="utf8">abc!'"0&()<>=äöüÃÃÃé</textarea> + +$escmode = 2 ; +abc!"$%&()<>=äöüÄÖÜß -> abc!%27%220%26()%3C%3E%3D%E4%F6%FC%C4%D6%DC%DF%A9 +<input name="test" value="abc!%27%220%26()%3C%3E%3D%E4%F6%FC%C4%D6%DC%DF%A9"> +<textarea name="test">abc!%27%220%26()%3C%3E%3D%E4%F6%FC%C4%D6%DC%DF%A9</textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> abc!%27%220%26()%3C%3E%3D%C3%A4%C3%B6%C3%BC%C3%84%C3%96%C3%9C%C3%9F%C2%A9 +<input name="utf8" value="abc!%27%220%26()%3C%3E%3D%C3%A4%C3%B6%C3%BC%C3%84%C3%96%C3%9C%C3%9F%C2%A9"> +<textarea name="utf8">abc!%27%220%26()%3C%3E%3D%C3%A4%C3%B6%C3%BC%C3%84%C3%96%C3%9C%C3%9F%C2%A9</textarea> + + +------------------------------------------------ +$epreq -> config -> output_esc_charset = 2 +$escmode = $escmodestd ; +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÄÖÜß© +<input name="test" value="abc!'"0&()<>=äöüÄÖÜß©"> +<textarea name="test">abc!'"0&()<>=äöüÄÖÜß©</textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÃÃÃé +<input name="utf8" value="abc!'"0&()<>=äöüÃÃÃé"> +<textarea name="utf8">abc!'"0&()<>=äöüÃÃÃé</textarea> + +$escmode = 8 ; +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÄÖÜß© +<input name="test" value="abc!'"0&()<>=äöüÄÖÜß©"> +<textarea name="test">abc!'"0&()<>=äöüÄÖÜß©</textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÃÃÃé +<input name="utf8" value="abc!'"0&()<>=äöüÃÃÃé"> +<textarea name="utf8">abc!'"0&()<>=äöüÃÃÃé</textarea> + +$escmode = 2 ; +abc!"$%&()<>=äöüÄÖÜß -> abc!%27%220%26()%3C%3E%3D%E4%F6%FC%C4%D6%DC%DF%A9 +<input name="test" value="abc!%27%220%26()%3C%3E%3D%E4%F6%FC%C4%D6%DC%DF%A9"> +<textarea name="test">abc!%27%220%26()%3C%3E%3D%E4%F6%FC%C4%D6%DC%DF%A9</textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> abc!%27%220%26()%3C%3E%3D%C3%A4%C3%B6%C3%BC%C3%84%C3%96%C3%9C%C3%9F%C2%A9 +<input name="utf8" 
value="abc!%27%220%26()%3C%3E%3D%C3%A4%C3%B6%C3%BC%C3%84%C3%96%C3%9C%C3%9F%C2%A9"> +<textarea name="utf8">abc!%27%220%26()%3C%3E%3D%C3%A4%C3%B6%C3%BC%C3%84%C3%96%C3%9C%C3%9F%C2%A9</textarea> + + +------------------------------------------------ +$epreq -> config -> output_esc_charset = 0 +$escmode = $escmodestd ; +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÄÖÜß© +<input name="test" value="abc!'"0&()<>=äöüÄÖÜß©"> +<textarea name="test">abc!'"0&()<>=äöüÄÖÜß©</textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÃÃÃé +<input name="utf8" value="abc!'"0&()<>=äöüÃÃÃé"> +<textarea name="utf8">abc!'"0&()<>=äöüÃÃÃé</textarea> + +$escmode = 8 ; +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÄÖÜß© +<input name="test" value="abc!'"0&()<>=äöüÄÖÜß©"> +<textarea name="test">abc!'"0&()<>=äöüÄÖÜß©</textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> abc!'"0&()<>=äöüÃÃÃé +<input name="utf8" value="abc!'"0&()<>=äöüÃÃÃé"> +<textarea name="utf8">abc!'"0&()<>=äöüÃÃÃé</textarea> + +$escmode = 2 ; +abc!"$%&()<>=äöüÄÖÜß -> abc!%27%220%26()%3C%3E%3D%E4%F6%FC%C4%D6%DC%DF%A9 +<input name="test" value="abc!%27%220%26()%3C%3E%3D%E4%F6%FC%C4%D6%DC%DF%A9"> +<textarea name="test">abc!%27%220%26()%3C%3E%3D%E4%F6%FC%C4%D6%DC%DF%A9</textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> abc!%27%220%26()%3C%3E%3D%C3%A4%C3%B6%C3%BC%C3%84%C3%96%C3%9C%C3%9F%C2%A9 +<input name="utf8" value="abc!%27%220%26()%3C%3E%3D%C3%A4%C3%B6%C3%BC%C3%84%C3%96%C3%9C%C3%9F%C2%A9"> +<textarea name="utf8">abc!%27%220%26()%3C%3E%3D%C3%A4%C3%B6%C3%BC%C3%84%C3%96%C3%9C%C3%9F%C2%A9</textarea> + +------------------------------------------------ +$epreq -> config -> output_esc_charset = 1 +$escmode = $escmodestd ; +</body> +</html> \ No newline at end of file Added: perl/embperl/trunk/test/html/escutf8.htm URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/test/html/escutf8.htm?rev=331953&view=auto ============================================================================== --- perl/embperl/trunk/test/html/escutf8.htm (added) +++ 
perl/embperl/trunk/test/html/escutf8.htm Tue Nov 8 21:11:19 2005 @@ -0,0 +1,129 @@ + +<html> +<head> +<title>Some tests for Embperl UTF8 escaping</title> +</head> + +<body> +[.- +use Encode ; +$fdat{test} = "abc!'\"$%&()<>=äöüÄÖÜß©" ; +$fdat{utf8} = decode("iso-8859-1", $fdat{test}) ; +$escmodestd = $escmode ; +-] + +------------------------------------------------ +$epreq -> config -> output_esc_charset = 1 +[- $epreq -> config -> output_esc_charset (1) -] + +$escmode = $escmodestd ; +[- $escmode = $escmodestd ; -] +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{test} +] +<input name="test"> +<textarea name="test"></textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{utf8} +] +<input name="utf8"> +<textarea name="utf8"></textarea> + +$escmode = 8 ; +[- $escmode = 8 ; -] +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{test} +] +<input name="test"> +<textarea name="test"></textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{utf8} +] +<input name="utf8"> +<textarea name="utf8"></textarea> + +$escmode = 2 ; +[- $escmode = 2 ; -] +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{test} +] +<input name="test"> +<textarea name="test"></textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{utf8} +] +<input name="utf8"> +<textarea name="utf8"></textarea> + + +------------------------------------------------ +$epreq -> config -> output_esc_charset = 2 +[- $epreq -> config -> output_esc_charset (2) -] + +$escmode = $escmodestd ; +[- $escmode = $escmodestd ; -] +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{test} +] +<input name="test"> +<textarea name="test"></textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{utf8} +] +<input name="utf8"> +<textarea name="utf8"></textarea> + +$escmode = 8 ; +[- $escmode = 8 ; -] +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{test} +] +<input name="test"> +<textarea name="test"></textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{utf8} +] +<input name="utf8"> +<textarea name="utf8"></textarea> + +$escmode = 2 ; +[- $escmode = 2 ; -] +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{test} +] +<input name="test"> +<textarea 
name="test"></textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{utf8} +] +<input name="utf8"> +<textarea name="utf8"></textarea> + + +------------------------------------------------ +$epreq -> config -> output_esc_charset = 0 +[- $epreq -> config -> output_esc_charset (0) -] + +$escmode = $escmodestd ; +[- $escmode = $escmodestd ; -] +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{test} +] +<input name="test"> +<textarea name="test"></textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{utf8} +] +<input name="utf8"> +<textarea name="utf8"></textarea> + +$escmode = 8 ; +[- $escmode = 8 ; -] +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{test} +] +<input name="test"> +<textarea name="test"></textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{utf8} +] +<input name="utf8"> +<textarea name="utf8"></textarea> + +$escmode = 2 ; +[- $escmode = 2 ; -] +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{test} +] +<input name="test"> +<textarea name="test"></textarea> +utf8: +abc!"$%&()<>=äöüÄÖÜß -> [+ $fdat{utf8} +] +<input name="utf8"> +<textarea name="utf8"></textarea> + +------------------------------------------------ +$epreq -> config -> output_esc_charset = 1 +[- $epreq -> config -> output_esc_charset (1) -] + +$escmode = $escmodestd ; +[- $escmode = $escmodestd ; -] + + + +</body> +</html> \ No newline at end of file Propchange: perl/embperl/trunk/test/html/escutf8.htm ------------------------------------------------------------------------------ svn:executable = * Modified: perl/embperl/trunk/xsbuilder/maps/ep_structure.map URL: http://svn.apache.org/viewcvs/perl/embperl/trunk/xsbuilder/maps/ep_structure.map?rev=331953&r1=331952&r2=331953&view=diff ============================================================================== --- perl/embperl/trunk/xsbuilder/maps/ep_structure.map (original) +++ perl/embperl/trunk/xsbuilder/maps/ep_structure.map Tue Nov 8 21:11:19 2005 @@ -236,6 +236,7 @@ bOptions | options nSessionMode | session_mode nOutputMode | output_mode + nOutputEscCharset | output_esc_charset new ! 
private </tReqConfig> --------------------------------------------------------------------- To unsubscribe, e-mail: embperl-cvs-unsubscribe [at] perl For additional commands, e-mail: embperl-cvs-help [at] perl
|