# ================================================================== # Gossamer Forum - Advanced web community # # Website : http://gossamer-threads.com/ # Support : http://gossamer-threads.com/scripts/support/ # Revision : $Id: Convert.pm,v 1.52.2.4 2003/04/13 20:14:25 jagerman Exp $ # # Copyright (c) 2003 Gossamer Threads Inc. All Rights Reserved. # Redistribution in part or in whole strictly prohibited. Please # see LICENSE file for full details. # ================================================================== # # Converts the HTML and Markup in posts and messages # package GForum::Convert; use strict; use GForum qw/:user :forum $DB $CFG $IN/; use GT::Plugins; use Exporter; use GT::AutoLoader; use vars qw/@EXPORT @ISA $No_Image $Leave_Dot/; @ISA = qw/Exporter/; @EXPORT = qw/escape_html unescape_html escape_string unescape_string convert_signature convert_markup/; # Converts all [signature] tags. Takes two scalar refs as arguments. # All occurances of [signature] within the first argument are replaced # with the second argument (both dereferenced, of course). # Returned is the number of substitutions made. Occurances of [.signature] will # be treated as escaped, and will simply remove the . (or the first . if there # are multiple) and not perform any other replacement. sub convert_signature { GT::Plugins->dispatch("$CFG->{admin_root_path}/Plugins/GForum", 'convert_signature', \&_plg_convert_signature, @_) } sub _plg_convert_signature { my ($text, $sig) = @_; $$text =~ s/\[\s*(\.*)\s*\Q$CFG->{markup_signature_tag}\E\s*\]/ if ($1) { '[' . substr($1, 1) . $CFG->{markup_signature_tag} . ']' } elsif ($$sig =~ m|\S|) { $CFG->{markup_signature_prefix} . $$sig . $CFG->{markup_signature_suffix} } else { '' } /egi } # This converts forum markup, such as [#FF0000], into HTML, such as . # Takes a scalar reference which is changed. Nothing is returned. sub convert_markup { GT::Plugins->dispatch("/Plugins/GForum", 'convert_markup', \&_plg_convert_markup, shift) } sub _plg_convert_markup { my $text = shift; # We need this hash to get at the value, since markup_custom could contain Foo, # but we need to match foo, fOO, FOO, etc. my %lc_custom = map { (lc($_) => $CFG->{markup_custom}->{$_}) } keys %{$CFG->{markup_custom}}; my $text_repl = '| (' . join('|', map quotemeta, sort { length($b) <=> length($a) } keys %lc_custom) . ')'; $text_repl = '' if $text_repl eq '| ()'; # And now, everything else. my ($txt, $last_pos, $enclosing_img, $enclosing_url, $enclosing_email, $save_pre) = ('', 0); # $in_enclosing lets us know if we are inside [img]...[/img] # We have to do this as a while(//g) because we need to adjust pos() if a tag isn't matched. while ($$text =~ m{ (\s*) # $1 / $pre - any whitespace before the tag ( # $2 / $whole - keeps track of the full tag entered \[ # Starts a tag \s* # Any whitespace at the beginning of the tag: [ b] ([^]\s]+) # $3 / $tag - the tag name. For example: smile ( # $4 / $args - An argument list. (?: \s+ # Whitespace is required at the beginning and between arguments (?: "[^"]*" # "double quotes" | '[^']*' # 'single quotes' | [^]\s]+ # no_quotes ) ){1,9} )? # End of argument list \s* # Whitespace at the end of the tag: [b ] \] # ] ends the tag ) (\r?\n?) # $5 / $post - keep track of a new line after the tag $text_repl # This will either be: '| (a|b|...)', or empty. }gix) { my ($pre, $whole, $tag, $args, $post, $custom) = ($1, $2, lc($3), $4, $5, $6); # Handling custom is the easiest, so get it out of the way first my $this_pos = pos($$text); my $skipped_length = $this_pos - ($custom ? length($custom) : (length($post) + length($whole) + length($pre))) - $last_pos; unless ($enclosing_img or $enclosing_url or $enclosing_email) { # Print out everything up to this tag since the last tag $txt .= substr($$text, $last_pos, $skipped_length) unless $skipped_length <= 0; if (defined $custom) { $txt .= $lc_custom{lc $custom}; $last_pos = $this_pos; next; } } my @args; if ($args) { # Parse the argument list into @args @args = grep defined, $args =~ /"([^"]*)"|'([^']*)'|(\S+)/g; } if (!@args and $CFG->{markup_allow_image} and not $No_Image and not $enclosing_url and not $enclosing_email) { if ($tag =~ /^ima?ge?$/i) { $save_pre = $pre; $enclosing_img = 1; } elsif ($enclosing_img and $tag =~ /^\/ima?ge?$/i) { $enclosing_img = 0; my $img = substr($$text, $last_pos, $skipped_length); $img =~ s/&/&/g; $img = "http://$img" unless $img =~ m{^(?:https?|ftp)://}i; escape_string($img); $txt .= qq{$save_pre$post}; $last_pos = $this_pos; next; } elsif ($enclosing_img) { next; } } if (!@args and $CFG->{markup_allow_url} and not $enclosing_img) { if (lc $tag eq 'url' and not $enclosing_email) { $save_pre = $pre; $enclosing_url = 1; } elsif ($enclosing_url and lc $tag eq '/url' and not $enclosing_email) { $enclosing_url = 0; # URL tags such as: [url]http://gossamer-threads.com[/url]. my $inner = substr($$text, $last_pos, $skipped_length); my ($leading, $url, $trailing) = $inner =~ /^(\s*)(.*?)(\s*)$/; $url = "http://$url" unless $url =~ m{^(?:https?|ftp|news|nntp|telnet|gopher|mailto|aim):}i; my $eurl = $url; $eurl =~ s/&/&/g; $eurl =~ s/;/&/g; $eurl = GT::CGI::escape($eurl); # People logging in without cookies have to redirect through gforum.cgi so that # their session= isn't made available via HTTP_REFERER. Everyone else can go # straight to the URL. if ($GForum::SESSION_TYPE and $GForum::SESSION_TYPE eq 'param') { $txt .= qq|$save_pre$leading$url$trailing$post|; } else { $url =~ s/"/"/g; $txt .= qq|$save_pre$leading$url$trailing$post|; } $last_pos = $this_pos; next; } elsif ($enclosing_url) { next; } elsif (lc $tag eq 'email' and not $enclosing_url) { $save_pre = $pre; $enclosing_email = 1; } elsif ($enclosing_email and $tag eq '/email') { $enclosing_email = 0; my $inner = substr($$text, $last_pos, $skipped_length); my ($leading, $email, $trailing) = $inner =~ /^(\s*)(.*?)(\s*)$/; escape_string($email); $txt .= qq|$save_pre$leading$email$trailing$post|; $last_pos = $this_pos; next; } elsif ($enclosing_email) { next; } } if ($enclosing_url or $enclosing_email or $enclosing_img) { $last_pos = $this_pos; next; } $tag =~ s/&/&/g; if ($tag =~ /^(\.*\Q$CFG->{markup_signature_tag}\E)$/) { $txt .= "$pre\[$1]$post" } elsif (substr($tag, 0, 1) eq '.') { $whole =~ s/\.// unless $Leave_Dot; # Allows for tag escaping by using a . $txt .= "$pre$whole$post"; } elsif ( (@args ? (exists $CFG->{markup_tags}->{"$tag()"}) : (exists $CFG->{markup_tags}->{$tag})) and ($CFG->{markup_allow_url} or $tag ne 'url' and $tag ne '/url') and ($CFG->{markup_allow_font} or $tag ne 'font' and $tag ne '/font') and ($CFG->{markup_allow_size} or $tag ne 'size' and $tag ne '/size') ) { my ($val, $clear) = @{$CFG->{markup_tags}->{@args ? "$tag()" : $tag}}[0, 2]; if ($tag eq 'url') { unless($GForum::SESSION_TYPE and $GForum::SESSION_TYPE eq 'param') { # Cookie-authentication users can skip the redirection. This tries to translate # the redirection into a normal URL for those users. $val =~ s|escape($args[$_ - 1]); $val =~ s/%c$_%/$cgi_escape/g; $val =~ s/%$_%/$html_escaped/g; } $val =~ s/%\d+%//g; } $val =~ s/<%image_url%>/$CFG->{image_url}/gi; $val =~ s/<%cgi_root_url%>/$CFG->{cgi_root_url}/gi; if ($clear) { # Clear means following newlines should be stripped; leading ones too if there is a closing tag if (exists $CFG->{markup_tags}->{"/$tag"}) { # an opening tag - it shouldn't keep a following newline $txt .= "$pre$val" } else { # This is closing or a normal tag - it shouldn't keep either leading or following newlines $txt .= $val } } else { $txt .= "$pre$val$post" } } elsif ($CFG->{markup_allow_custom_color} and $tag =~ /^(#[\dA-Fa-f]{6})$/) { $txt .= qq{$pre$post} } elsif ($CFG->{markup_allow_custom_color} and substr($tag, 0, 2) eq '/#') { $txt .= "$pre$post" } else { # We got something like: [ some normal text [tag] # Reset the position to ^ here, instead of ^ here $this_pos -= (length($post) + length($whole) - 1); pos($$text) = $this_pos; $txt .= "$pre\["; } $last_pos = $this_pos; } $txt .= substr($$text, $last_pos); $$text = $txt; return; } # The following subroutines behave differently, depending on context. # # void context - escapes the passed in argument directly # # scalar or list - returns an escaped version of the passed in argument # # Note that something like: sub foo { _escape_amp(@_) } will call _escape_amp # in the context that foo() was called! You probably mean to do this instead: # sub foo { _escape_amp(@_); return }, which always calls it in void context. # This HTML escapes any & < or > characters in the string passed in. sub escape_html { if (defined wantarray) { my $str = shift; _escape_amp($str); _escape_lt($str); _escape_gt($str); return $str; } else { _escape_amp($_[0]); _escape_lt($_[0]); _escape_gt($_[0]); } } # This does the exact opposite of escape_html - # unescape_html(escape_html($str)) should return $str. sub unescape_html { if (defined wantarray) { my $str = shift; _unescape_lt($str); _unescape_gt($str); _unescape_amp($str); return $str; } else { _unescape_lt($_[0]); _unescape_gt($_[0]); _unescape_amp($_[0]); } } # This escapes & and " - suitable to be used in a string. sub escape_string { if (defined wantarray) { my $str = shift; _escape_amp($str); _escape_quot($str); return $str; } else { _escape_amp($_[0]); _escape_quot($_[0]); } } # This does the opposite of escape_string - # unescape_string(escape_string($str)) should return $str. sub unescape_string { if (defined wantarray) { my $str = shift; _unescape_amp($str); _unescape_quot($str); return $str; } else { _unescape_amp($_[0]); _unescape_quot($_[0]); } } # HTML escapes & sub _escape_amp { if (defined wantarray) { my $str = shift; $str =~ s/&/&/g; return $str; } else { $_[0] =~ s/&/&/g; return; } } sub _unescape_amp { if (defined wantarray) { my $str = shift; $str =~ s/&/&/g; return $str; } else { $_[0] =~ s/&/&/g; } } # HTML escapes < sub _escape_lt { if (defined wantarray) { my $str = shift; $str =~ s/ sub _escape_gt { if (defined wantarray) { my $str = shift; $str =~ s/>/>/g; return $str; } else { $_[0] =~ s/>/>/g; return; } } sub _unescape_gt { if (defined wantarray) { my $str = shift; $str =~ s/>/>/g; return $str; } else { $_[0] =~ s/>/>/g; return; } } # HTML escapes " # These aren't needed in an HTML escape, however ARE needed when the escape # is being used in a string (such as ). In the same string, # _escape_lt and _escape_gt are _not_ needed. sub _escape_quot { if (defined wantarray) { my $str = shift; $str =~ s/"/"/g; return $str; } else { $_[0] =~ s/"/"/g; return; } } sub _unescape_quot { if (defined wantarray) { my $str = shift; $str =~ s/"/"/g; return $str; } else { $_[0] =~ s/"/"/g; return; } } # This attempts to convert HTML into Markup. The HTML produced should have been # produced by the Advanced/WYSIWYG Editor. $COMPILE{advanced_editor_convert} = __LINE__ . <<'END_OF_SUB'; sub advanced_editor_convert { my $html = shift; my ($post, $code) = advanced_editor_tags(); require GT::HTML::Parser; my $parser = GT::HTML::Parser->new(events => $code); $parser->parse($html); # $post is created from the relevant pieces of $html return $$post; } END_OF_SUB # Returns a scalar reference and a hash ref. The hash contains keys and code # references meant for advanced_editor_tags. The code references should append # to the scalar when called so that after calling GT::HTML::Parser->parse, the # scalar reference will contain the markup. # Typically, to extend markup with a plugin, you will use a POST hook, then add # to the code reference, appending to the scalar reference. $COMPILE{advanced_editor_tags} = __LINE__ . <<'END_OF_SUB'; sub advanced_editor_tags { my $post = ''; my (@font_stack, @p_stack); # $simple_start and $simple_end are used for simple replacements like => [b], => [/i] my $simple_start = sub { $post .= "[" . pop()->{name} . "]" }; my $simple_end = sub { $post .= "[/" . pop()->{name} . "]" }; my $found_text; my %html_unescape = (nbsp => ' ', gt => '>', lt => '<', quot => '"', amp => '&'); my %code = ( text => sub { my $data = pop()->{data}; $data =~ s/\r?\n//g; $data =~ s/&(nbsp|gt|lt|quot|amp);/$html_unescape{$1}/g; $post .= $data; $found_text = 1 }, space => sub { $post .= " " if $found_text }, start_br => sub { $post .= "\n" }, start_p => sub { my $tags = pop()->{tags}; if ($tags->{align} and $tags->{align}->[0] =~ /^(left|center|right)$/) { $post .= "[$1]"; push @p_stack, $1; } elsif ($found_text) { $post .= "\n\n"; } }, end_p => sub { if (@p_stack) { $post .= "[/" . pop(@p_stack) . "]"; } }, skip_comment => sub { my $data = pop()->{data}; if ($data =~ m{^((/)?(?:reply|quote|pre|code))$}) { $post =~ s/(?:\r?\n[ \t]*){3}$/\n\n/ if $2; # For some reason, the editor puts in an extra line $post .= "[$data]\n"; return 'end_table'; } else { return undef; } }, start_font => sub { my $tags = pop()->{tags}; my $i = ++$#font_stack; if ($tags->{face} and index($tags->{face}->[0], '"') == -1) { $post .= qq|[font "$tags->{face}->[0]"]|; $font_stack[$i]->[0] = 1; } if ($tags->{color} and $tags->{color}->[0] =~ /^(black|blue|green|orange|purple|red|silver|white|yellow|#[0-9A-F]{6})$/i) { $post .= "[$1]"; $font_stack[$i]->[1] = $1; } if ($tags->{size} and $tags->{size}->[0] =~ /^(\d+)$/) { $post .= "[size $1]"; $font_stack[$i]->[2] = 1; } }, end_font => sub { my $closing = pop @font_stack; if ($closing->[2]) { $post .= qq|[/size]|; } if ($closing->[1]) { $post .= qq|[/$closing->[1]]|; } if ($closing->[0]) { $post .= qq|[/font]|; } }, start_strong => sub { $post .= "[b]" }, end_strong => sub { $post .= "[/b]" }, start_em => sub { $post .= "[i]" }, end_em => sub { $post .= "[/i]" }, start_hr => $simple_start, start_blockquote => sub { $post .= "[indent]" }, end_blockquote => sub { $post .= "[/indent]" }, start_a => sub { my $tags = pop()->{tags}; if ($tags->{href}) { my $url = $tags->{href}->[0]; # The advanced editor will change: into a full URL. # So, find the relative URL from cgi_root_url, and if the a href contains it, take it out and # unescape it for the markup tag. my ($web_relative) = $CFG->{cgi_root_url} =~ m{^(?:\w+://[a-zA-Z0-9.-]+)?(.*?)/?$}; if ($web_relative) { if ($url =~ s{^(?:\w+://[a-zA-Z0-9.-]+)?\Q$web_relative\E/gforum\.cgi\?url=([^&;]+)}//) { my $eurl = $1; $eurl =~ s/%([0-9A-F]{2})/chr hex $1/eg; $url = $eurl; } } $url =~ s/&/&/g; $post .= qq|[url "$url"]|; } }, end_a => sub { $post .= qq|[/url]| }, start_img => sub { my $src = pop()->{tags}->{src}->[0]; if ($src =~ m{^\Q$CFG->{image_url}\E/(angelic|blush|cool|crazy|frown|laugh|mad|pirate|shocked|sly|smile|tongue|unimpressed|unsure|wink)\.gif$}) { my $icon = $1; if ($icon eq 'frown') { $icon = ':('; } elsif ($icon eq 'smile') { $icon = ':)'; } elsif ($icon eq 'wink') { $icon = ';)'; } $post .= "[$icon]"; } elsif ($src and $CFG->{markup_allow_image}) { $post .= "[img]$src\[/img]"; } } ); for (qw/b i u s strike ul ol li/) { $code{"start_$_"} = $simple_start; $code{"end_$_"} = $simple_end; } return(\$post, \%code); } END_OF_SUB 1;