# ================================================================== # Gossamer Forum - Advanced web community # # Website : http://gossamer-threads.com/ # Support : http://gossamer-threads.com/scripts/support/ # Revision : $Id: Convert.pm,v 1.43 2002/05/05 20:30:26 jagerman Exp $ # # Copyright (c) 2001 Gossamer Threads Inc. All Rights Reserved. # Redistribution in part or in whole strictly prohibited. Please # see LICENSE file for full details. # ================================================================== # # Converts the HTML and Markup in posts and messages # package GForum::Convert; use strict; use GForum qw/:user :forum $DB $CFG $IN/; use GT::Plugins; use Exporter; use GT::AutoLoader; use vars qw/@EXPORT @ISA $No_Image $Leave_Dot/; @ISA = qw/Exporter/; @EXPORT = qw/escape_html unescape_html escape_string unescape_string convert_signature convert_markup/; # Converts all [signature] tags. Takes two scalar refs as arguments. # All occurances of [signature] within the first argument are replaced # with the second argument (both dereferenced, of course). # Returned is the number of substitutions made. Occurances of [.signature] will # be treated as escaped, and will simply remove the . (or the first . if there # are multiple) and not perform any other replacement. sub convert_signature { GT::Plugins->dispatch("$CFG->{admin_root_path}/Plugins/GForum", 'convert_signature', \&_plg_convert_signature, @_) } sub _plg_convert_signature { my ($text, $sig) = @_; $$text =~ s/\[\s*(\.*)\s*\Q$CFG->{markup_signature_tag}\E\s*\]/ if ($1) { '[' . substr($1, 1) . $CFG->{markup_signature_tag} . ']' } else { $$sig }/egi } # This converts forum markup, such as [#FF0000], into HTML, such as . # Takes a scalar reference which is changed. Nothing is returned. sub convert_markup { GT::Plugins->dispatch("/Plugins/GForum", 'convert_markup', \&_plg_convert_markup, shift) } sub _plg_convert_markup { my $text = shift; if ($CFG->{markup_allow_url}) { # URL tags such as: [url]http://gossamer-threads.com[/url]. $$text =~ s{\[\s*url\s*\](\s*)(.*?)(\s*)\[\s*/url\s*\]} { my ($pre, $post) = ($1, $3); my $eurl = my $url = $2; $eurl =~ s/&/&/g; $eurl =~ s/;/&/g; $eurl = "http://$eurl" unless $eurl =~ m{^\w+:}; $eurl = GT::CGI::escape($eurl); qq|$pre$url$post| }egi; # And e-mail addresses such as [email]jason@gossamer-threads.com[/email]([url "mailto:address"] is a more flexible alternative): $$text =~ s{\[email\](\s*)(.*?)(\s*)\[/email\]} { my $email = escape_string($2); qq|$1$email$3| }egi; } if ($CFG->{markup_allow_image} and not $No_Image) { $$text =~ s{\[\s*ima?ge?\s*\]\s*(.*?)\s*\[\s*/ima?ge?\s*\]} { my $img = $1; $img =~ s/&/&/g; # Keep it from being double-escaped on the next line $img = "http://$img" unless $img =~ m{^(?:https?|ftp)://}; escape_string($img); qq{}; }egi; } # And now, everything else. my ($txt, $last_pos) = ('', 0); # We have to do this as a while(//g) because we need to adjust pos() if a tag isn't matched. while ($$text =~ m{ (\s*) # $1 / $pre - any whitespace before the tag ( # $2 / $whole - keeps track of the full tag entered \[ # Starts a tag \s* # Any whitespace at the beginning of the tag: [ b] ([^]\s]+) # $3 / $tag - the tag name. For example: smile ( # $4 / $args - An argument list. (?: \s+ # Whitespace is required at the beginning and between arguments (?: "[^"]*" # "double quotes" | '[^']*' # 'single quotes' | [^]\s]+ # no_quotes ) )+ )? # End of argument list \s* # Whitespace at the end of the tag: [b ] \] # ] ends the tag ) (\r?\n?) # $5 / $post - keep track of a new line after the tag }gx) { my ($pre, $whole, $tag, $args, $post) = ($1, $2, lc($3), $4, $5); my $this_pos = pos($$text); my $skipped_length = $this_pos - length($post) - length($whole) - length($pre) - $last_pos; $txt .= substr($$text, $last_pos, $skipped_length) unless $skipped_length <= 0; my @args; if ($args) { # Parse the argument list into @args @args = grep defined, $args =~ /"([^"]*)"|'([^']*)'|(\S+)/g; } $tag =~ s/&/&/g; if ($tag =~ /^(\.*\Q$CFG->{markup_signature_tag}\E)$/) { $txt .= "$pre\[$1]$post" } elsif (substr($tag, 0, 1) eq '.') { $whole =~ s/\.// unless $Leave_Dot; # Allows for tag escaping by using a . $txt .= "$pre$whole$post"; } elsif ( (@args ? (exists $CFG->{markup_tags}->{"$tag()"}) : (exists $CFG->{markup_tags}->{$tag})) and ($CFG->{markup_allow_url} or $tag ne 'url' and $tag ne '/url') and ($CFG->{markup_allow_font} or $tag ne 'font' and $tag ne '/font') and ($CFG->{markup_allow_size} or $tag ne 'size' and $tag ne '/size') ) { my ($val, $clear) = @{$CFG->{markup_tags}->{@args ? "$tag()" : $tag}}[0, 2]; if (@args) { for (1 .. @args) { $args[$_ - 1] =~ s/&/&/g; my $cgi_escape = $IN->escape($args[$_ - 1]); $val =~ s/%c$_%/$cgi_escape/g; my $html_escape = escape_html($args[$_ - 1]); $val =~ s/%$_%/$args[$_ - 1]/g; } $val =~ s/%\d+%//g; } $val =~ s/<%image_url%>/$CFG->{image_url}/gi; $val =~ s/<%cgi_root_url%>/$CFG->{cgi_root_url}/gi; if ($clear) { # Clear means following newlines should be stripped; leading ones too if there is a closing tag if (exists $CFG->{markup_tags}->{"/$tag"}) { # an opening tag - it shouldn't keep a following newline $txt .= "$pre$val" } else { # This is closing or a normal tag - it shouldn't keep either leading or following newlines $txt .= $val } } else { $txt .= "$pre$val$post" } } elsif ($CFG->{markup_allow_custom_color} and $tag =~ /^(#[\dA-Fa-f]{6})$/) { $txt .= qq{$pre$post} } elsif ($CFG->{markup_allow_custom_color} and substr($tag, 0, 2) eq '/#') { $txt .= "$pre$post" } else { # We got something like: [ some normal text [tag] # Reset the position to ^ here, instead of ^ here $this_pos -= (length($post) + length($whole) - 1); pos($$text) = $this_pos; $txt .= "$pre\["; } $last_pos = $this_pos; } $txt .= substr($$text, $last_pos); $$text = $txt; for (keys %{$CFG->{markup_custom}}) { my $val = $CFG->{markup_custom}->{$_}; $val =~ s/<%image_url%>/$CFG->{image_url}/gi; $$text =~ s/\Q$_/$val/ig; } return; } # The following subroutines behave different, depending on context. # # void context - escapes the passed in argument directly # # scalar or list - returns an escaped version of the passed in argument # # Note that something like: sub foo { _escape_amp(@_) } will call _escape_amp # in the context that foo() was called! You probably mean to do this instead: # sub foo { _escape_amp(@_); return }, which always calls it in void context. # This HTML escapes any & < or > characters in the string passed in. sub escape_html { if (defined wantarray) { my $str = shift; _escape_amp($str); _escape_lt($str); _escape_gt($str); return $str; } else { _escape_amp($_[0]); _escape_lt($_[0]); _escape_gt($_[0]); } } # This does the exact opposite of escape_html - # unescape_html(escape_html($str)) should return $str. sub unescape_html { if (defined wantarray) { my $str = shift; _unescape_lt($str); _unescape_gt($str); _unescape_amp($str); return $str; } else { _unescape_lt($_[0]); _unescape_gt($_[0]); _unescape_amp($_[0]); } } # This escapes & and " - suitable to be used in a string. sub escape_string { if (defined wantarray) { my $str = shift; _escape_amp($str); _escape_quot($str); return $str; } else { _escape_amp($_[0]); _escape_quot($_[0]); } } # This does the opposite of escape_string - # unescape_string(escape_string($str)) should return $str. sub unescape_string { if (defined wantarray) { my $str = shift; _unescape_amp($str); _unescape_quot($str); return $str; } else { _unescape_amp($_[0]); _unescape_quot($_[0]); } } # HTML escapes & sub _escape_amp { if (defined wantarray) { my $str = shift; $str =~ s/&/&/g; return $str; } else { $_[0] =~ s/&/&/g; return; } } sub _unescape_amp { if (defined wantarray) { my $str = shift; $str =~ s/&/&/g; return $str; } else { $_[0] =~ s/&/&/g; } } # HTML escapes < sub _escape_lt { if (defined wantarray) { my $str = shift; $str =~ s/ sub _escape_gt { if (defined wantarray) { my $str = shift; $str =~ s/>/>/g; return $str; } else { $_[0] =~ s/>/>/g; return; } } sub _unescape_gt { if (defined wantarray) { my $str = shift; $str =~ s/>/>/g; return $str; } else { $_[0] =~ s/>/>/g; return; } } # HTML escapes " # These aren't needed in an HTML escape, however ARE needed when the escape # is being used in a string (such as ). In the same string, # _escape_lt and _escape_gt are _not_ needed. sub _escape_quot { if (defined wantarray) { my $str = shift; $str =~ s/"/"/g; return $str; } else { $_[0] =~ s/"/"/g; return; } } sub _unescape_quot { if (defined wantarray) { my $str = shift; $str =~ s/"/"/g; return $str; } else { $_[0] =~ s/"/"/g; return; } } # This attempts to convert HTML into Markup. The HTML produced should have been # produced by the Advanced/WYSIWYG Editor. $COMPILE{advanced_editor_convert} = __LINE__ . <<'END_OF_SUB'; sub advanced_editor_convert { my $html = shift; my ($post, $code) = advanced_editor_tags(); require GT::HTML::Parser; my $parser = GT::HTML::Parser->new(events => $code); $parser->parse($html); # $post is created from the relevant pieces of $html return $$post; } END_OF_SUB # Returns a scalar reference and a hash ref. The hash contains keys and code # references meant for advanced_editor_tags. The code references should append # to the scalar when called so that after calling GT::HTML::Parser->parse, the # scalar reference will contain the markup. # Typically, to extend markup with a plugin, you will use a POST hook, then add # to the code reference, appending to the scalar reference. $COMPILE{advanced_editor_tags} = __LINE__ . <<'END_OF_SUB'; sub advanced_editor_tags { my $post = ''; my (@font_stack, @p_stack); # $simple_start and $simple_end are used for simple replacements like => [b], => [/i] my $simple_start = sub { $post .= "[" . pop()->{name} . "]" }; my $simple_end = sub { $post .= "[/" . pop()->{name} . "]" }; my $found_text; my %html_unescape = (nbsp => ' ', gt => '>', lt => '<', quot => '"', amp => '&'); my %code = ( text => sub { my $data = pop()->{data}; $data =~ s/\r?\n//g; $data =~ s/&(nbsp|gt|lt|quot|amp);/$html_unescape{$1}/g; $post .= $data; $found_text = 1 }, space => sub { $post .= " " if $found_text }, start_br => sub { $post .= "\n" }, start_p => sub { my $tags = pop()->{tags}; if ($tags->{align} and $tags->{align}->[0] =~ /^(left|center|right)$/) { $post .= "[$1]"; push @p_stack, $1; } elsif ($found_text) { $post .= "\n\n"; } }, end_p => sub { if (@p_stack) { $post .= "[/" . pop(@p_stack) . "]"; } }, skip_comment => sub { my $data = pop()->{data}; if ($data =~ m{^((/)?(?:reply|quote|pre|code))$}) { $post =~ s/(?:\r?\n[ \t]*){3}$/\n\n/ if $2; # For some reason, the editor puts in an extra line $post .= "[$data]\n"; return 'end_table'; } else { return undef; } }, start_font => sub { my $tags = pop()->{tags}; my $i = ++$#font_stack; if ($tags->{face} and index($tags->{face}->[0], '"') == -1) { $post .= qq|[font "$tags->{face}->[0]"]|; $font_stack[$i]->[0] = 1; } if ($tags->{color} and $tags->{color}->[0] =~ /^(black|blue|green|orange|purple|red|silver|white|yellow|#[0-9A-F]{6})$/i) { $post .= "[$1]"; $font_stack[$i]->[1] = $1; } if ($tags->{size} and $tags->{size}->[0] =~ /^(\d+)$/) { $post .= "[size $1]"; $font_stack[$i]->[2] = 1; } }, end_font => sub { my $closing = pop @font_stack; if ($closing->[2]) { $post .= qq|[/size]|; } if ($closing->[1]) { $post .= qq|[/$closing->[1]]|; } if ($closing->[0]) { $post .= qq|[/font]|; } }, start_strong => sub { $post .= "[b]" }, end_strong => sub { $post .= "[/b]" }, start_em => sub { $post .= "[i]" }, end_em => sub { $post .= "[/i]" }, start_hr => $simple_start, start_blockquote => sub { $post .= "[indent]" }, end_blockquote => sub { $post .= "[/indent]" }, start_a => sub { my $tags = pop()->{tags}; if ($tags->{href}) { my $url = $tags->{href}->[0]; # The advanced editor will change: into a full URL. # So, find the relative URL from cgi_root_url, and if the a href contains it, take it out and # unescape it for the markup tag. my ($web_relative) = $CFG->{cgi_root_url} =~ m{^(?:\w+://[a-zA-Z0-9.-]+)?(.*?)/?$}; if ($web_relative) { if ($url =~ s{^(?:\w+://[a-zA-Z0-9.-]+)?\Q$web_relative\E/gforum\.cgi\?url=([^&;]+)}//) { my $eurl = $1; $eurl =~ s/%([0-9A-F]{2})/chr hex $1/eg; $url = $eurl; } } escape_html($url); $post .= qq|[url "$url"]|; } }, end_a => sub { $post .= qq|[/url]| }, start_img => sub { my $src = pop()->{tags}->{src}->[0]; if ($src =~ m{^\Q$CFG->{image_url}\E/(angelic|blush|cool|crazy|frown|laugh|mad|pirate|shocked|sly|smile|tongue|unimpressed|unsure|wink)\.gif$}) { my $icon = $1; if ($icon eq 'frown') { $icon = ':('; } elsif ($icon eq 'smile') { $icon = ':)'; } elsif ($icon eq 'wink') { $icon = ';)'; } $post .= "[$icon]"; } elsif ($src and $CFG->{markup_allow_image}) { $post .= "[img]$src\[/img]"; } } ); for (qw/b i u s strike ul ol li/) { $code{"start_$_"} = $simple_start; $code{"end_$_"} = $simple_end; } return(\$post, \%code); } END_OF_SUB 1;