# ==================================================================
# Gossamer Forum - Advanced web community
#
#   Website  : http://gossamer-threads.com/
#   Support  : http://gossamer-threads.com/scripts/support/
#   Revision : $Id: Convert.pm,v 1.43 2002/05/05 20:30:26 jagerman Exp $
# 
# Copyright (c) 2001 Gossamer Threads Inc.  All Rights Reserved.
# Redistribution in part or in whole strictly prohibited. Please
# see LICENSE file for full details.
# ==================================================================
#
# Converts the HTML and Markup in posts and messages
#
package GForum::Convert;
use strict;

use GForum qw/:user :forum $DB $CFG $IN/;
use GT::Plugins;
use Exporter;
use GT::AutoLoader;

use vars qw/@EXPORT @ISA $No_Image $Leave_Dot/;

@ISA = qw/Exporter/;
@EXPORT = qw/escape_html unescape_html escape_string unescape_string
             convert_signature convert_markup/;

# Converts all [signature] tags. Takes two scalar refs as arguments.
# All occurances of [signature] within the first argument are replaced
# with the second argument (both dereferenced, of course).
# Returned is the number of substitutions made. Occurances of [.signature] will
# be treated as escaped, and will simply remove the . (or the first . if there
# are multiple) and not perform any other replacement.
sub convert_signature { GT::Plugins->dispatch("$CFG->{admin_root_path}/Plugins/GForum", 'convert_signature', \&_plg_convert_signature, @_) }
sub _plg_convert_signature {
    my ($text, $sig) = @_;
    $$text =~ s/\[\s*(\.*)\s*\Q$CFG->{markup_signature_tag}\E\s*\]/
        if ($1) {
            '[' . substr($1, 1) . $CFG->{markup_signature_tag} . ']'
        }
        else {
            $$sig
        }/egi
}

# This converts forum markup, such as [#FF0000], into HTML, such as <font color="#FF0000">.
# Takes a scalar reference which is changed. Nothing is returned.
sub convert_markup { GT::Plugins->dispatch("/Plugins/GForum", 'convert_markup', \&_plg_convert_markup, shift) }
sub _plg_convert_markup {
    my $text = shift;

    if ($CFG->{markup_allow_url}) {
        # URL tags such as: [url]http://gossamer-threads.com[/url].
        $$text =~ s{\[\s*url\s*\](\s*)(.*?)(\s*)\[\s*/url\s*\]}
            {
                my ($pre, $post) = ($1, $3);
                my $eurl = my $url = $2;
                $eurl =~ s/&amp;/&/g;
                $eurl =~ s/;/&/g;
                $eurl = "http://$eurl" unless $eurl =~ m{^\w+:};
                $eurl = GT::CGI::escape($eurl);
                qq|<a href="gforum.cgi?url=$eurl" target="_blank">$pre$url$post</a>|
            }egi;
        # And e-mail addresses such as [email]jason@gossamer-threads.com[/email]([url "mailto:address"] is a more flexible alternative):
        $$text =~ s{\[email\](\s*)(.*?)(\s*)\[/email\]}
            {
                my $email = escape_string($2);
                qq|<a href="mailto:$email">$1$email$3</a>|
            }egi;
    }
    if ($CFG->{markup_allow_image} and not $No_Image) {
        $$text =~ s{\[\s*ima?ge?\s*\]\s*(.*?)\s*\[\s*/ima?ge?\s*\]}
            {
                my $img = $1;
                $img =~ s/&amp;/&/g; # Keep it from being double-escaped on the next line
                $img = "http://$img" unless $img =~ m{^(?:https?|ftp)://};
                escape_string($img);
                qq{<img src="$img" border="0" />};
            }egi;
    }
    # And now, everything else.
    my ($txt, $last_pos) = ('', 0); # We have to do this as a while(//g) because we need to adjust pos() if a tag isn't matched.
    while ($$text =~ m{
            (\s*)               # $1 / $pre - any whitespace before the tag
            (                   # $2 / $whole - keeps track of the full tag entered
                \[              # Starts a tag
                    \s*         # Any whitespace at the beginning of the tag: [  b]
                    ([^]\s]+)   # $3 / $tag - the tag name. For example: smile
                    (           # $4 / $args - An argument list.
                        (?:
                            \s+         # Whitespace is required at the beginning and between arguments
                            (?:
                                "[^"]*" # "double quotes"
                                |
                                '[^']*' # 'single quotes'
                                |
                                [^]\s]+ # no_quotes
                            )
                        )+
                    )?          # End of argument list
                    \s*         # Whitespace at the end of the tag: [b  ]
                \]              # ] ends the tag
            )
            (\r?\n?)            # $5 / $post - keep track of a new line after the tag
    }gx) {
        my ($pre, $whole, $tag, $args, $post) = ($1, $2, lc($3), $4, $5);
        my $this_pos = pos($$text);
        my $skipped_length = $this_pos - length($post) - length($whole) - length($pre) - $last_pos;
        $txt .= substr($$text, $last_pos, $skipped_length) unless $skipped_length <= 0;
        my @args;
        if ($args) {
            # Parse the argument list into @args
            @args = grep defined, $args =~ /"([^"]*)"|'([^']*)'|(\S+)/g;
        }
        $tag =~ s/&amp;/&/g;
        if ($tag =~ /^(\.*\Q$CFG->{markup_signature_tag}\E)$/) {
            $txt .= "$pre\[$1]$post"
        }
        elsif (substr($tag, 0, 1) eq '.') {
            $whole =~ s/\.// unless $Leave_Dot; # Allows for tag escaping by using a .
            $txt .= "$pre$whole$post";
        }
        elsif (
            (@args ? (exists $CFG->{markup_tags}->{"$tag()"}) : (exists $CFG->{markup_tags}->{$tag})) and
            ($CFG->{markup_allow_url}  or $tag ne 'url'  and $tag ne '/url')  and
            ($CFG->{markup_allow_font} or $tag ne 'font' and $tag ne '/font') and
            ($CFG->{markup_allow_size} or $tag ne 'size' and $tag ne '/size')
        ) {
            my ($val, $clear) = @{$CFG->{markup_tags}->{@args ? "$tag()" : $tag}}[0, 2];
            if (@args) {
                for (1 .. @args) {
                    $args[$_ - 1] =~ s/&amp;/&/g;
                    my $cgi_escape = $IN->escape($args[$_ - 1]);
                    $val =~ s/%c$_%/$cgi_escape/g;
                    my $html_escape = escape_html($args[$_ - 1]);
                    $val =~ s/%$_%/$args[$_ - 1]/g;
                }
                $val =~ s/%\d+%//g;
            }

            $val =~ s/<%image_url%>/$CFG->{image_url}/gi;
            $val =~ s/<%cgi_root_url%>/$CFG->{cgi_root_url}/gi;

            if ($clear) { # Clear means following newlines should be stripped; leading ones too if there is a closing tag
                if (exists $CFG->{markup_tags}->{"/$tag"}) { # an opening tag - it shouldn't keep a following newline
                    $txt .= "$pre$val"
                }
                else { # This is closing or a normal tag - it shouldn't keep either leading or following newlines
                    $txt .= $val
                }
            }
            else {
                $txt .= "$pre$val$post"
            }
        }
        elsif ($CFG->{markup_allow_custom_color} and $tag =~ /^(#[\dA-Fa-f]{6})$/) {
            $txt .= qq{$pre<font color="$1">$post}
        }
        elsif ($CFG->{markup_allow_custom_color} and substr($tag, 0, 2) eq '/#') {
            $txt .= "$pre</font>$post"
        }
        else {
            # We got something like: [ some normal text [tag]
            # Reset the position to   ^ here, instead of     ^ here
            $this_pos -= (length($post) + length($whole) - 1);
            pos($$text) = $this_pos;
            $txt .= "$pre\[";
        }
        $last_pos = $this_pos;
    }
    $txt .= substr($$text, $last_pos);
    $$text = $txt;
    for (keys %{$CFG->{markup_custom}}) {
        my $val = $CFG->{markup_custom}->{$_};
        $val =~ s/<%image_url%>/$CFG->{image_url}/gi;
        $$text =~ s/\Q$_/$val/ig;
    }
    return;
}

# The following subroutines behave different, depending on context.
#
# void context - escapes the passed in argument directly
#
# scalar or list - returns an escaped version of the passed in argument
#
# Note that something like: sub foo { _escape_amp(@_) } will call _escape_amp
# in the context that foo() was called! You probably mean to do this instead:
# sub foo { _escape_amp(@_); return }, which always calls it in void context.

# This HTML escapes any & < or > characters in the string passed in.
sub escape_html {
    if (defined wantarray) {
        my $str = shift;
        _escape_amp($str);
        _escape_lt($str);
        _escape_gt($str);
        return $str;
    }
    else {
        _escape_amp($_[0]);
        _escape_lt($_[0]);
        _escape_gt($_[0]);
    }
}

# This does the exact opposite of escape_html -
# unescape_html(escape_html($str)) should return $str.
sub unescape_html {
    if (defined wantarray) {
        my $str = shift;
        _unescape_lt($str);
        _unescape_gt($str);
        _unescape_amp($str);
        return $str;
    }
    else {
        _unescape_lt($_[0]);
        _unescape_gt($_[0]);
        _unescape_amp($_[0]);
    }
}

# This escapes & and " - suitable to be used in a string.
sub escape_string {
    if (defined wantarray) {
        my $str = shift;
        _escape_amp($str);
        _escape_quot($str);
        return $str;
    }
    else {
        _escape_amp($_[0]);
        _escape_quot($_[0]);
    }
}

# This does the opposite of escape_string -
# unescape_string(escape_string($str)) should return $str.
sub unescape_string {
    if (defined wantarray) {
        my $str = shift;
        _unescape_amp($str);
        _unescape_quot($str);
        return $str;
    }
    else {
        _unescape_amp($_[0]);
        _unescape_quot($_[0]);
    }
}

# HTML escapes &
sub _escape_amp {
    if (defined wantarray) {
        my $str = shift;
        $str =~ s/&/&amp;/g;
        return $str;
    }
    else {
        $_[0] =~ s/&/&amp;/g;
        return;
    }
}

sub _unescape_amp {
    if (defined wantarray) {
        my $str = shift;
        $str =~ s/&amp;/&/g;
        return $str;
    }
    else {
        $_[0] =~ s/&amp;/&/g;
    }
}

# HTML escapes <
sub _escape_lt {
    if (defined wantarray) {
        my $str = shift;
        $str =~ s/</&lt;/g;
        return $str;
    }
    else {
        $_[0] =~ s/</&lt;/g;
        return;
    }
}

sub _unescape_lt {
    if (defined wantarray) {
        my $str = shift;
        $str =~ s/&lt;/</g;
        return $str;
    }
    else {
        $_[0] =~ s/&lt;/</g;
        return;
    }
}

# HTML escapes >
sub _escape_gt {
    if (defined wantarray) {
        my $str = shift;
        $str =~ s/>/&gt;/g;
        return $str;
    }
    else {
        $_[0] =~ s/>/&gt;/g;
        return;
    }
}

sub _unescape_gt {
    if (defined wantarray) {
        my $str = shift;
        $str =~ s/&gt;/>/g;
        return $str;
    }
    else {
        $_[0] =~ s/&gt;/>/g;
        return;
    }
}

# HTML escapes "
# These aren't needed in an HTML escape, however ARE needed when the escape
# is being used in a string (such as <a href="HERE">). In the same string,
# _escape_lt and _escape_gt are _not_ needed.
sub _escape_quot {
    if (defined wantarray) {
        my $str = shift;
        $str =~ s/"/&quot;/g;
        return $str;
    }
    else {
        $_[0] =~ s/"/&quot;/g;
        return;
    }
}

sub _unescape_quot {
    if (defined wantarray) {
        my $str = shift;
        $str =~ s/&quot;/"/g;
        return $str;
    }
    else {
        $_[0] =~ s/&quot;/"/g;
        return;
    }
}

# This attempts to convert HTML into Markup. The HTML produced should have been
# produced by the Advanced/WYSIWYG Editor.
$COMPILE{advanced_editor_convert} = __LINE__ . <<'END_OF_SUB';
sub advanced_editor_convert {
    my $html = shift;

    my ($post, $code) = advanced_editor_tags();

    require GT::HTML::Parser;
    my $parser = GT::HTML::Parser->new(events => $code);
    $parser->parse($html); # $post is created from the relevant pieces of $html

    return $$post;
}
END_OF_SUB

# Returns a scalar reference and a hash ref. The hash contains keys and code
# references meant for advanced_editor_tags. The code references should append
# to the scalar when called so that after calling GT::HTML::Parser->parse, the
# scalar reference will contain the markup.
# Typically, to extend markup with a plugin, you will use a POST hook, then add
# to the code reference, appending to the scalar reference.
$COMPILE{advanced_editor_tags} = __LINE__ . <<'END_OF_SUB';
sub advanced_editor_tags {
    my $post = '';
    my (@font_stack, @p_stack);
    # $simple_start and $simple_end are used for simple replacements like <b> => [b], </i> => [/i]
    my $simple_start = sub { $post .= "[" . pop()->{name} . "]" };
    my $simple_end = sub { $post .= "[/" . pop()->{name} . "]" };
    my $found_text;
    my %html_unescape = (nbsp => ' ', gt => '>', lt => '<', quot => '"', amp => '&');
    my %code = (
        text => sub { my $data = pop()->{data}; $data =~ s/\r?\n//g; $data =~ s/&(nbsp|gt|lt|quot|amp);/$html_unescape{$1}/g; $post .= $data; $found_text = 1 },
        space => sub { $post .= " " if $found_text },
        start_br => sub { $post .= "\n" },
        start_p => sub {
            my $tags = pop()->{tags};
            if ($tags->{align} and $tags->{align}->[0] =~ /^(left|center|right)$/) {
                $post .= "[$1]";
                push @p_stack, $1;
            }
            elsif ($found_text) {
                $post .= "\n\n";
            }
        },
        end_p => sub {
            if (@p_stack) {
                $post .= "[/" . pop(@p_stack) . "]";
            }
        },
        skip_comment => sub {
            my $data = pop()->{data};
            if ($data =~ m{^((/)?(?:reply|quote|pre|code))$}) {
                $post =~ s/(?:\r?\n[ \t]*){3}$/\n\n/ if $2; # For some reason, the editor puts in an extra line
                $post .= "[$data]\n";
                return 'end_table';
            }
            else {
                return undef;
            }
        },
        start_font => sub {
            my $tags = pop()->{tags};
            my $i = ++$#font_stack;
            if ($tags->{face} and index($tags->{face}->[0], '"') == -1) {
                $post .= qq|[font "$tags->{face}->[0]"]|;
                $font_stack[$i]->[0] = 1;
            }
            if ($tags->{color} and $tags->{color}->[0] =~ /^(black|blue|green|orange|purple|red|silver|white|yellow|#[0-9A-F]{6})$/i) {
                $post .= "[$1]";
                $font_stack[$i]->[1] = $1;
            }
            if ($tags->{size} and $tags->{size}->[0] =~ /^(\d+)$/) {
                $post .= "[size $1]";
                $font_stack[$i]->[2] = 1;
            }
        },
        end_font => sub {
            my $closing = pop @font_stack;
            if ($closing->[2]) {
                $post .= qq|[/size]|;
            }
            if ($closing->[1]) {
                $post .= qq|[/$closing->[1]]|;
            }
            if ($closing->[0]) {
                $post .= qq|[/font]|;
            }
        },
        start_strong => sub { $post .= "[b]" },
        end_strong => sub { $post .= "[/b]" },
        start_em => sub { $post .= "[i]" },
        end_em => sub { $post .= "[/i]" },
        start_hr => $simple_start,
        start_blockquote => sub { $post .= "[indent]" },
        end_blockquote => sub { $post .= "[/indent]" },
        start_a => sub {
            my $tags = pop()->{tags};
            if ($tags->{href}) {
                my $url = $tags->{href}->[0];

# The advanced editor will change: <a href="gforum.cgi?url=http://foobar"> into a full URL.
# So, find the relative URL from cgi_root_url, and if the a href contains it, take it out and
# unescape it for the markup tag.
                my ($web_relative) = $CFG->{cgi_root_url} =~ m{^(?:\w+://[a-zA-Z0-9.-]+)?(.*?)/?$};
                if ($web_relative) {
                    if ($url =~ s{^(?:\w+://[a-zA-Z0-9.-]+)?\Q$web_relative\E/gforum\.cgi\?url=([^&;]+)}//) {
                        my $eurl = $1;
                        $eurl =~ s/%([0-9A-F]{2})/chr hex $1/eg;
                        $url = $eurl;
                    }
                }

                escape_html($url);
                $post .= qq|[url "$url"]|;
            }
        },
        end_a => sub { $post .= qq|[/url]| },
        start_img => sub {
            my $src = pop()->{tags}->{src}->[0];
            if ($src =~ m{^\Q$CFG->{image_url}\E/(angelic|blush|cool|crazy|frown|laugh|mad|pirate|shocked|sly|smile|tongue|unimpressed|unsure|wink)\.gif$}) {
                my $icon = $1;
                if ($icon eq 'frown') {
                    $icon = ':(';
                }
                elsif ($icon eq 'smile') {
                    $icon = ':)';
                }
                elsif ($icon eq 'wink') {
                    $icon = ';)';
                }

                $post .= "[$icon]";
            }
            elsif ($src and $CFG->{markup_allow_image}) {
                $post .= "[img]$src\[/img]";
            }
        }
    );

    for (qw/b i u s strike ul ol li/) {
        $code{"start_$_"} = $simple_start;
        $code{"end_$_"}   = $simple_end;
    }

    return(\$post, \%code);
}
END_OF_SUB

1;