# ================================================================== # Gossamer Threads Module Library - http://gossamer-threads.com/ # # GT::Text::Tools # Author : Scott Beck # CVS Info : # $Id: Tools.pm,v 1.4 2002/04/07 03:35:45 jagerman Exp $ # # Copyright (c) 2000 Gossamer Threads Inc. All Rights Reserved. # ================================================================== # # Description: A general purpose text parsing module. # package GT::Text::Tools; # ================================================================== # Pragmas use strict; # Internal mules use bases 'GT::Base' => ''; sub linesplit { # -------------------------------------------------------------------- # my @words = GT::Text::Tools->linesplit($regex, $line); # ------------------------------------------------------ # Splits $line by $regex outside of quotes ['"] # If regex is false defaults to \s+. # # Ganged and modified from Text::ParseWords local $^W; my ($class, $delimiter, $line) = @_; $delimiter ||= '\s+'; $delimiter =~ s/(\s)/\\$1/g; my ($quote, $quoted, $unquoted, $delim, $word, @pieces); while (length($line)) { ($quote, $quoted, undef, $unquoted, $delim, undef) = $line =~ m/^(["']) # a $quote ((?:\\.|(?!\1)[^\\])*) # and $quoted text \1 # followed by the same quote ([\000-\377]*) # and the rest | # --OR-- ^((?:\\.|[^\\"'])*?) # an $unquoted text (\Z(?!\n)|(?:$delimiter)|(?!^)(?=["'])) # plus EOL, delimiter, or quote ([\000-\377]*) # the rest /x; # extended layout return () unless ( $quote || length ($unquoted) || length ($delim)); $line = $+; $quoted = "$quote$quoted$quote"; $word .= defined $quote ? $quoted : $unquoted; if (length($delim)) { push(@pieces, $word); undef $word; } if (!length($line)) { push(@pieces, $word); } } return (@pieces); } sub linewrap { # -------------------------------------------------------------------- # GT::Text::Tools->linewrap( $string, $number, { # nowrap => $regexs, # eol => "\n", # max_line_length => 50000 # }); # ---------------------------------------------- # linewrap takes a string, a number of characters per line and a # hesh ref of options. String will be wrapped to the number of # characters specified on spaces. # The following options apply: # nowrap => array ref of regexes that if matched, will # not be wrapped. # eol => What to wrape the lines with, defaults to # \n. # max_line_length => The maximum length a line can be that will # be wrapped on a space. Any line reaching # this length will be wrapped without # looking for spaces. Defaults to 50_000, set # to non-true value to avoid this affect. # my ( $class, $string, $i, $opts ) = @_; my $max_len = exists( $opts->{max_line_length} ) ? $opts->{max_line_length} : 50_000; my $regexs = $opts->{nowrap} || []; my $nl = $opts->{eol} || "\n"; $regexs = (ref ($regexs) eq 'ARRAY') ? $regexs : [$regexs || ()]; my @t = split /$nl/, $string; my $r = ""; while (@t) { my $match = 0; if (($t[0] =~ /^\s*$/) or (length( $t[0] ) <= $i)) { $r .= shift (@t) . $nl; $match = 1; } elsif ($max_len and length( $t[0] ) > $max_len) { # Line is too long. my $line = shift @t; while ($line) { $r .= substr($line, 0, $i) . "\n"; substr($line, 0, $i) = ''; } } elsif (@{$regexs}) { foreach my $regex (@{$regexs}) { if ($t[0] =~ m,^(.*?)(\s?)($regex)\s(.*)$,) { shift(@t); my $pre = _unexpand($i, $nl, $1); my $aft = _unexpand($i, $nl, $4); my $mat = $3; my $s = $2 || ''; $r .= $pre . $s . $mat . $aft . $nl; $match = 1; last; } } } next if $match; $r .= _unexpand($i, $nl, shift(@t) || ''); } return $r; } sub _unexpand { # -------------------------------------------------------------------- # _unexpand($length, $string); # ---------------------------- # Internal method called by linewrap() to wrape a line. # my ($i, $e); $i = $e = shift; my $nl = shift; my $r; defined $_[0] or return ''; if (length $_[0] < $i) { return $_[0]; } while (@_) { defined($_[0]) or last; if ($_[0] =~ /^(.{$i})\s(.+)$/) { shift() and $r .= $1 . $nl; $i = $e; if (defined($2) and length($2) <= $e) { $r .= $2 . $nl } else { unshift(@_, $2) } } elsif ($i-- == 0) { $i = $e; shift() =~ /^(.{$i})(.+)$/ and $r .= $1 . $nl; if (defined($2) and length($2) <= $e) { $r .= $2 . $nl } else { unshift(@_, $2) } } } return defined($r) ? $r : ''; } 1;