
werdna at svn
Jul 17, 2008, 7:18 PM
Post #1 of 1
(32 views)
Permalink
|
|
SVN: [37807] trunk/extensions/AbuseFilter/AbuseFilter.class.php
|
|
Revision: 37807 Author: werdna Date: 2008-07-18 02:18:58 +0000 (Fri, 18 Jul 2008) Log Message: ----------- AbuseFilter: * Improve normalisation: use AntiSpoof. * Allow callers to prevent the use of a condition counter. It's kinda annoying in batch processes. * Improved caching of tokenisation - reduces average time to process a condition from 4ms right down to 200us * Some new modifiers, caching of modifier data. Modified Paths: -------------- trunk/extensions/AbuseFilter/AbuseFilter.class.php Modified: trunk/extensions/AbuseFilter/AbuseFilter.class.php =================================================================== --- trunk/extensions/AbuseFilter/AbuseFilter.class.php 2008-07-18 02:16:50 UTC (rev 37806) +++ trunk/extensions/AbuseFilter/AbuseFilter.class.php 2008-07-18 02:18:58 UTC (rev 37807) @@ -8,9 +8,12 @@ public static $condCheckCount = array(); public static $condMatchCount = array(); public static $statsStoragePeriod = 86400; - public static $modifierWords = array( 'norm', 'supernorm', 'lcase', 'length', 'specialratio', 'htmldecode', 'htmlencode', 'urlencode', 'urldecode' ); + public static $modifierWords = array( 'norm', 'supernorm', 'lcase', 'length', 'specialratio', 'htmldecode', 'htmlencode', 'urlencode', 'urldecode', 'htmlfullencode' ); public static $operatorWords = array( 'eq', 'neq', 'gt', 'lt', 'regex', 'contains' ); public static $validJoinConditions = array( '!', '|', '&' ); + public static $condLimitEnabled = true; + public static $tokenCache = array(); + public static $modifyCache = array(); public static function generateUserVars( $user ) { $vars = array(); @@ -28,6 +31,11 @@ return $vars; } + + public static function disableConditionLimit() { + // For use in batch scripts and the like + self::$condLimitEnabled = false; + } public static function generateTitleVars( $title, $prefix ) { $vars = array(); @@ -51,7 +59,7 @@ $fname = __METHOD__; global $wgAbuseFilterConditionLimit; - if (self::$condCount > $wgAbuseFilterConditionLimit) { + if (self::$condCount > $wgAbuseFilterConditionLimit && self::$condLimitEnabled) { return false; } @@ -88,7 +96,7 @@ } // We've hit the limit. - if (self::$condCount > $wgAbuseFilterConditionLimit) { + if (self::$condCount > $wgAbuseFilterConditionLimit && self::$condLimitEnabled) { return false; } @@ -145,8 +153,12 @@ // Get the rest of the string after the operator. $parameters = explode( ' ', $conds, $wordNum+2); $parameters = trim($parameters[$wordNum+1]); - - if (in_array( $parameters, array_keys( $vars ) )) { + + list($firstWord,$rest) = explode( ' ', $parameters, 2 ); + if (in_array( $firstWord, self::$modifierWords ) && in_array( $rest, array_keys($vars))) { + // Allow the compare target to be modified, too. + $parameters = self::modifyValue( $firstWord, $vars[$rest] ); + } elseif (in_array( $parameters, array_keys( $vars ) )) { $parameters = $vars[$parameters]; } @@ -167,6 +179,12 @@ public static function tokeniseList( $list ) { wfProfileIn( __METHOD__ ); + + + if (isset(self::$tokenCache[$list])) { + return self::$tokenCache[$list]; + } + // Parse it, character by character. $escapeNext = false; $listLevel = 0; @@ -220,6 +238,8 @@ // Put any leftovers in $allTokens[] = $thisToken; + + self::$tokenCache[$list] = $allTokens; wfProfileOut( __METHOD__ ); @@ -227,26 +247,41 @@ } public static function modifyValue( $modifier, $value ) { + if (isset(self::$modifyCache[$modifier][$value])) + return self::$modifyCache[$modifier][$value]; + + if ($modifier == 'norm') { - return self::normalise( $value ); + $val = self::normalise( $value ); } elseif ($modifier == 'supernorm') { - return self::superNormalise( $value ); + $val = self::superNormalise( $value ); } elseif ($modifier == 'lcase') { - return strtolower($value); + $val = strtolower($value); } elseif ($modifier == 'length') { - return strlen($value); + $val = strlen($value); } elseif ($modifier == 'specialratio') { $specialsonly = preg_replace('/\w/', '', $value ); - return (strlen($specialsonly) / strlen($value)); + $val = (strlen($specialsonly) / strlen($value)); } elseif ($modifier == 'htmlencode') { - return htmlspecialchars($value); + $val = htmlspecialchars($value); } elseif ($modifier == 'htmldecode') { - return htmlspecialchars_decode($value); + $val = htmlspecialchars_decode($value); } elseif ($modifier == 'urlencode') { - return urlencode($value); + $val = urlencode($value); } elseif ($modifier == 'urldecode') { - return urldecode($value); + $val = urldecode($value); + } elseif ($modifier == 'htmlfullencode') { + $val = htmlentities( $value ); + } elseif ($modifier == 'simplenorm') { + $val = preg_replace( '/[\d\W]+/', '', $value ); + $val = strtolower( $value ); } + + if (count(self::$modifyCache[$modifier][$value]) > 1000) { + self::$modifyCache = array(); + } + + return self::$modifyCache[$modifier][$value] = $val; } public static function checkOperator( $operator, $value, $parameters ) { @@ -269,19 +304,18 @@ public static function superNormalise( $text ) { $text = self::normalise( $text ); - $text = preg_split( '//', $text, -1, PREG_SPLIT_NO_EMPTY ); // Split to a char array. + $text = AntiSpoof::stringToList($text); // Split to a char array. sort($text); $text = array_unique( $text ); // Remove duplicate characters. - $text = implode( '', $text ); + $text = AntiSpoof::listToString( $text ); return $text; } public static function normalise( $text ) { $old_text = $text; - $text = preg_replace( '/\W/', '', $text ); // Remove any special characters. $text = strtolower($text); - $text = preg_split( '//', $text, -1, PREG_SPLIT_NO_EMPTY ); // Split to a char array. + $text = AntiSpoof::stringToList( $text ); $text = AntiSpoof::equivString( $text ); // Normalise // Remove repeated characters, but not all duplicates. @@ -293,7 +327,9 @@ } } - $text = implode('', $text ); // Sort in alphabetical order, put back as it was. + $text = AntiSpoof::listToString( $text ); // Sort in alphabetical order, put back as it was. + + $text = preg_replace( '/\W/', '', $text ); // Remove any special characters. return $text; } @@ -328,9 +364,12 @@ $filter_matched[$row->af_id] = false; } } + + // Don't store stats if the cond limit is disabled. + // It's probably a batch process or similar. + if (!self::$condLimitEnabled) + self::recordStats( $filter_matched ); - self::recordStats( $filter_matched ); - if (count($blocking_filters) == 0 ) { // No problems. return true; _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS[at]lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
|