#!/usr/bin/perl -w # ================================================================== # Links SQL - enhanced directory management system # # Website : http://gossamer-threads.com/ # Support : http://gossamer-threads.com/scripts/support/ # Revision : $Id: nph-verify.cgi,v 1.29 2001/12/30 23:17:07 alex Exp $ # # Copyright (c) 2001 Gossamer Threads Inc. All Rights Reserved. # Redistribution in part or in whole strictly prohibited. Please # see LICENSE file for full details. # ================================================================== use strict; use lib '.'; use vars qw/$USE_HTML $BLOCK_SIZE $MAX_WORKUNIT $MIN_WORKUNIT $MAX_CHILDREN $SPAWN_DELAY $FONT $TODAY $GOOD $BAD @LINKS_TO_CHECK/; use Links qw/$IN $DB $CFG/; use GT::SQL::Condition; use Links::Parallel; use Links::Tools; $| = 1; local $SIG{__DIE__} = \&Links::fatal; Links::init('.'); main(); sub main { # ------------------------------------------------------------------- # specific configuration, you can tweak for optimimum performance # you should not need to change these parameters, if you do, you should # know what you are doing # $BLOCK_SIZE = 5000; # the bulk link handler $MAX_WORKUNIT = 100; # a child unit can up to X links to check $MIN_WORKUNIT = 3; # but it must have at least (# links permitting) Y links to check $MAX_CHILDREN = 10; # the number of children allowed to exist $SPAWN_DELAY = 2; # the amount of time between successive spawns (to prevent load spiking) $FONT = 'font face="Tahoma,Arial,Helvetica" size="2"'; @LINKS_TO_CHECK = (); $GOOD = 0; $BAD = 0; $USE_HTML = exists $ENV{REQUEST_METHOD} ? 1 : 0; $TODAY = get_date(); # get the option parameters my $params = 0; my $id = 0; my ($method); if ($USE_HTML) { print $IN->header ( -nph => $CFG->{nph_headers} ); $_ = $IN->param ("do"); } else { $_ = join " ", @ARGV; # convert command line params to parseable commands CMD_LINE: { if (/--check_all/) { $_ = "check_links"; $method=5; last CMD_LINE; } if (/--check_from\s*([0-9]+)/) { $_ = "check_links"; $method=1; $params=$1; last CMD_LINE; } if (/--check_new/) { $_ = "check_links"; $method=4; last CMD_LINE; } if (/--check_problem/) { $_ = "check_links"; $method= 3; last CMD_LINE; } if (/--check_status\s*(-?[0-9]+)/) { $_ = "check_links"; $method=6; $params=$1; last CMD_LINE; } if (/--check\s*(-?[0-9]+)/) { $_ = "check_links"; $method=7; $id=$1; last CMD_LINE; } if (/--fix_302/) { $_ = "fix_302"; last CMD_LINE; } $_ = undef; }; }; SWITCH: { if (not defined) { if ($USE_HTML) { Links::admin_page ('tools_verify.html'); last SWITCH; } else { &command_line_help (); last SWITCH; }; } if (/^check_links/) { if ($USE_HTML) { if (defined $IN->param("method")) { check_links ( $IN->param("method") || 5, $IN->param("status") || 0, $IN->param("ID") || 0, $IN->param("days") || 0, $IN->param("since"), $IN->param("to") ); } else { html_verify_select ( calclinkstatus () ); }; } else { if (defined $method) { check_links ( $method, $params, $id, $params ); } else { }; }; last SWITCH; } }; } sub command_line_help { # ------------------------------------------------------------------- # Print out a usage summary. # print qq| This script checks the Links SQL database for link integrity. The following parameters may be used from the command line: For checking links: (one of) --check_from number_of_days_ago --check_problem --check_new --check_status status_code --check_all For fixing links: (one of) --fix_302 |; }; sub check_links { #---------------------------------------------------------------------- # generates the sql query that nabs the link subset that we want # to check then checks the links # my ($action,$status,$id,$days,$since,$to) = @_; # make sure everything is what we expect it to be $status = int ( $status ); $id = int ( $id ); $days = int ( $days ); $since ||= ''; $since =~ /([0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2})/; $since = $1; $to ||= ''; $to =~ /([0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2})/; $to = $1; # build the query condition. my $cond = new GT::SQL::Condition; if ( $action == 1 ) { # not checked in the last N days my $tmp_date = &get_date (time - (86400 * $days)); $cond->add ("Date_Checked", "<", $tmp_date); } elsif ( $action == 2 ) { # checked last between N and O $cond->add ("Date_Checked", ">", $since); $cond->add ("Date_Checked", "<", $to); } elsif ( $action == 3 ) { # problem links my $val = join ( ",", keys %Links::Tools::STATUS_BAD ); $cond->add ("Status", "IN", \"($val)"); } elsif ( $action == 4 ) { # new links $cond->add ("Status", "=", 0); } elsif ( $action == 5 ) { # everything $cond = {}; } elsif ( $action == 6) { # check something based on status code $cond->add ("Status", "=", $status); } elsif ( $action == 7 ) { # check a certain link $cond->add ("ID", "=", $id); }; # find out how many items need to be checked..., my $link_db = $DB->table ('Links'); my $count = $link_db->count($cond); if ($USE_HTML) { print qq~
'; } else { print "Checking $count links ... \n"; } # demo # print "\nChecking links has been disabled in the demo\n\n"; # return; if ($count == 0) { print "No links to check! Quitting!\n\n"; } else { my $offset = 0; my @stats; # Remove mod_perl env so verify-child doesn't think it's under mod_perl. my $env = delete $ENV{GATEWAY_INTERFACE}; while (1) { # run the query and register it $link_db->select_options ("LIMIT $offset,$BLOCK_SIZE"); my $sth = $link_db->select ($cond, ['ID']); $offset += $BLOCK_SIZE; if ($sth->rows()<=0) { last; }; while (($id) = $sth->fetchrow_array) { push @LINKS_TO_CHECK, int($id); } my $checker = new Links::Parallel ( 'MAX_WORKUNIT' => $MAX_WORKUNIT, 'MIN_WORKUNIT' => $MIN_WORKUNIT, 'SPAWN_DELAY' => $SPAWN_DELAY, 'MAX_CHILDREN' => $MAX_CHILDREN, 'child_path' => "$CFG->{admin_root_path}/verify-child.pl", 'path_to_perl' => $CFG->{path_to_perl}, 'on_response' => \&link_report, 'debug_level' => 1 ); $checker->{to_check} = \@LINKS_TO_CHECK; $checker->wait (); push @stats, $checker->get_stats (); }; &print_summary (\@stats); } if ($USE_HTML) { print "