I downloaded the copy from my server. Here it is:
<br>
#!/usr/bin/perl

# Cuts the DMOZ RDF dump (./content.rdf.u8) into one "*.dump.slice" file per
# top-level category, and sends a notification mail after each slice whose
# end marker was found.

print "Content-type: text/html \n\n";

# helps us catch nasty errors
# NOTE(review): in the pasted copy the "use" line below had been merged into
# the comment above and so was not in effect. Re-enable it if CGI::Carp is
# installed on the server.
# use CGI::Carp qw(fatalsToBrowser);

$full = 1;    # if only wanting everything bar regional and world...use this!
# NOTE(review): $full is never read anywhere in this part of the script.

######################################################
# GET THE DUMP FILE STARTS HERE ######################
######################################################

# The download step is disabled; it is kept here, commented out, for reference.

# get rid of the old file...
# unlink "content.rdf.u8";

# $main_rdf_start_time = time;

# `wget --no-directories http://dmoz.org/rdf/content.rdf.u8.gz`;
# `gzip -d content.rdf.u8.gz`;

# finished with rdf.u8.gz, so delete now...keep space!
# unlink "content.rdf.u8.gz";

# $main_rdf_end_time = time;
# $main_rdf_total_time = $main_rdf_end_time - $main_rdf_start_time;

# open(MAIL,"|/usr/sbin/sendmail -t") || die &error("Unable to open Sendmail. Reason: $!");
# $webmaster = 'webmaster@assistantdirectors.com';
# print MAIL "To: $webmaster \n";
# print MAIL "From: $webmaster \n";
# print MAIL "Reply-to: $webmaster \n";
# print MAIL "Subject: RE Dump... \n\n";
# print MAIL "content.rdf.u8.gz has successfully been downloaded and decompressed. Took $main_rdf_total_time\n";
# print MAIL "\n \n Thanks";
# print MAIL "\n";
# print MAIL "A.J.Newby \n";
# print MAIL "Ace Installer \n";
# close(MAIL);

###################################################
### END THE GETTING OF THE MAIN DUMP FILE #########
###################################################

##################################################
### CUT THE DUMP INTO 17 SMALLER CATEGORIES ######
##################################################

# strict/warnings are scoped to this bare block so that code outside it which
# relies on undeclared package globals (such as $full above) keeps compiling.
{
    use strict;
    use warnings;

    my $dump_path = './content.rdf.u8';

    # Each entry is [ start topic id, end topic id ]. Lines are copied from the
    # line containing <Topic r:id="START"> up to, but not including, the line
    # containing <Topic r:id="END">.
    # NOTE(review): the heading says 17 categories but only 13 are listed;
    # Kids_and_Teens, Recreation, Society and World appear only as end markers
    # and never get a slice of their own. Confirm whether that is intentional.
    my @slices = (
        [ 'Top/Adult',     'Top/Arts' ],
        [ 'Top/Arts',      'Top/Business' ],
        [ 'Top/Business',  'Top/Computers' ],
        [ 'Top/Computers', 'Top/Games' ],
        [ 'Top/Games',     'Top/Health' ],
        [ 'Top/Health',    'Top/Home' ],
        [ 'Top/News',      'Top/Recreation' ],
        [ 'Top/Reference', 'Top/Regional' ],
        [ 'Top/Regional',  'Top/Science' ],
        [ 'Top/Science',   'Top/Shopping' ],
        [ 'Top/Shopping',  'Top/Society' ],
        [ 'Top/Sports',    'Top/World' ],
        [ 'Top/Home',      'Top/Kids_and_Teens' ],
    );

    for my $slice (@slices) {
        my ($start_line, $end_line) = @{$slice};

        # "Top/Arts" becomes "/arts", so the slice is written to
        # ".//arts.dump.slice" (same path the script has always used).
        (my $file_save = lc $start_line) =~ s/Top//i;
        my $slice_path = "./$file_save.dump.slice";

        # The dump is re-read from the top for every category.
        open(my $dmoz, '<', $dump_path)
            or error("Unable to read dump file. Reason: $!");

        # A single '>' open both empties the old slice and readies it for
        # writing; the previous truncate-then-append pair left the first open
        # unchecked.
        open(my $out, '>', $slice_path)
            or error("cant do it: $! : $slice_path");

        # \Q...\E keeps the topic ids literal inside the patterns.
        my $start_re = qr/<Topic r:id="\Q$start_line\E">/;
        my $end_re   = qr/<Topic r:id="\Q$end_line\E">/;

        my $copying   = 0;    # set once the start marker has been seen
        my $found_end = 0;    # set when the end marker stops the scan

        # A named lexical is used for the line: reading into $_ here would
        # overwrite the element the enclosing loop is iterating over.
        while (my $line = <$dmoz>) {
            $copying = 1 if $line =~ $start_re;

            if ($line =~ $end_re) {
                $found_end = 1;
                last;
            }

            # NOTE(review): $line still carries its own newline, so every
            # copied line is followed by a blank line. Kept as-is in case the
            # importer expects it - confirm and drop the "\n" if not.
            print {$out} "$line\n" if $copying;
        }

        # Always close the slice, even when the end marker never showed up.
        close($out)
            or error("Unable to finish writing $slice_path. Reason: $!");
        close($dmoz);    # close up the main file...

        # As before, the notification only goes out when the end marker was hit.
        import_done_email($start_line) if $found_end;
    }
}
<br>
<br>
# Mails a short "category done" notice to the webmaster.
#
# Parameters:
#   $cat - category name; it is placed in the subject line and the body.
#
# Returns the result of closing the sendmail pipe (true on success).
# If sendmail cannot be started, error() is called, which prints the message
# and exits the program.
sub import_done_email {
    my ($cat) = @_;

    # The address previously read 'webmaster@assistantdirectors' (no ".com"),
    # unlike the address used by the download notification in this script.
    my $webmaster = 'webmaster@assistantdirectors.com';

    # List-form pipe open: sendmail is run directly, without a shell.
    open(my $mail, '|-', '/usr/sbin/sendmail', '-t')
        or error("Unable to open Sendmail. Reason: $!");

    print {$mail} "To: $webmaster \n";
    print {$mail} "From: $webmaster \n";
    print {$mail} "Reply-to: $webmaster \n";
    print {$mail} "Subject: RE Main $cat Dump... \n\n";
    print {$mail} "$cat has now been inported into the SQL database.... \n";
    print {$mail} "\n \n Thanks";
    print {$mail} "\n";
    print {$mail} "A.J.Newby \n";
    print {$mail} "Ace Installer \n";

    # close() on a pipe waits for sendmail and reports its failure, if any.
    my $sent = close($mail);
    warn "sendmail did not finish cleanly for $cat (status $?)\n" unless $sent;

    return $sent;
}
<br>
<br>
# Last-resort error reporter: prints the given message to the currently
# selected output handle and then ends the program.
#
# Parameters:
#   first argument - the text to print.
sub error {
    my $message = shift @_;

    print $message;
    exit 0;
}
<br>
#!/usr/bin/perl

# Cuts the DMOZ RDF dump (./content.rdf.u8) into one "*.dump.slice" file per
# top-level category, and sends a notification mail after each slice whose
# end marker was found.

print "Content-type: text/html \n\n";

# helps us catch nasty errors
# NOTE(review): in the pasted copy the "use" line below had been merged into
# the comment above and so was not in effect. Re-enable it if CGI::Carp is
# installed on the server.
# use CGI::Carp qw(fatalsToBrowser);

$full = 1;    # if only wanting everything bar regional and world...use this!
# NOTE(review): $full is never read anywhere in this part of the script.

######################################################
# GET THE DUMP FILE STARTS HERE ######################
######################################################

# The download step is disabled; it is kept here, commented out, for reference.

# get rid of the old file...
# unlink "content.rdf.u8";

# $main_rdf_start_time = time;

# `wget --no-directories http://dmoz.org/rdf/content.rdf.u8.gz`;
# `gzip -d content.rdf.u8.gz`;

# finished with rdf.u8.gz, so delete now...keep space!
# unlink "content.rdf.u8.gz";

# $main_rdf_end_time = time;
# $main_rdf_total_time = $main_rdf_end_time - $main_rdf_start_time;

# open(MAIL,"|/usr/sbin/sendmail -t") || die &error("Unable to open Sendmail. Reason: $!");
# $webmaster = 'webmaster@assistantdirectors.com';
# print MAIL "To: $webmaster \n";
# print MAIL "From: $webmaster \n";
# print MAIL "Reply-to: $webmaster \n";
# print MAIL "Subject: RE Dump... \n\n";
# print MAIL "content.rdf.u8.gz has successfully been downloaded and decompressed. Took $main_rdf_total_time\n";
# print MAIL "\n \n Thanks";
# print MAIL "\n";
# print MAIL "A.J.Newby \n";
# print MAIL "Ace Installer \n";
# close(MAIL);

###################################################
### END THE GETTING OF THE MAIN DUMP FILE #########
###################################################

##################################################
### CUT THE DUMP INTO 17 SMALLER CATEGORIES ######
##################################################

# strict/warnings are scoped to this bare block so that code outside it which
# relies on undeclared package globals (such as $full above) keeps compiling.
{
    use strict;
    use warnings;

    my $dump_path = './content.rdf.u8';

    # Each entry is [ start topic id, end topic id ]. Lines are copied from the
    # line containing <Topic r:id="START"> up to, but not including, the line
    # containing <Topic r:id="END">.
    # NOTE(review): the heading says 17 categories but only 13 are listed;
    # Kids_and_Teens, Recreation, Society and World appear only as end markers
    # and never get a slice of their own. Confirm whether that is intentional.
    my @slices = (
        [ 'Top/Adult',     'Top/Arts' ],
        [ 'Top/Arts',      'Top/Business' ],
        [ 'Top/Business',  'Top/Computers' ],
        [ 'Top/Computers', 'Top/Games' ],
        [ 'Top/Games',     'Top/Health' ],
        [ 'Top/Health',    'Top/Home' ],
        [ 'Top/News',      'Top/Recreation' ],
        [ 'Top/Reference', 'Top/Regional' ],
        [ 'Top/Regional',  'Top/Science' ],
        [ 'Top/Science',   'Top/Shopping' ],
        [ 'Top/Shopping',  'Top/Society' ],
        [ 'Top/Sports',    'Top/World' ],
        [ 'Top/Home',      'Top/Kids_and_Teens' ],
    );

    for my $slice (@slices) {
        my ($start_line, $end_line) = @{$slice};

        # "Top/Arts" becomes "/arts", so the slice is written to
        # ".//arts.dump.slice" (same path the script has always used).
        (my $file_save = lc $start_line) =~ s/Top//i;
        my $slice_path = "./$file_save.dump.slice";

        # The dump is re-read from the top for every category.
        open(my $dmoz, '<', $dump_path)
            or error("Unable to read dump file. Reason: $!");

        # A single '>' open both empties the old slice and readies it for
        # writing; the previous truncate-then-append pair left the first open
        # unchecked.
        open(my $out, '>', $slice_path)
            or error("cant do it: $! : $slice_path");

        # \Q...\E keeps the topic ids literal inside the patterns.
        my $start_re = qr/<Topic r:id="\Q$start_line\E">/;
        my $end_re   = qr/<Topic r:id="\Q$end_line\E">/;

        my $copying   = 0;    # set once the start marker has been seen
        my $found_end = 0;    # set when the end marker stops the scan

        # A named lexical is used for the line: reading into $_ here would
        # overwrite the element the enclosing loop is iterating over.
        while (my $line = <$dmoz>) {
            $copying = 1 if $line =~ $start_re;

            if ($line =~ $end_re) {
                $found_end = 1;
                last;
            }

            # NOTE(review): $line still carries its own newline, so every
            # copied line is followed by a blank line. Kept as-is in case the
            # importer expects it - confirm and drop the "\n" if not.
            print {$out} "$line\n" if $copying;
        }

        # Always close the slice, even when the end marker never showed up.
        close($out)
            or error("Unable to finish writing $slice_path. Reason: $!");
        close($dmoz);    # close up the main file...

        # As before, the notification only goes out when the end marker was hit.
        import_done_email($start_line) if $found_end;
    }
}
<br>
<br>
# Mails a short "category done" notice to the webmaster.
#
# Parameters:
#   $cat - category name; it is placed in the subject line and the body.
#
# Returns the result of closing the sendmail pipe (true on success).
# If sendmail cannot be started, error() is called, which prints the message
# and exits the program.
sub import_done_email {
    my ($cat) = @_;

    # The address previously read 'webmaster@assistantdirectors' (no ".com"),
    # unlike the address used by the download notification in this script.
    my $webmaster = 'webmaster@assistantdirectors.com';

    # List-form pipe open: sendmail is run directly, without a shell.
    open(my $mail, '|-', '/usr/sbin/sendmail', '-t')
        or error("Unable to open Sendmail. Reason: $!");

    print {$mail} "To: $webmaster \n";
    print {$mail} "From: $webmaster \n";
    print {$mail} "Reply-to: $webmaster \n";
    print {$mail} "Subject: RE Main $cat Dump... \n\n";
    print {$mail} "$cat has now been inported into the SQL database.... \n";
    print {$mail} "\n \n Thanks";
    print {$mail} "\n";
    print {$mail} "A.J.Newby \n";
    print {$mail} "Ace Installer \n";

    # close() on a pipe waits for sendmail and reports its failure, if any.
    my $sent = close($mail);
    warn "sendmail did not finish cleanly for $cat (status $?)\n" unless $sent;

    return $sent;
}
<br>
<br>
# Last-resort error reporter: prints the given message to the currently
# selected output handle and then ends the program.
#
# Parameters:
#   first argument - the text to print.
sub error {
    my $message = shift @_;

    print $message;
    exit 0;
}