So I have read all the posts out there regarding the RDF dump from DMOZ.org. I have been successful in translating the categories and links to the Links 2.0 format for my desired subcategories; however, there remains one hurdle for me.
I am not getting the link descriptions after the transfer has occurred. Below is a copy of the rdf-hs.cgi that I am using, exactly as I have it. Again, the transfer is working on all counts except the link description. FYI, I am using content.rdf and structure.rdf as provided by DMOZ.
Thanks in advance,
Tim
support@ezFriend.com
RDF-HS.cgi
_______________________
#!/usr/local/bin/perl
## Configuration: set each of these four values to the FULL path of
## the corresponding file before running.
$incats   = 'structure.rdf';    # input:  DMOZ category file
$outcats  = 'categories.db';    # output: YOUR category file
$inlinks  = 'content.rdf';      # input:  DMOZ links database
$outlinks = 'links.db';         # output: YOUR links database

## Gossamer Threads users: search the subs below for comments that
## begin with the word "CHANGE" to find the record formats you will
## need to alter.

## String stripped out of every category path by both subs.
$top = 'Top/';

## Convert the categories first, then the links.
Categories();
Links();
## Categories -- build the category database from the DMOZ structure dump.
##
## Scans $incats line by line for <narrow r:resource="..."/> elements,
## removes $top from each target, turns whitespace into underscores, and
## collects the distinct names in %categories.  Every collected name that
## contains "Home" is then written to $outcats as one pipe-delimited record.
##
## Reads globals:   $incats, $outcats, $top, $related, $description,
##                  $keywords, $header, $footer
## Updates globals: $ID (incremented once per call), %categories
## Prints a progress dot per matched line and a final message to STDOUT.
## Dies if either file cannot be opened or the output cannot be flushed.
sub Categories {
    my $catstart = '<narrow r:resource="';
    my $catend   = '"/>';

    ## \Q...\E keeps the delimiters literal inside the pattern.
    my $narrow_re = qr/\Q$catstart\E([^<]+)\Q$catend\E/;

    ## $ID is bumped exactly once here so the numbering that Links()
    ## continues from is unchanged.  Category records take their IDs from
    ## a private counter starting at that value, so each record gets a
    ## distinct ID instead of all of them sharing one.
    $ID++;
    my $cat_id = $ID;

    open(my $in, '<', $incats) or die "Cannot read $incats: $!\n";
    while (my $line = <$in>) {
        chomp $line;
        next unless $line =~ $narrow_re;

        my $name = $1;
        $name =~ s/\Q$top\E//g;
        $name =~ s/\s/_/g;
        $categories{$name}++;
        print ".";
    }
    close($in);

    open(my $out, '>', $outcats) or die "Cannot write $outcats: $!\n";
    foreach my $cat (sort keys %categories) {
        ## To select only certain categories, keep this filter;
        ## comment it out to export everything.
        next if $cat !~ /Home/;

        ## $main is the first ':'-separated component of the name.
        my ($main) = split /:/, $cat;
        $main = '' unless defined $main;

        ## $rest is the name with "$main:" and then "$main" removed.
        ## $main is quoted so names containing regex metacharacters
        ## (e.g. "C++") cannot break or alter the pattern.
        my $rest = $cat;
        if (length $main) {
            $rest =~ s/\Q$main\E://g;
            $rest =~ s/\Q$main\E//g;
        }

        ## CHANGE THIS TO MATCH YOUR FILE FORMAT !!
        print {$out} "$cat_id|$main|$rest|$related|$description|$keywords|$header|$footer\n";
        $cat_id++;
    }
    close($out) or die "Cannot finish writing $outcats: $!\n";

    print "\n\nCategories DONE\n";
}
## Links -- build the links database from the DMOZ content dump.
##
## Scans $inlinks line by line, remembering the most recent
## <Topic r:id="...">, <ExternalPage about="..."> and <d:Title> values.
## Each time a <d:Description> line is matched, and the current category
## contains "Home", one pipe-delimited record is written to $outlinks.
##
## Reads globals:   $inlinks, $outlinks, $top, $AltCategories
## Updates globals: $ID (once at entry, then once per record written)
## Prints progress to STDOUT.
## Dies if either file cannot be opened or the output cannot be flushed.
sub Links {
    print "Starting Conversion...\n";

    my $catstart   = '<Topic r:id="';
    my $catend     = '">';
    my $linkstart  = '<ExternalPage about="';
    my $linkend    = '">';
    my $titlestart = '<d:Title>';
    my $titleend   = '</d:Title>';
    my $descstart  = '<d:Description>';
    my $descend    = '</d:Description>';

    ## \Q...\E keeps every delimiter literal inside its pattern.
    my $topic_re = qr/\Q$catstart\E([^<]+)\Q$catend\E/;
    my $page_re  = qr/\Q$linkstart\E([^<]+)\Q$linkend\E/;
    my $title_re = qr/\Q$titlestart\E([^<]+)\Q$titleend\E/;
    my $desc_re  = qr/\Q$descstart\E([^<]+)\Q$descend\E/;

    $ID++;

    ## Open the input first so a missing dump does not truncate an
    ## existing output file.
    open(my $in,  '<', $inlinks)  or die "Cannot read $inlinks: $!\n";
    open(my $out, '>', $outlinks) or die "Cannot write $outlinks: $!\n";

    my ($category, $url, $title) = ('', '', '');

    while (my $line = <$in>) {
        chomp $line;

        if ($line =~ $topic_re) {
            $category = $1;
            $category =~ s/\Q$top\E//g;
            $category =~ s/\s/_/g;
        }
        elsif ($line =~ $page_re) {
            $url = $1;
            ## A new page starts: forget the previous page's title so a
            ## page without a <d:Title> line cannot inherit it.
            $title = '';
        }
        elsif ($line =~ $title_re) {
            $title = $1;
        }
        elsif ($line =~ $desc_re) {
            my $description = $1;

            ## To select only certain categories, keep this filter;
            ## comment it out to export everything.
            next if $category !~ /Home/;

            $ID++;
            print ".";

            ## CHANGE THIS TO MATCH YOUR FILE FORMAT !!
            ## NOTE(review): this record has 15 fields, with
            ## $AltCategories sitting between Category and Description.
            ## If your links definition has no AltCategories column, the
            ## description will land one column to the right of where it
            ## is expected -- verify against your links.def.
            print {$out} "$ID|$title|$url|21-Aug-2000|$category|$AltCategories|$description|Bob Friend|support\@ezfriend.com|0|No|No|No|0|No\n";
        }
    }

    close($in);
    close($out) or die "Cannot finish writing $outlinks: $!\n";

    print "\n\nLinks DONE\n";
}
I am not getting the link descriptions after the transfer has occurred. Below is a copy of the rdf-hs.cgi that I am using, exactly as I have it. Again, the transfer is working on all counts except the link description. FYI, I am using content.rdf and structure.rdf as provided by DMOZ.
Thanks in advance,
Tim
support@ezFriend.com
RDF-HS.cgi
_______________________
#!/usr/local/bin/perl
## Configuration: set each of these four values to the FULL path of
## the corresponding file before running.
$incats   = 'structure.rdf';    # input:  DMOZ category file
$outcats  = 'categories.db';    # output: YOUR category file
$inlinks  = 'content.rdf';      # input:  DMOZ links database
$outlinks = 'links.db';         # output: YOUR links database

## Gossamer Threads users: search the subs below for comments that
## begin with the word "CHANGE" to find the record formats you will
## need to alter.

## String stripped out of every category path by both subs.
$top = 'Top/';

## Convert the categories first, then the links.
Categories();
Links();
## Categories -- build the category database from the DMOZ structure dump.
##
## Scans $incats line by line for <narrow r:resource="..."/> elements,
## removes $top from each target, turns whitespace into underscores, and
## collects the distinct names in %categories.  Every collected name that
## contains "Home" is then written to $outcats as one pipe-delimited record.
##
## Reads globals:   $incats, $outcats, $top, $related, $description,
##                  $keywords, $header, $footer
## Updates globals: $ID (incremented once per call), %categories
## Prints a progress dot per matched line and a final message to STDOUT.
## Dies if either file cannot be opened or the output cannot be flushed.
sub Categories {
    my $catstart = '<narrow r:resource="';
    my $catend   = '"/>';

    ## \Q...\E keeps the delimiters literal inside the pattern.
    my $narrow_re = qr/\Q$catstart\E([^<]+)\Q$catend\E/;

    ## $ID is bumped exactly once here so the numbering that Links()
    ## continues from is unchanged.  Category records take their IDs from
    ## a private counter starting at that value, so each record gets a
    ## distinct ID instead of all of them sharing one.
    $ID++;
    my $cat_id = $ID;

    open(my $in, '<', $incats) or die "Cannot read $incats: $!\n";
    while (my $line = <$in>) {
        chomp $line;
        next unless $line =~ $narrow_re;

        my $name = $1;
        $name =~ s/\Q$top\E//g;
        $name =~ s/\s/_/g;
        $categories{$name}++;
        print ".";
    }
    close($in);

    open(my $out, '>', $outcats) or die "Cannot write $outcats: $!\n";
    foreach my $cat (sort keys %categories) {
        ## To select only certain categories, keep this filter;
        ## comment it out to export everything.
        next if $cat !~ /Home/;

        ## $main is the first ':'-separated component of the name.
        my ($main) = split /:/, $cat;
        $main = '' unless defined $main;

        ## $rest is the name with "$main:" and then "$main" removed.
        ## $main is quoted so names containing regex metacharacters
        ## (e.g. "C++") cannot break or alter the pattern.
        my $rest = $cat;
        if (length $main) {
            $rest =~ s/\Q$main\E://g;
            $rest =~ s/\Q$main\E//g;
        }

        ## CHANGE THIS TO MATCH YOUR FILE FORMAT !!
        print {$out} "$cat_id|$main|$rest|$related|$description|$keywords|$header|$footer\n";
        $cat_id++;
    }
    close($out) or die "Cannot finish writing $outcats: $!\n";

    print "\n\nCategories DONE\n";
}
## Links -- build the links database from the DMOZ content dump.
##
## Scans $inlinks line by line, remembering the most recent
## <Topic r:id="...">, <ExternalPage about="..."> and <d:Title> values.
## Each time a <d:Description> line is matched, and the current category
## contains "Home", one pipe-delimited record is written to $outlinks.
##
## Reads globals:   $inlinks, $outlinks, $top, $AltCategories
## Updates globals: $ID (once at entry, then once per record written)
## Prints progress to STDOUT.
## Dies if either file cannot be opened or the output cannot be flushed.
sub Links {
    print "Starting Conversion...\n";

    my $catstart   = '<Topic r:id="';
    my $catend     = '">';
    my $linkstart  = '<ExternalPage about="';
    my $linkend    = '">';
    my $titlestart = '<d:Title>';
    my $titleend   = '</d:Title>';
    my $descstart  = '<d:Description>';
    my $descend    = '</d:Description>';

    ## \Q...\E keeps every delimiter literal inside its pattern.
    my $topic_re = qr/\Q$catstart\E([^<]+)\Q$catend\E/;
    my $page_re  = qr/\Q$linkstart\E([^<]+)\Q$linkend\E/;
    my $title_re = qr/\Q$titlestart\E([^<]+)\Q$titleend\E/;
    my $desc_re  = qr/\Q$descstart\E([^<]+)\Q$descend\E/;

    $ID++;

    ## Open the input first so a missing dump does not truncate an
    ## existing output file.
    open(my $in,  '<', $inlinks)  or die "Cannot read $inlinks: $!\n";
    open(my $out, '>', $outlinks) or die "Cannot write $outlinks: $!\n";

    my ($category, $url, $title) = ('', '', '');

    while (my $line = <$in>) {
        chomp $line;

        if ($line =~ $topic_re) {
            $category = $1;
            $category =~ s/\Q$top\E//g;
            $category =~ s/\s/_/g;
        }
        elsif ($line =~ $page_re) {
            $url = $1;
            ## A new page starts: forget the previous page's title so a
            ## page without a <d:Title> line cannot inherit it.
            $title = '';
        }
        elsif ($line =~ $title_re) {
            $title = $1;
        }
        elsif ($line =~ $desc_re) {
            my $description = $1;

            ## To select only certain categories, keep this filter;
            ## comment it out to export everything.
            next if $category !~ /Home/;

            $ID++;
            print ".";

            ## CHANGE THIS TO MATCH YOUR FILE FORMAT !!
            ## NOTE(review): this record has 15 fields, with
            ## $AltCategories sitting between Category and Description.
            ## If your links definition has no AltCategories column, the
            ## description will land one column to the right of where it
            ## is expected -- verify against your links.def.
            print {$out} "$ID|$title|$url|21-Aug-2000|$category|$AltCategories|$description|Bob Friend|support\@ezfriend.com|0|No|No|No|0|No\n";
        }
    }

    close($in);
    close($out) or die "Cannot finish writing $outlinks: $!\n";

    print "\n\nLinks DONE\n";
}