#!/usr/bin/perl
# ice-form.pl -- cgi compliant ICE search interface
#
# ICE Version 1.4 beta 3
# July 1997
# (C) Christian Neuss (ice-man@next1.isa.informatik.th-darmstadt.de)
# MAJOR OVERHAUL BY PETE. CALL IT VERSION 1.4P
# NOW USES TEMPLATES
# NOW IGNORES REMOVED COMMON WORDS
# NOW IGNORES TOO SHORT WORDS
# NOW RECOGNISES ABSENCE OF KEYWORDS
#--- start of configuration --- put your changes here ---
# Title or name of your server:
# Example: local($title)="Search this server";
local($title)="Search Blather";
# search directories to present in the search dialogue
# Example:
# local(@directories)=(
# "DZSIM (/www/projects/dzsim)",
# "CSCW Laboratory (/www/projects/cscw-lab)",
# );
local(@directories)=(
'Archives (/archives/)',
'Archives2 (/archives2/)',
'Archives3 (/archives3/)',
'Archives4 (/archives4/)',
'Articles (/articles/)',
'Articles 2003 (/articles2003/)',
'Shitegeist (/shitegeist/)',
'Forteana (/forteana/)',
'Bookstore (/bookstore/)',
'Gear (/gear/)',
'Consulting (/consulting/)',
);
# Location of the indexfile:
# Note: under Windows or Windows NT, add the drive letter
# Example: $INDEXFILE='/usr/local/etc/httpd/index/index.idx';
$INDEXFILE='/hsphere/local/home/blather/blather.net/search/Index.idx';
# Location of the thesaurus data file:
# Example: $thesfile='/igd/a3/home1/neuss/Perl/thes.dat';
#$thesfile='/igd/a3/home1/neuss/Perl/thes.dat';
# Document Root and Aliases for your server. The Document Root is
# the directory where the "top level" documents reside. Additional
# mappings can be set via the "Aliases" variable (which can be left
# empty if no additional mappings exist).
#
# Important hint: if you are unsure about how to set $docroot,
# look at the end of the index file. $docroot must be set so
# that it matches the paths found there.
# Any occurrence of $docroot in the filepath will be substituted
# with a blank.
#
# Example
# $docroot = '/usr3/webstuff/documents';
# %aliases = (
# '/projects', '/usr/stud/proj',
# '/people', '/usr3/webstuff/staff',
# );
#
$docroot = '/hsphere/local/home/blather/blather.net/';
%aliases = ('http://www.blather.net/', '/hsphere/local/home/blather/blather.net/');
# The following configuration settings are OPTIONAL
# HEADER and FOOTER make up the template page.
# Split the template page where you want the
# script to insert forms/results and call the
# top 'header' and the bottom 'footer'
$HEADER = 'header.txt';
$FOOTER = 'footer.txt';
# Maximum number of hits to return
# Example:
# $MAXHITS=100;
# Minimum length of word to be indexed (same as in ice-idx.pl)
# Example:
# $MINLEN=3;
$MINLEN=3;
$TEMPLATE = 'test.html';
###########--- end of configuration --- you don't have to change anything below ---#########
# if this script is called up "by hand", run a test
unless($ENV{"SCRIPT_NAME"}){
local($word) = ($#ARGV==-1) ? "test" : $ARGV[0] ;
print "You have called the ice forms interface manually.\n";
print "Optionally, provide search word as an argument.\n";
print "Test mode: search for \"$word\"\n";
print "--------\n";
$orig="$word @ /";
$foo=&getquery($orig);
print $foo;
exit;
}
# do the real work, but trap any errors
eval '&main';
sub main
{
# if content_length is zero and query string is empty
if (($ENV{CONTENT_LENGTH}==0) &&
(length($ENV{"QUERY_STRING"})==0)){
# we're not decoding a form yet => send the form
&send_header("$title");
&send_index();
&send_footer();
return;
}
# parse forms result and store it in an associative array
%forms=&cgiparse();
&send_header("$title: ICE Query Result");
$query = $forms{KEYWORDS};
# remove non-word characters
$query = &html2text($query);
$query =~ tr/a-zA-Z\xc0-\xff0-9\-/ /cs;
# remove leading and trailing whitespace
$query =~ s/^\s*(.*\S)\s*/\1/;
$pquery = $query;
$context = $forms{CONTEXT};
if($context =~ m:\(([^)]*)\):) {
$context=$1;
}else{
$context="";
}
$thesaurus = $forms{THESAURUS};
$substring = $forms{SUBSTRING};
$days = $forms{DAYS};
if(length($days)>0){
$pquery.=" -D $days";
}
if(length($thesaurus)>0){
$pquery.=" -T";
}
if(length($substring)>0){
$pquery.=" -S";
}
if(length($context)>0){
$pquery.=" @ $context";
}
if($forms{KEYWORDS})
{($err,$page) = &getquery($pquery);}
else
{
print '
Please enter some keywords.';
&send_footer();
return undef;
}
if($err){
print "Query was: $query \n";
print "Problem: $err\n";
&send_footer();
return undef;
}
# print "Preferences set for this query:\n";
print "
\n";
if ($query) {
print "
query was \"$query\"\n";
}
if($context){
print "
context was set to $context\n";
}
if($thesaurus){
print "
use of thesaurus turned on\n";
}
if($substring){
print "
The index contains the following\n";
print "items relevant to the query\n";
print "$page\n";
}else{
print "
Nothing found.\n";
}
&send_footer();
}
# if an error has occured, log it to stdout
if($@)
{
&send_header("Error in Script"); # just in case
print "$@\n";
&send_footer();
}
# print the CGI script header
sub send_header
{
local($title)=@_;
print "Content-type: text/html\n\n";
if($HEADER)
{$header_dat = `cat $HEADER` || &Error("Couldn't get header $HEADER");}
else
{$header_dat = "
\n$title\n\n\n
$title
\n";}
print "$header_dat";
}
sub send_footer
{
if($FOOTER)
{$footer_dat = `cat $FOOTER` || &Error("Couldn't get footer $FOOTER");}
else
{$footer_dat = "";}
print "$footer_dat";
}
# display the Forms interface
sub send_index
{
if($thesfile)
{
$THESAURUS_INSERT =
'
Use Thesaurus to extend search ';
}
local($scriptname) = $ENV{"SCRIPT_NAME"};
print "
END
}
# parse data from CGI request and store it as name/value pairs
sub cgiparse
{
if ($ENV{'REQUEST_METHOD'} eq "POST")
{read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});}
else
{$buffer = $ENV{'QUERY_STRING'};}
local(@query_strings) = split("&", $buffer);
foreach $q (@query_strings)
{
$q =~ s/\+/ /g;
($attr, $val) = split("=", $q);
$val =~ s/%/\n%/g;
local($tmpval);
foreach (split("\n",$val))
{
if(m:%(\w\w):)
{
local($binval) = hex($1);
if(($binval>0)&&($binval<256))
{
local($htmlval) = pack("C",$binval);
s/%$1/$htmlval/;
}
}
$tmpval .= $_;
}
$forms{$attr} = $tmpval;
}
%forms;
}
sub getquery
{
local($query)=@_;
local($page)="