#!/usr/bin/perl -wT
# vim: set sw=4 ts=4 si et:
#
# Copyright: GPL
# Author: Guido Socher
# Version: 1.5
#
use strict;
use warnings;
use IPC::Open2;

# global data:
my %FORM;          # decoded CGI parameters, filled in by cgi_receive()
my $data_start;    # offset of the first template line, set by printHTMLpage()

#
# Note: If you would like to modify the web-pages then
# just have a look at the end of this file.
#
# This is a cgi-bin example that feeds the search expressions to webfgrep
# on stdin and is therefore considered to be more secure.

#--------------change this according to your site:
#
# The address of this program (http address):
my $url = "http://bearix/cgi-bin/websearch";
# The http address that leads to the document root (http address):
my $home = "http://bearix/hw/";
# the webfgrep program (file path and name):
my $webfgrep = "/usr/bin/webfgrep";
# the document root on the web-server (directory name):
my $docroot = "/home/httpd/html/hw";
# searches are refused while the load average is above this value:
my $maxload = 1.5;
# upper bound (in bytes) for the body of a POST request:
my $maxpost = 8192;
#
#--------------
#
# Usage:
#   websearch
#   websearch search=1\&expr=linux
#

# Remove environment variables that could influence the shell we start
# later on (required to run external commands in taint mode).
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};

print "Content-type: text/html\n\n";

cgi_receive();
if ($FORM{'search'}) {
    search();
}
else {
    printHTMLpage('greet');
}

#--------------------------------------------------------
# printHTMLpage($reqpage)
#
# Print the template section named $reqpage from the DATA area at the end
# of this file: every line between the marker line "__ $reqpage __" and
# the next line starting with "__ ".  Each literal "$url" in the template
# is replaced by the configured address of this program.  Nothing is
# printed if no such section exists.
sub printHTMLpage {
    my $reqpage = shift;

    # Rewind to the start of the templates so that sections can be
    # requested in any order and more than once.
    $data_start = tell(DATA) unless defined $data_start;
    seek(DATA, $data_start, 0);

    my $printing = 0;
    while (my $line = <DATA>) {
        if (!$printing) {
            # \Q..\E: the section name is matched literally, not as a regex
            $printing = 1 if $line =~ /^__ \Q$reqpage\E __/;
            next;
        }
        last if $line =~ /^__ /;
        $line =~ s/\$url/$url/g;
        print $line;
    }
    return;
}

#--------------------------------------------------------
# search()
#
# Run webfgrep for the key words in $FORM{'expr'} and print the result
# page.  Uses $FORM{'how'} (anything but 'substr' adds -a to the webfgrep
# command line) and $FORM{'csens'} (when set, -i is left out).  Exits the
# program after printing an error page if the server load is too high or
# if no usable key word was given.  Dies if $docroot can not be entered
# or webfgrep can not be started.
sub search {
    $ENV{'PATH'} = "/bin:/usr/bin";

    # this is an overload protection. We must not stop web-service
    # due to too many search requests.
    my $uptime = `/usr/bin/uptime`;
    if (defined $uptime && $uptime =~ /average: *([\d.]+)/) {
        if ($1 > $maxload) {
            printHTMLpage('highload');
            exit(0);
        }
    }

    unless ($FORM{'expr'} && $FORM{'expr'} =~ /\S./) {
        printHTMLpage('searcherror');
        exit(0);
    }

    my $how      = ($FORM{'how'} && $FORM{'how'} eq 'substr') ? "" : "-a";
    my $casefold = $FORM{'csens'} ? "" : "-i";

    my $expr = $FORM{'expr'};
    $expr =~ tr/+/ /;    # some search engines use the plus

    # split ' ' skips leading white space, so it does not use up one of
    # the three key word slots with an empty field.
    my @keys = split ' ', $expr;
    splice(@keys, 3) if @keys > 3;

    # this is for security reasons: only word characters, dot and dash
    # are passed on, and a key word must be 2 to 50 characters long.
    my @terms;
    for my $key (@keys) {
        push @terms, $1 if $key =~ /([\w.\-]{2,50})/;
    }
    unless (@terms) {
        printHTMLpage('searcherror');
        exit(0);
    }
    my $sexpr = join ',', @terms;

    # Prints nothing unless a "__ resulthead __" section is added to the
    # templates at the end of this file.
    printHTMLpage('resulthead');
    print <<"EOT";
<HTML>
<HEAD>
<TITLE>search results</TITLE>
<BASE HREF="$home">
</HEAD>
<BODY BGCOLOR="#FFFFFF">
<H2>Search results</H2>
EOT

    # "or", not "||": with "||" the die was attached to $docroot and a
    # failing chdir went unnoticed.
    chdir($docroot) or die "ERROR: can not cd to $docroot\n";

    # search web-pages until a depth of 2 from docroot.
    # The command line contains only values configured above; the shell
    # is needed for the file name expansion.  The key words go to
    # webfgrep on stdin and its output is read back through a pipe, so
    # no temporary file with a predictable name in /tmp is needed.
    my $cmd = "$webfgrep $how $casefold -s *.html */*.html";
    my ($from_child, $to_child);
    my $pid = eval { open2($from_child, $to_child, $cmd) };
    die "ERROR: creating pipe\n" unless $pid;

    print {$to_child} "$sexpr\n";
    close $to_child;
    while (my $line = <$from_child>) {
        print $line;
    }
    close $from_child;
    waitpid($pid, 0);    # do not leave a zombie behind

    print "\n<hr size=2 NOSHADE>\n";
    print "<A href=\"$url\">Back to search page</a>\n";
    print "</BODY></HTML>\n";
    return;
}

#--------------------------------------------------------
# cgi_receive()
#
# Fill %FORM with the request parameters.  When running as CGI they are
# taken from QUERY_STRING (GET) or from stdin (POST, at most $maxpost
# bytes); otherwise the first command line argument is parsed the same
# way.  Values are url-decoded; parameters without a value are skipped.
# Dies on any other request method.
sub cgi_receive {
    my $buffer = "";

    if ($ENV{'GATEWAY_INTERFACE'} && $ENV{'GATEWAY_INTERFACE'} =~ /CGI/) {
        my $method = defined $ENV{'REQUEST_METHOD'} ? $ENV{'REQUEST_METHOD'} : '';
        if ($method eq 'GET') {
            $buffer = $ENV{'QUERY_STRING'} if $ENV{'QUERY_STRING'};
        }
        elsif ($method eq 'POST') {
            # Only accept a purely numeric length and never read more
            # than $maxpost bytes.
            my $length = 0;
            if (defined $ENV{'CONTENT_LENGTH'}
                && $ENV{'CONTENT_LENGTH'} =~ /^(\d+)$/) {
                $length = $1;
            }
            $length = $maxpost if $length > $maxpost;
            read(STDIN, $buffer, $length) if $length;
            $buffer = "" unless defined $buffer;
        }
        else {
            die "webfgrep Unknown REQUEST_METHOD: $method";
        }
    }
    else {
        $buffer = $ARGV[0] if $ARGV[0];
    }

    # now decode it:
    #
    # Split the name-value pairs
    for my $pair (split /&/, $buffer) {
        # limit 2: a '=' inside the value must not cut the value short
        my ($name, $value) = split /=/, $pair, 2;
        next unless defined $value && length $value;
        # Un-Webify plus signs and %-encoding
        $value =~ tr/+/ /;
        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        $FORM{$name} = $value;
    }
    return;
}
#--------------------------------------------------------
__END__
__ greet __
<HTML>
<HEAD>
<TITLE>The search page</TITLE>
</HEAD>
<BODY BGCOLOR="#FFFFFF">
<h2>Search our web site</H2>
<p>This page allows you to search through the web-pages.
You may enter up-to 3 key words. Key words must seperated by space.
A web-page matches your query if it contains all the key words.
</p>
<CENTER>
<FORM METHOD="get" ACTION="$url">
<TABLE BORDER="1" CELLPADDING="5" cellspacing="0" bgcolor="#7777ff" >
<TR>
<TD ALIGN="center">
<INPUT TYPE="text" NAME="expr" SIZE=30 VALUE="">
</TD>
<TD ALIGN="left">
<INPUT TYPE="radio" NAME="how" VALUE="anchor">exact words<BR>
<INPUT TYPE="radio" NAME="how" VALUE="substr" CHECKED>substring
<INPUT TYPE="hidden" NAME="search" VALUE="1">
</TD>
<TD ALIGN="left">
<INPUT TYPE="checkbox" NAME="csens" VALUE="1">case sensitive<BR>
</TD>
</TR>
<TR>
<TD COLSPAN="3" VALIGN="top" ALIGN="center">
<INPUT TYPE="submit" VALUE="Search">
</TD>
</TR>
</TABLE>
</FORM>
</CENTER>
<HR width="100%" size=2 NOSHADE >
<p>This webserach page is maintained by
<A href="mailto:xx@unknown.com?subject=comment-on-search">Put Your Name</A>
</p>
</BODY>
</HTML>
__ highload __
<HTML>
<HEAD>
<TITLE>too high load error</TITLE>
</HEAD>
<BODY>
<p>Sorry, the server processes currently too many requests.
Please use one of our mirror sites or try again later.</p>
</BODY>
</HTML>
__ searcherror __
<HTML>
<HEAD>
<TITLE>search error</TITLE>
</HEAD>
<BODY>
<p>Sorry, I could not understand your request.
You need to enter one or more key words.</p>
</BODY>
</HTML>
__ ende __