#!/usr/bin/perl
$|=1;
# Copyright 1999 Jose M. Vidal
# Authors:
# Jose M. Vidal, vidal@multiagent.com, http://jmvidal.ece.sc.edu
# Michael Schmitz <mschmitz@sema.de>
#
# This program is free software.  You can redistribute it and/or modify
# it under the terms of the GNU General Public License
#
# $Id: search.pl,v 1.33 2003/05/11 19:31:23 jmvidal Exp $

use CGI param, header, escape;
use Fcntl;

$dbase   = "urls.db";      ## Name of db file.
$otherbase  = "/etc/bk2site/searchbase.html";  ##Name of the template file
$searchprog = "/cgi-bin/bk2site/search.pl"; ##CHANGE this if your program is somewhere else.

# makes the functions that emulate bk2site's builtin function print
# debug outout to STDERR
$debug_emulateBuiltins = 0;

print header();
 
$database = param("db");      ## Name of db file.
if ($database eq "") {
  $database = $dbase;
};
unless ($database =~ /^[\w\.\/]+/ ) {
  print "Bad database $database name";
  exit;
}

$query = param("q"); 

$num = param("num");
unless ($num =~ /^[0-9]*$/ ) {
  print "Bad num";
  exit;
}
if ($num eq "") {
  $num = 20;
};
if ($num < 1) {
  $num = 20;
};
$stq = param("stq");
unless ($stq =~ /^[0-9]*$/ ) {
  print "Bad stq $stq";
  exit;
}
if ($stq eq "") {
  $stq = 0;
};
$escquery = escape($query);

# this was not secure
# open(DB,$database) ||
sysopen(DB, $database, O_RDONLY) ||
  die ("Can't open database(urls.db)= $database");
$checkDB = <DB>;
chop($checkDB);
$usingmetadata = 0;
if ($checkDB eq "#bk2site urls.db generated file metadata") {
  $numlines = 10;
  $usingmetadata = 1;
}
else {
  if ($checkDB ne "#bk2site urls.db generated file") {
    die ("Invalid database: $database");
  }
  else {
    $numlines = 9;
  }
}


$urltemplate = <DB>;
chop($urltemplate);
$newgif = <DB>;
chop($newgif);
$timecutoff = <DB>;
chop($timecutoff);
$oldesttime = time - $timecutoff * 86400;

open(OTHERBASE,$otherbase)||
  die ("Can't open $otherbase");

$temp = $urltemplate;
while ( $temp =~ /%IFCOMHAS\((.*?)\)\((.*?)\)/ ){
  push @directives, $1;
#   if ($1 eq $query) {
#     $query = $query;
#   }
  $temp =~ s/%IFCOMHAS\(.*?\)\(.*?\)//;
};


while (<OTHERBASE>) {
  if (/(.*)%QUERY(.*)/){
    $_ =~ s/%QUERY/$query/g;
  }
  if (/(.*)%ESCQUERY(.*)/){
    $_ =~ s/%ESCQUERY/$escquery/g;
  }
  if (/(.*)%NUMBER(.*)/){
    $_ =~ s/%NUMBER/$num/g;
  }
  if (/(.*)<!--bk2site:search-->(.*)/){
    $_ =~ s/<!--bk2site:search-->/$database/g;
  }
  if (/(.*)<!--bk2site-->(.*)/){
    print "$1";
    $afterComment = $2;
    last;
  }
  print;
}
print "<!-- Code below was automatically generated by bk2site-->\n";

@allRecords = <DB>; #there are 8 lines per record, so mutiply index by 8
## 0 is the type
## 1 is the ParentTitle
## 2 is the url or relative dir
## 3 is the Title
## 4 is the comment
## 5 is the creation time
## 6 is the last modified time
## 7 is the last visit time
## 8 is the number of user hits
## 9 is meta data

$typeN = 0;
$parentTitleN = 1;
$urlN = 2;
$titleN = 3;
$commentN = 4;
$creationtimeN = 5;
$modifiedtimeN = 6;
$visittimeN = 7;
$urlhitsN = 8;
$metaN = 9;

$numRecords = ($#allRecords + 1)/$numlines;

for ($i=0; $i <= $numRecords; ++$i){ #initialize hits to 0
  $numHits[$i] = 0; ##keyword hits
  $order[$i] = $i; ## given them an initial ordering
}

$categoryMatches = 0;
$siteMatches = 0;

if (length($query) > 1) {
  for ($i=0; $i < $numRecords; ++$i){ #do search
    $cr = ($i * $numlines);
    @words = split(/ +/, $query);
    $url = $allRecords[$cr + $urlN];
    $title = $allRecords[$cr + $titleN];
    $comment = $allRecords[$cr + $commentN];
    $comment =~ s/<BR>/ /g;
    if ($usingmetadata){
      $meta = $allRecords[$cr + $metaN];
    };

    @hasdirectives = ();
    foreach $dir (@directives) { #directives are strings that appear %IFCOMHAS(here)(new stuff)
      $temp = $dir;
      $temp =~ s/([\*\+\?\$\.^])/\\$1/g; #escape all nasties, to allow *cool* to be a directive
      if ($usingmetadata) {
	if ($comment =~ s/$temp//g || $meta =~ s/$temp//g ) {
	push @hasdirectives, $dir;
	};
      }
      else {
	if ($comment =~ s/$temp//g) {  #take *cool* out of comment and place it in hasdirectives.
	  push @hasdirectives, $dir;
	};
      };
    };
    
    foreach $q2 (@words) {
      if (length($q2) < 2) { #ignore 1-char queries
	next;
      };
	
      $q = $q2;
      $oldNumHits = $numHits[$i];

      foreach $dir (@hasdirectives){ #search also on the directives that were taken out of the comment
	if ($q eq $dir) {
	  $numHits[$i]++;
	}
      };

      $q =~ s/([\*\+\?\$\.^])/\\$1/g; #escape all nasties 
    
      # Query only URL's for LEAFS, not relative directories of FOLDER.
      if (($allRecords[$cr + $typeN]) eq "LEAF\n") {
        $numHits[$i] += ($url =~ s/($q)/\1/gi);
      }
      $numHits[$i] += ($title =~ s/($q)/<B>\1<\/B>/gi);

      if ($usingmetadata){
	$numHits[$i] += ($comment =~ s/($q)/<B>\1<\/B>/gi || $meta =~ s/($q)/<B>\1<\/B>/gi);
      }
      else {
	$numHits[$i] += ($comment =~ s/($q)/<B>\1<\/B>/gi);
      };

      if (($oldNumHits == 0) && ($numHits[$i] > 0)) {
	if (($allRecords[$cr + $typeN]) eq "FOLDER\n") {
	  $categoryMatches++; }
	else {
	  if (($allRecords[$cr + $typeN]) eq "LEAF\n") {
	  $siteMatches++;
	}
	  else {
	    print "ERROR: bad format in database file\n";
	    exit;
	};
	};
      };
    };
# Do not store url because it has not been changed.
#    $allRecords[$cr + $urlN] = $url;
    $allRecords[$cr + $titleN] = $title;
    if (@hasdirectives){
      foreach $dir (@hasdirectives) {
	$comment .= $dir . " ";
      };
    };
    $allRecords[$cr + $commentN] = $comment;
  };
};

@order = sort {$numHits[$b] <=> $numHits[$a]} @order;

print "<CENTER><B>$categoryMatches category matches and $siteMatches site matches.";
$endIndex = $stq + $num -1;
$maxIndex = $categoryMatches + $siteMatches - 1;
if ($endIndex > $maxIndex) {
  $endIndex = $maxIndex;
}
if ($stq > 0 || $endIndex < $maxIndex) {
  print " Showing $stq -- $endIndex.";
}
print "</B></CENTER>\n";

# Inside the following loops indexMatches is preincremented
$Index = -1;

if (($categoryMatches > 0) && ($categoryMatches > $stq)) {
  print "<B>Category Matches: $categoryMatches</B><BR>\n<UL>\n";
  for ($i=0; $i < $numRecords; ++$i){
    $rn = $order[$i];
    if (($numHits[$rn] > 0) && ($allRecords[($rn*$numlines) + $typeN] eq "FOLDER\n")) {
      $Index++;
      if ($Index < $stq) {
	next;
      } 
      elsif ($Index > $endIndex) {
	last;
      }
      $pt = $allRecords[($rn*$numlines)+$parentTitleN];
      chop($pt);
      $url = $allRecords[($rn*$numlines)+$urlN];
      $url =~ s/\?/%3F/g;
      chop($url);
      $title = $allRecords[($rn*$numlines)+$titleN];
      chop($title);
#      $title =~ tr/_/ /;
#      print "<LI>$pt<A HREF =\"../$url\">$title</A>\n";
#urls for directories have searchtorootpath already prepended.
      print "<LI>$pt<A HREF =\"$url\">$title</A>\n";
    }
  }
  print "</UL>\n";
}
else {
  $Index = $categoryMatches -1;
}

if (($siteMatches > 0) && (($categoryMatches + $siteMatches) > $stq)){
  print "<B>Site Matches: $siteMatches</B><BR>\n<UL>\n";
  for ($i=0; $i < $numRecords; ++$i){
    $rn = $order[$i];
    if (($numHits[$rn] > 0) && ($allRecords[($rn*$numlines) + $typeN] eq "LEAF\n")) {
      $pt = $allRecords[($rn*$numlines)+$parentTitleN];
      chop($pt);
      $notprinteddir = 1;
      for ($j=0; $j < $numRecords; ++$j){
	$rn = $order[$j];
	$npt = $allRecords[($rn*$numlines)+$parentTitleN]; 
	chop($npt);
	if (($numHits[$rn] > 0) && ($allRecords[($rn*$numlines) + $typeN] eq "LEAF\n") &&
	    ($npt eq $pt)) {
	  $numHits[$rn] = 0; #so we dont get chosen again.
	  $Index++;
	  if ($Index < $stq) {
	    next;
	  } 
	  elsif ($Index > $endIndex) {
	    last;
	  }
	  if ($notprinteddir){
	    print "<LI>$pt\n<UL>\n";
	    $notprinteddir =0;
	  };
	  $url = $allRecords[($rn*$numlines)+$urlN];
	  chop($url);
	  $title = $allRecords[($rn*$numlines)+$titleN];
	  chop($title);
	  $title =~ tr/_/ /;
	  $comment = $allRecords[($rn*$numlines)+$commentN];
	  chop($comment);
	  $numberofhits = $allRecords[($rn*$numlines)+$urlhitsN];
	  $creationtime = $allRecords[($rn*$numlines)+$creationtimeN];
	  ($ctsec, $ctmin, $cthour, $ctday, $ctmon, $ctyear, $ctwday, $ctyday, $ctisdst) = gmtime $creationtime;
	  $ctyearf = $ctyear + 1900;
	  if ($ctyear >= 100){
	    $ctyear -= 100;
	  }
	  $ctfull = localtime $creationtime;
	  $ctmon++;
	  $visittime = $allRecords[($rn*$numlines)+$visittimeN];
	  ($vtsec, $vtmin, $vthour, $vtday, $vtmon, $vtyear, $vtwday, $vtyday, $vtisdst) = gmtime $visittime;
	  $vtyearf = $vtyear + 1900;
	  if ($vtyear >= 100){
	    $vtyear -= 100;
	  }
	  $vtfull = localtime $visittime;
	  $vtmon++;
	  $modtime = $allRecords[($rn*$numlines)+$modifiedtimeN];
	  ($mtsec, $mtmin, $mthour, $mtday, $mtmon, $mtyear, $mtwday, $mtyday, $mtisdst) = gmtime $modtime;
	  $mtyearf = $mtyear + 1900;
	  if ($mtyear >= 100){
	    $mtyear -= 100;
	  }
	  $mtfull = localtime $modtime;
	  $mtmon++;
	  $urlhtml = $urltemplate;

#Use this line (instead of next one) if you want to count hits.
#	  $urlhtml =~ s/%URL/\/cgi-bin\/redirect.pl?url=$url/g;
	  $urlhtml =~ s/%URL/$url/g;
	  $urlhtml =~ s/%TITLE/$title/g;
	  $urlhtml =~ s/%HITS/$numberofhits/g;
	  $urlhtml =~ s/%DAYCRE/$ctday/g;
	  $urlhtml =~ s/%DAYVIS/$vtday/g;
	  $urlhtml =~ s/%DAYMOD/$mtday/g;
	  $urlhtml =~ s/%MONTHCRE1/$ctmon/g;
	  $urlhtml =~ s/%MONTHVIS1/$vtmon/g;
	  $urlhtml =~ s/%MONTHMOD1/$mtmon/g;
	  $urlhtml =~ s/%YEARCRE/$ctyear/g;
	  $urlhtml =~ s/%YEARFCRE/$ctyearf/g;
	  $urlhtml =~ s/%YEARVIS/$vtyear/g;
	  $urlhtml =~ s/%YEARFVIS/$vtyearf/g;
	  $urlhtml =~ s/%YEARMOD/$mtyear/g;
	  $urlhtml =~ s/%YEARFMOD/$mtyearf/g;
	  $urlhtml =~ s/%TIMEFCRE/$ctfull/g;
	  $urlhtml =~ s/%TIMEFVIS/$vtfull/g;
	  $urlhtml =~ s/%TIMEFMOD/$mtfull/g;
	  while ( $urlhtml =~ /%IFCOMHAS\((.*?)\)\((.*?)\)/ ){
	    $v1 = $1;
	    $v2 = $2;
	    $v1 =~ s/([*+?$.^])/\\$1/g; 
	    if ($comment =~ s/$v1//g) {
	      $urlhtml =~ s/%IFCOMHAS\(.*?\)\(.*?\)/$v2/;
	    }
	    else {
	      $urlhtml =~ s/%IFCOMHAS\(.*?\)\(.*?\)//;
	    };
	  };
	    
	  if ($creationtime > $oldesttime) {
	    $timestring = "$ctmon/$ctday/$ctyear";
	    $newgifhtml = $newgif;
	    $newgifhtml =~ s/%URL/$url/g;
	    $newgifhtml =~ s/%TITLE/$title/g;
	    $newgifhtml =~ s/%HITS/$numberofhits/g;
	    $newgifhtml =~ s/%DAYCRE/$ctday/g;
	    $newgifhtml =~ s/%DAYVIS/$vtday/g;
	    $newgifhtml =~ s/%DAYMOD/$mtday/g;
	    $newgifhtml =~ s/%MONTHCRE1/$ctmon/g;
	    $newgifhtml =~ s/%MONTHVIS1/$vtmon/g;
	    $newgifhtml =~ s/%MONTHMOD1/$mtmon/g;
	    $newgifhtml =~ s/%YEARCRE/$ctyear/g;
	    $newgifhtml =~ s/%YEARFCRE/$ctyearf/g;
	    $newgifhtml =~ s/%YEARVIS/$vtyear/g;
	    $newgifhtml =~ s/%YEARFVIS/$vtyearf/g;
	    $newgifhtml =~ s/%YEARMOD/$mtyear/g;
	    $newgifhtml =~ s/%YEARFMOD/$mtyearf/g;
	    $newgifhtml =~ s/%TIMEFCRE/$ctfull/g;
	    $newgifhtml =~ s/%TIMEFVIS/$vtfull/g;
	    $newgifhtml =~ s/%TIMEFMOD/$mtfull/g;
	    $urlhtml =~ s/%NEW/$newgifhtml/g;
	  }
	  else {
	    $urlhtml =~ s/%NEW//g;
	  };
	  if ($comment ne "") {
	    $urlhtml =~ s/%CONDDASH/-/g;
	  }
	  else {
	    $urlhtml =~ s/%CONDDASH//g;
	  };
	  $urlhtml =~ s/%(LONG)?COMMENT/$comment/g;

	  # Let the subroutines that emulate bk2site's builtin
	  # functions process the string before sending it to the
	  # resulting document.
	  $urlhtml = processFunctions(qq{$urlhtml});

	  print "<LI>$urlhtml";
	}
      }
      if ($notprinteddir == 0) {
	print "</UL>\n";
      }
      if ($Index > $endIndex) {
       last;
      }
    }
  }
  print "</UL>\n";
}

if ($stq > 0 || $endIndex < $maxIndex) {
  print "<center>";
  for ($i =0; $i <= $maxIndex; $i += $num){
    if (($stq >= $i) && ($stq < ($i + $num))){
      print " <b>$i</b>";
    }
    else {
      print " <a href=\"$searchprog?num=$num&amp;q=$escquery&amp;stq=$i&amp;db=$database\">$i</a> ";
    }
  }
  print "</center>";
};
print "<!-- Code above was automatically generated by bk2site-->\n";
print $afterComment;

while (<OTHERBASE>){
  if (/(.*)%QUERY(.*)/){
    $_ =~ s/%QUERY/$query/g;
  }
  if (/(.*)%ESCQUERY(.*)/){
    $_ =~ s/%ESCQUERY/$escquery/g;
  }
  if (/(.*)%NUMBER(.*)/){
    $_ =~ s/%NUMBER/$num/g;
  }
  print;
}

close(DB);

sub processFunctions {
  my ($input, $output_prefix) = @_;
  my $output;
  my $prefix;
  my $function;
  my $rest;
  my $args;

  print STDERR "$output_prefix". "entering processFunctions($input)\n" if $debug_emulateBuiltins;
  $output_prefix .= "  ";

  if ( $input =~ /(&(FILL|NOHTML|REPLACE|CUT|NOLINEBREAKS|NOACCENTS|FS)\[)/ ) {
    $prefix = $`;
    $function = $2;
    $rest = $';

    print STDERR "$output_prefix\$prefix = $prefix\n" if $debug_emulateBuiltins;
    print STDERR "$output_prefix\$function = $function\n" if $debug_emulateBuiltins;
    print STDERR "$output_prefix\$rest = $rest\n" if $debug_emulateBuiltins;

    $args = processFunctions($rest, $output_prefix);
    print STDERR "$output_prefix\$args = $args\n" if $debug_emulateBuiltins;

    $res = eval("builtin_$function(q{$args}, \"$output_prefix\")");

    if ( !defined($res) ) {
      # Something was wrong with the arguments to the emulated builtin
      # function. Ideally we should return the unmodified string
      # argument, but if builtin_* hadn't been able to parse the input
      # string it is unfeasible here. I think returning the entire
      # input string is the best we can do.
      #$res = $input;
      $res = "&$function\[$args";
    }

    $output = "$prefix$res";
  }
  else {
    $output = $input;
  }

  print STDERR "$output_prefix" . "returning $output\n" if $debug_emulateBuiltins;
  return($output);
}

# the following builtin function is not yet implemented
sub builtin_FILL {
  my ($arg, $output_prefix) = @_;
  my $result;
  my $string;
  my $column;
  my $rest;

  print STDERR "${output_prefix}FILL($arg)\n" if $debug_emulateBuiltins;
  $output_prefix .= "  ";

  if ( $arg =~ /^([^|[\]]*)\|(\d+)\]/ ) {
    $string = $1;
    $column = $2;
    $rest = $';

    # OK, how do we have to fill $string to column $column? Apart from
    # that, I think this kind of layout aspects should be left to the
    # browser.

    # At this time we simply take $string as the result of filling.
    $result = "$string$rest";
  }
  else {
    # The parameter list if malformed.
    $result = undef;
  }

  print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins;
  return($result);
}

sub builtin_NOHTML {
  my ($arg, $output_prefix) = @_;
  my $result;
  my $string;
  my $rest;

  print STDERR "${output_prefix}NOHTML($arg)\n" if $debug_emulateBuiltins;
  $output_prefix .= "  ";

  if ( $arg =~ /^([^|[\]]*)\]/ ) {
    $string = $1;
    $rest = $';

    $string =~ s/<[^>]+>//g;

    $result = "$string$rest";
  }
  else {
    # The parameter list if malformed.
    $result = undef;
  }

  print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins;
  return($result);
}

sub builtin_REPLACE {
  my ($arg, $output_prefix) = @_;
  my $result;
  my $string;
  my $from;
  my $to;
  my $rest;

  print STDERR "${output_prefix}REPLACE($arg)\n" if $debug_emulateBuiltins;
  $output_prefix .= "  ";

  if ( $arg =~ /^([^|[\]]*)\|([^|[\]]*)\|([^|[\]]*)\]/ ) {
    $string = $1;
    $from = qr/$2/;
    $to = $3;
    $rest = $';

    print STDERR "\$string = $string\n" if $debug_emulateBuiltins;
    print STDERR "\$from = $from\n" if $debug_emulateBuiltins;
    print STDERR "\$to = $to\n" if $debug_emulateBuiltins;
    print STDERR "\$rest = $rest\n" if $debug_emulateBuiltins;

    if ( $string && $from ) {
      $string =~ s/$from/$to/g;
    }

    $result = "$string$rest";
  }
  else {
    # The parameter list if malformed.
    $result = undef;
  }

  print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins;
  return($result);
}

sub builtin_CUT {
  my ($arg, $output_prefix) = @_;
  my $result;
  my $string;
  my $n;
  my $rest;

  print STDERR "${output_prefix}CUT($arg)\n" if $debug_emulateBuiltins;
  $output_prefix .= "  ";

  if ( $arg =~ /^([^|[\]]*)\|(\d+)\]/ ) {
    $string = $1;
    $n = $2;
    $rest = $';

    $result = "${\(substr($string, 0, $n))}$rest";
  }
  else {
    # The parameter list if malformed.
    $result = undef;
  }

  print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins;
  return($result);
}

sub builtin_NOLINEBREAKS {
  my ($arg, $output_prefix) = @_;

  print STDERR "${output_prefix}NOLINEBREAKS($arg)\n" if $debug_emulateBuiltins;
  $output_prefix .= "  ";

  if ( $arg =~ /^([^|[\]]*)\]/ ) {
    $string = $1;
    $rest = $';

    $string =~ s/[\n\b\t]/ /g;

    $result = "$string$rest";
  }
  else {
    # The parameter list if malformed.
    $result = undef;
  }

  print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins;
  return($result);
}

# the following builtin function is not yet implemented
sub builtin_NOACCENTS {
  my ($arg, $output_prefix) = @_;

  print STDERR "${output_prefix}NOACCENTS($arg)\n" if $debug_emulateBuiltins;
  $output_prefix .= "  ";

  if ( $arg =~ /^([^|[\]]*)\]/ ) {
    $string = $1;
    $rest = $';

    # What do we have to do with $string???

    # At this time we simply take $string as the result.
    $result = "$string$rest";
  }
  else {
    # The parameter list if malformed.
    $result = undef;
  }

  print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins;
  return($result);
}

sub builtin_FS {
  my ($arg, $output_prefix) = @_;

  print STDERR "${output_prefix}FS($arg)\n" if $debug_emulateBuiltins;
  $output_prefix .= "  ";

  if ( $arg =~ /^([^|[\]]*)\]/ ) {
    $string = $1;
    $rest = $';

    $string =~ s/([.!?]).*/$1/;

    $result = "$string$rest";
  }
  else {
    # The parameter list if malformed.
    $result = undef;
  }

  print STDERR "${output_prefix}\$result = ${\(defined($result) ? $result : \"undef\" )}\n" if $debug_emulateBuiltins;
  return($result);
}
