<?php

// Full-text search index stored in three dba databases inside one directory:
//   content.db  word    -> "|path/|path/|..." (pages whose content has the word)
//   keyword.db  keyword -> "|path/|path/|..." (pages whose name/title/keywords match)
//   time.db     path/   -> unix time at which the page was last indexed
// When no usable dba handler is available $dbtype is 'none' and every public
// search/index method returns immediately without doing anything.
class Search {

var $dir;
var $contentdb; // word -> paths
var $keyworddb; // keyword -> paths
var $timedb;    // path -> time of last index
var $dbtype;
var $open = false;
var $exclude = array('and'=>1, 'or'=>1, 'but'=>1, 'i'=>1, 'a'=>1, 'of'=>1, 'the'=>1);

// Old-style (PHP 4) constructor, kept so that code calling it by name keeps
// working. PHP 8 no longer treats a method named after the class as the
// constructor, so the real work lives in __construct().
function Search($search_data_dir) {
	$this->__construct($search_data_dir);
}

// Picks the dba handler (db4 preferred, then db2, otherwise 'none') and makes
// sure the data directory $search_data_dir exists, creating it if needed.
// Terminates the script with a message when the directory cannot be created
// because its parent is not writable.
function __construct($search_data_dir) {
	// Default to 'none' so a dba build without db4/db2 disables searching
	// instead of leaving the handler undefined.
	$this->dbtype = 'none';
	if (function_exists('dba_handlers')) {
		$handlers = dba_handlers();
		if (in_array('db4', $handlers))
			$this->dbtype = 'db4';
		elseif (in_array('db2', $handlers))
			$this->dbtype = 'db2';
	}

	$this->dir = $search_data_dir;
	if (!is_dir($this->dir)) {
		if (!is_writable(dirname($this->dir))) {
			die(dirname($this->dir) . " must be writeable by " . $this->_currentUserName());
		}
		mkdir($this->dir);
	}
}

// Name of the user the script runs as, for error messages. Falls back to a
// generic description when the posix extension is not available.
function _currentUserName() {
	if (function_exists('posix_getpwuid') && function_exists('posix_getuid')) {
		$user = posix_getpwuid(posix_getuid());
		if (is_array($user) && isset($user['name']))
			return $user['name'];
	}
	return 'the web server user';
}

// Opens the three databases with the given dba mode and stores the handles in
// $contentdb, $keyworddb and $timedb. Does nothing when they are already open
// (the mode of the existing handles is kept). Missing files are created first.
// Terminates the script when a database cannot be opened.
// mode of 'c' is broken before php 4.3.5
function opendb($mode='w') {
	if (!$this->open) {
		foreach (array('content','keyword','time') as $db) {
			$filename = "$this->dir/$db.db";
			if (!file_exists($filename)) {
				// create an empty database, then reopen it in the requested mode
				$rc = dba_open($filename, 'n', $this->dbtype);
				if ($rc) dba_close($rc);
			}
			$handle = dba_open($filename, $mode, $this->dbtype);
			if ($handle == false) {
				$msg = "Couldn't write to $filename ($this->dbtype)";
				if (!is_writable($filename)) {
					$msg .= "<br>That file must be writeable by " . $this->_currentUserName();
				}
				die($msg);
			}
			$dbfield = $db . 'db';
			$this->$dbfield = $handle;
		}
	}
	$this->open = true;
}

// Closes the three database handles if they are open.
function closedb() {
	if ($this->open) {
		dba_close($this->contentdb);
		dba_close($this->keyworddb);
		dba_close($this->timedb);
	}
	$this->open = false;
}

// deletes all indexes
function clear() {
	$this->closedb();
	// files may legitimately not exist yet, hence the suppression
	@unlink("$this->dir/content.db");
	@unlink("$this->dir/keyword.db");
	@unlink("$this->dir/time.db");
}

// Turns a concatenation of "|path/|path/|" lists into an array
// path => number of occurrences, sorted by descending count.
function _rankPaths($pages) {
	$paths = array_filter(explode('|', (string) $pages), 'strlen');
	$counts = array_count_values($paths);
	arsort($counts, SORT_NUMERIC);
	return $counts;
}

// Current time in seconds as a float, built from the string form of
// microtime() ("usec sec").
function _now() {
	$parts = explode(' ', microtime());
	return (float) $parts[1] + (float) $parts[0];
}

// Returns the path of the page whose keywords best match $s, or null when
// nothing matches or searching is disabled. $s is first looked up verbatim
// as a keyword; otherwise every token of $s is looked up and the pages hit by
// the most tokens win. When $currentpath is given, ties are resolved in
// favour of the path with the smallest levenshtein distance to it.
function findClosestPage($s, $currentpath='') {
	if ($this->dbtype=='none') return null;

	$s = (string) $s;
	$this->opendb('r');
	$pages = '';
	if (dba_exists($s, $this->keyworddb)) {
		$pages = dba_fetch($s, $this->keyworddb);
	}
	else {
		foreach ($this->tokenize($s) as $token) {
			// a missing key yields false, which concatenates as ''
			$pages .= dba_fetch($token, $this->keyworddb);
		}
	}
	$this->closedb();

	$counts = $this->_rankPaths($pages);
	if (!count($counts)) return null;

	// extract just the highest count getters
	$best = reset($counts);
	$top = array();
	foreach ($counts as $path => $count) {
		if ($count != $best) break;
		$top[] = (string) $path;
	}

	// calculate distance from the current path
	// return shorter first.
	if ($currentpath) {
		$distance = array();
		foreach ($top as $path) {
			$distance[$path] = levenshtein($currentpath, $path);
		}
		asort($distance, SORT_NUMERIC);
		$top = array_keys($distance);
	}
	return isset($top[0]) ? $top[0] : null;
}

// Searches keywords and content for the tokens of $s and returns an HTML
// fragment: a search form, the elapsed time and up to 20 results (title link,
// matching lines with the tokens in bold, and the path). Keyword hits count
// twice as much as content hits. $ps must provide getPage($path); the pages
// it returns must provide get('title') and search($tokens).
// Returns null when searching is disabled.
function findByContent($s,&$ps) {
	if ($this->dbtype=='none') return;

	$starttime = $this->_now();

	$this->opendb('r');
	$tokens = $this->tokenize($s);
	$pages = '';
	$quoted = array();
	foreach ($tokens as $token) {
		$kw = dba_fetch($token, $this->keyworddb);
		$pages .= $kw . $kw; // count keywords twice.
		$pages .= dba_fetch($token, $this->contentdb);
		// tokens may contain '.', which must not act as a regex wildcard
		$quoted[] = preg_quote($token, '/');
	}
	$this->closedb();

	// One alternation applied in a single pass, so a token can never match
	// inside the <b> tags inserted for another token.
	$highlight = count($quoted) ? '/' . implode('|', $quoted) . '/i' : '';

	$counts = $this->_rankPaths($pages);
	$counts = array_slice($counts,0,20); // no paging yet, limit to 20 results.
	$root = isset($GLOBALS['root']) ? $GLOBALS['root'] : ''; // hackish
	$html = '';
	foreach ($counts as $path => $count) {
		$page = $ps->getPage($path);
		$title = $page->get('title');
		if ($title == '') $title = "Untitled";
		// NOTE(review): title and path are emitted unescaped, as before;
		// confirm they are trusted markup before exposing user-edited pages.
		$html .= "<a href=\"$root$path\">$title</a><br/>\n";
		$matches = $page->search($tokens);
		foreach ($matches as $line) {
			if (trim($line)) {
				$line = preg_replace('/<[\/\!]*?[^<>]*?>|\*|- |\[|\]|{|}/si', '', $line); // strip html
				if ($highlight != '')
					$line = preg_replace($highlight, '<b>$0</b>', $line);
				$html .= "$line ... ";
			}
		}
		// show the path without its trailing slash
		$html .= "<div style='color:green;font-size:small;'>" . substr($path,0,strlen($path)-1) . "</div>";
		$html .= "<p></p>";
	}

	// sprintf rather than substr: tiny floats stringify as e.g. "9.0E-5"
	$total_time = sprintf('%.2f', $this->_now() - $starttime);
	$query = (isset($_REQUEST['s']) && is_string($_REQUEST['s'])) ? $_REQUEST['s'] : '';
	$time = "<form action=\"./\" method=get><input name=s value=\"" . htmlentities($query) . "\"><input type=submit value=" . _("Search") . "></form><p><i>" . _("Search Results") . " (" . $total_time . " " . _("seconds") . ")</i></p>";
	return $time . $html;
}

// Prints every key/value pair of the three databases as an HTML table
// (debugging aid). Leaves the databases open.
function dump() {
	if ($this->dbtype=='none') return;

	$this->opendb();
	echo "<table><tr><th>key</th><th>value</th></tr>\n";
	foreach (array('contentdb','keyworddb','timedb') as $dbname) {
		$db = $this->$dbname;
		// strict comparison: a key of "0" must not end the iteration
		for ($key = dba_firstkey($db); $key !== false; $key = dba_nextkey($db)) {
			$value = dba_fetch($key, $db);
			print "<tr><td>'" . htmlspecialchars((string) $key) . "'</td>";
			print "<td>'" . htmlspecialchars((string) $value) . "'</td></tr>\n";
		}
	}
	echo "</table><br><br>";
}

// Distinct indexable words of the page content: lower-cased, punctuation
// removed, split on any whitespace, longer than two characters and not in
// $exclude. index() and deindex() must agree on this list.
function _contentWords($page) {
	$content = preg_replace('/[^\w\s\d]/', '', (string) $page->get('content'));
	$content = strtolower((string) $content);
	$words = preg_split('/\s+/', $content, -1, PREG_SPLIT_NO_EMPTY);
	if (!is_array($words)) return array();
	$seen = array();
	foreach ($words as $word) {
		if (strlen($word) <= 2) continue;
		if (isset($this->exclude[$word])) continue;
		$seen[$word] = true;
	}
	return array_keys($seen);
}

// Distinct keywords of a page: tokens of its base name, its title and its
// 'keywords' field, minus the words in $exclude.
function _keywordWords($page) {
	$words = array_merge(
		$this->tokenize(basename($page->path)),
		$this->tokenize($page->get('title')),
		$this->tokenize($page->get('keywords'))
	);
	$seen = array();
	foreach ($words as $word) {
		$word = trim($word);
		if ($word == '') continue;
		if (isset($this->exclude[$word])) continue;
		$seen[$word] = true;
	}
	return array_keys($seen);
}

// Adds $path (already ending in '/') to the path list stored under $word,
// unless it is already listed.
function _addPath($db, $word, $path) {
	$word = (string) $word; // numeric words come back from array_keys() as ints
	if (dba_exists($word, $db)) {
		$pages = dba_fetch($word, $db);
		if (strpos($pages, "|$path|") === false) {
			dba_replace($word, $pages . "$path|", $db);
		}
	}
	else {
		dba_insert($word, "|$path|", $db);
	}
}

// Removes $path (already ending in '/') from the path list stored under
// $word and deletes the entry once the list is empty.
function _removePath($db, $word, $path) {
	$word = (string) $word;
	if (!dba_exists($word, $db)) return;
	$pages = str_replace("|$path|", '|', dba_fetch($word, $db));
	if ($pages == '|')
		dba_delete($word, $db);
	else
		dba_replace($word, $pages, $db);
}

// updates the indexes for $page
// Skipped when the page's 'index' field is false or when the stored index
// time is not older than the page's 'mtime'. Leaves the databases open.
function index($page) {
	if ($this->dbtype=='none') return;
	if ($page->get('index') == false) return;
	$this->opendb();

	$path = "$page->path/";

	## update timestamp
	if (dba_exists($path, $this->timedb)) {
		$time = dba_fetch($path, $this->timedb);
		if ($time >= $page->get('mtime')) {
			return;
		}
		dba_replace($path, (string) time(), $this->timedb);
	}
	else {
		dba_insert($path, (string) time(), $this->timedb);
	}

	## content db
	foreach ($this->_contentWords($page) as $word) {
		$this->_addPath($this->contentdb, $word, $path);
	}

	## keyword db
	foreach ($this->_keywordWords($page) as $word) {
		$this->_addPath($this->keyworddb, $word, $path);
	}
}

// removes the indexes for $page
// Leaves the databases open.
function deindex($page) {
	if ($this->dbtype=='none') return;

	$this->opendb();

	// create a new page in case the page we are passed
	// is not what is written to disk.
	$page = $page->ps->getPage($page->path);
	$path = "$page->path/";

	## update timestamp
	if (dba_exists($path, $this->timedb)) {
		dba_delete($path, $this->timedb);
	}

	## content db
	foreach ($this->_contentWords($page) as $word) {
		$this->_removePath($this->contentdb, $word, $path);
	}

	## keyword db
	foreach ($this->_keywordWords($page) as $word) {
		$this->_removePath($this->keyworddb, $word, $path);
	}
}

// Splits $str into lower-case tokens. Every character other than word
// characters, whitespace, '.' and '@' acts as a separator, as does any run of
// whitespace. Returns a list (possibly empty) of non-empty strings.
// $char is unused and only kept for backward compatibility.
function tokenize($str, $char='') {
	$str = preg_replace('/[^\w\s\d\.\@]/', ' ', (string) $str);
	$str = strtolower((string) $str);
	$tokens = preg_split('/\s+/', $str, -1, PREG_SPLIT_NO_EMPTY);
	return is_array($tokens) ? $tokens : array();
}

// Rewrites every indexed path that equals $old or lies below it so that it
// starts with $new instead. Leaves the databases open.
function replacePath($old, $new) {
	if ($this->dbtype=='none') return;

	$this->opendb();

	// Keys are collected first and modified afterwards: changing a database
	// while walking it with dba_firstkey()/dba_nextkey() can skip or repeat
	// entries.

	## time db: the path is the key
	$prefix = "$old/";
	$prefixlen = strlen($prefix);
	$db = $this->timedb;
	$renames = array();
	for ($key = dba_firstkey($db); $key !== false; $key = dba_nextkey($db)) {
		// plain prefix test; $old is a path, not a regular expression
		if (strncmp($key, $prefix, $prefixlen) == 0) {
			$renames[] = $key;
		}
	}
	foreach ($renames as $key) {
		$value = dba_fetch($key, $db);
		dba_delete($key, $db);
		dba_insert("$new/" . substr($key, $prefixlen), $value, $db);
	}

	## content and keyword db: the paths are inside the values
	$needle = "|$old/";
	foreach (array('contentdb','keyworddb') as $dbname) {
		$db = $this->$dbname;
		$affected = array();
		for ($key = dba_firstkey($db); $key !== false; $key = dba_nextkey($db)) {
			if (strpos((string) dba_fetch($key, $db), $needle) !== false) {
				$affected[] = $key;
			}
		}
		foreach ($affected as $key) {
			$value = str_replace($needle, "|$new/", dba_fetch($key, $db));
			dba_replace($key, $value, $db);
		}
	}
}


// recursively indexes all sub pages
// Children whose 'index' field is not true are skipped together with their
// own sub pages.
function indexTree($page) {
	if ($this->dbtype=='none') return;

	$this->index($page);
	foreach ($page->children() as $child) {
		if ($child->get('index') == true)
			$this->indexTree($child);
	}
}

// recursively deindexes all sub pages
function deindexTree($page) {
	if ($this->dbtype=='none') return;

	$this->deindex($page);
	foreach ($page->children() as $child) {
		$this->deindexTree($child);
	}
}

} // end class

?>
