/*
** Copyright (C) 2003-2006 Teus Benschop.
**  
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**  
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**  
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**  
*/


#include "utilities.h"
#include "search_utils.h"
#include "indexer.h"
#include "bible.h"
#include "gtkwrappers.h"
#include "references.h"
#include "bibletime.h"
#include <sqlite3.h>
#include "sqlite_reader.h"
#include "gwrappers.h"
#include "gtkwrappers.h"


void search_string_internal (const ustring& searchfor, bool casesensitive, 
                             const ustring& project, vector<ustring>& results,
                             bool use_book_selection, set<ustring>& books)
{
  sqlite3 *db;
  int rc;
  char *error = NULL;
  try
  {
    // Connect to database.
    Indexer worddatabase (0);
    rc = sqlite3_open(worddatabase.dbfilename (project).c_str (), &db);
    if (rc) {
      throw runtime_error (sqlite3_errmsg(db));
    }
    sqlite3_busy_timeout (db, 2000);
    // Get the string to search for. Note any apostrophies need to be doubled for SQLite.
    // We need to normalize the search expression, as prescribed, when comparing strings.
    ustring localsearchword;
    localsearchword = searchfor;
    localsearchword = localsearchword.normalize ();
    if (!casesensitive)
      localsearchword = localsearchword.casefold ();
    localsearchword = double_apostrophy (localsearchword);
    /*
    There is an additional complication because we use the SQL glob function
    to get the results quickly from the database. But this implies that the
    characters that are used in Unix globbing work different from what is
    expected.
    These characters are four: * ? [ ]
    Measures are taken so that these four characters behave as should, that is,
    searching for * really searches for the character * and does not work like
    in Unix filename globbing. The same thing for the other characters.
    The problem is solved by looking whether the user searches for one of these
    characters. If so, all data is retrieved from the database, and a manual
    search is performed on the results. It slows down the process a little.
    */
    // Open a reader.
    SqliteReader reader (0);
    if (localsearchword.find_first_of ("*?[]") != string::npos) {
      // Ok, any of the special characters are present. Slower search.
      // Execute the SQL statement, and read the results.
      char * sql;
      if (casesensitive) {
        sql = g_strdup_printf ("select book, chapter, verse, line from 'lines';");
      } else {
        sql = g_strdup_printf ("select book, chapter, verse, casefolded from 'lines';");
      }
      rc = sqlite3_exec(db, sql, reader.callback, &reader, &error);
      g_free (sql);
      if (rc != SQLITE_OK) {
        throw runtime_error (error);
      }
      for (unsigned int i = 0; i < reader.ustring0.size(); i++) {
        ustring line = reader.ustring3[i];
        if (line.find (localsearchword) != string::npos) {
          if (use_book_selection) {
            string book = index_to_biblebook (convert_to_int (reader.ustring0[i]));
            if (books.find (book) != books.end ()) {
              results.push_back (book_chapter_verse_to_reference (convert_to_int (reader.ustring0[i]), convert_to_int (reader.ustring1[i]), reader.ustring2[i]));
            }
          } else {
            results.push_back (book_chapter_verse_to_reference (convert_to_int (reader.ustring0[i]), convert_to_int (reader.ustring1[i]), reader.ustring2[i]));
          }
        }
      }
    } else {
      // Ok, no special characters: fast search.
      // Execute the SQL statement, and read the results.
      char * sql;
      if (casesensitive) {
        sql = g_strdup_printf ("select book, chapter, verse from 'lines' where line glob ('*%s*');", localsearchword.c_str ());
      } else {
        sql = g_strdup_printf ("select book, chapter, verse from 'lines' where casefolded glob ('*%s*');", localsearchword.c_str ());
      }
      rc = sqlite3_exec(db, sql, reader.callback, &reader, &error);
      g_free (sql);
      if (rc != SQLITE_OK) {
        throw runtime_error (error);
      }
      for (unsigned int i = 0; i < reader.ustring0.size(); i++) {
        if (use_book_selection) {
          string book = index_to_biblebook (convert_to_int (reader.ustring0[i]));
          if (books.find (book) != books.end ()) {
            results.push_back (book_chapter_verse_to_reference (convert_to_int (reader.ustring0[i]), convert_to_int (reader.ustring1[i]), reader.ustring2[i]));
          }
        } else {
          results.push_back (book_chapter_verse_to_reference (convert_to_int (reader.ustring0[i]), convert_to_int (reader.ustring1[i]), reader.ustring2[i]));
        }
      }
    }
  }
  catch (exception & ex)
  {
    gw_critical (ex.what ());
    gtkw_dialog_error (NULL, ex.what());
  }
  // Close connection.  
  sqlite3_close (db);
}


void search_string (Session * session, const ustring& project, 
                    bool use_books_selection, vector<ustring>& results)
/*
Convenience wrapper around search_string_internal(): the search word, the 
case sensitivity and the book selection are taken from the session.
Matching references are appended to "results".
*/
{
  const ustring& searchword = session->searchword;
  const bool casesensitive = session->search_case_sensitive;
  search_string_internal (searchword, casesensitive, project, results,
                          use_books_selection, session->selected_books);
}


void text_was_being_indexed_warning (GtkWidget * parentwindow)
/*
Shows a warning dialog, with "parentwindow" passed on as its parent, telling
the user that the search ran while the text was still being indexed.
*/
{
  const char * message =
    "At the time of searching, the text was still being indexed.\n"
    "Therefore the results may or may not be up to date.";
  gtkw_dialog_warning (parentwindow, message);
}


vector<ustring> search_in_bibledit (Configuration * configuration, Progress * progress, Session * session)
/*
This function searches in bibledit, and handles things like word boundaries,
globbing and so forth.

It works in two stages.
Stage one reads all lines from the index database of configuration->project,
assembles the text to search in according to the search area set in the
session, filters on the book selection, and keeps the lines that contain the 
search word (plain substring, or glob-style when globbing is on).
Stage two, which only runs for the advanced search (search page 1), filters 
the lines that were kept on word start and/or word end matching.

Returns the references of the lines that passed.
Database errors are not propagated: they are logged and shown in a dialog.
If the user cancels, the references collected so far are returned as they 
are, that is, without the stage two filtering having been completed.
*/
{
  // Progress information.
  progress->start();

  // Storage for intermediate search results.
  vector<bool> includes;     // Whether to include this result in the selection process.
  vector<ustring> refs;      // References
  vector<ustring> lines;     // Lines are stored here for higher speed.
  // Glob-style pattern matching pattern specification.
  GPatternSpec * gpatternspec = NULL;
  // The normalized, and possibly casefolded, word to search for.
  ustring searchword;
  // Set when the user cancels during stage one.
  bool cancelled = false;
  // Database variables. The handle is initialised, so that the cleanup is
  // safe even when an exception is thrown before the database is opened.
  sqlite3 *db = NULL;
  int rc;
  try
  {
    // Connect to database.
    Indexer worddatabase (0);
    rc = sqlite3_open(worddatabase.dbfilename (configuration->project).c_str (), &db);
    if (rc) {
      throw runtime_error (sqlite3_errmsg(db));
    }
    sqlite3_busy_timeout (db, 2000);
    // Get the string to search for.
    // We need to normalize the search expression, as prescribed, when comparing strings.
    // The word never goes into an SQL statement here, so no apostrophes 
    // need to be doubled.
    searchword = session->searchword;
    searchword = searchword.normalize ();
    if (!session->search_case_sensitive)
      searchword = searchword.casefold ();
    // Compile pattern specification.
    ustring patternword = "*" + searchword + "*";
    gpatternspec = g_pattern_spec_new (patternword.c_str());
    // To enable proper progress feedback, get the line count from the database.
    {
      SqliteReader reader (0);
      // No error buffer is requested from sqlite3_exec: the message is taken
      // from the connection instead, so there is nothing to free afterwards.
      rc = sqlite3_exec(db, "select count(*) from 'lines';", reader.callback, &reader, NULL);
      if (rc != SQLITE_OK) {
        throw runtime_error (sqlite3_errmsg(db));
      }
      gint count = 0;
      if (reader.ustring0.size() > 0)
        count = convert_to_int (reader.ustring0[0]);
      progress->set_iterate (0, 1, count);
      progress->set_text ("Searching stage one...");
    }
    /*
    There is an additional complication because we would use the SQL glob 
    function to get the results quickly from the database. But this implies that 
    the characters that are used in Unix globbing work different from what is
    expected.
    These characters are four: * ? [ ]
    This search function allows for the globbing characters * and ?.
    We cannot rely on the globbing function built in sqlite, as the Unicode part
    of it has not been developed enough. 
    We therefore rely on the Glob-style pattern matching functions of Glib.
    All data is retrieved from the database, and a manual search is performed 
    on the results.
    */
    // Open a reader.
    SqliteReader reader (0);
    // Execute the SQL statement, and read the results.
    // All the categories of text are stored in the reader.
    // And later we'll sort out what we need, based on the area(s) we search in.
    const char * sql;
    if (session->search_case_sensitive) {
      sql = "select book, chapter, verse, line, id, intro, head, chap, study, note, ref, vs from lines;";
    } else {
      sql = "select book, chapter, verse, casefolded, idcf, introcf, headcf, chapcf, studycf, notecf, refcf, vscf from lines;";
    }
    rc = sqlite3_exec(db, sql, reader.callback, &reader, NULL);
    if (rc != SQLITE_OK) {
      throw runtime_error (sqlite3_errmsg(db));
    }
    // At this point we have dealt with the case sensitivity,
    // though we still have not yet looked at the word, whether it's there.
    for (unsigned int i = 0; i < reader.ustring0.size(); i++) {
      progress->iterate ();
      if (progress->cancel) {
        // Leave the loop; the cleanup and the return are done once the
        // database has been closed.
        cancelled = true;
        break;
      }
      // Deal with the book selection.
      // And deal with the "search in current book only" setting.
      // This is done before assembling the line, so that no text is put 
      // together for lines that are going to be skipped anyway.
      const int booknumber = convert_to_int (reader.ustring0[i]);
      string book = index_to_biblebook (booknumber);
      if (session->search_current_book) {
        if (book != configuration->book_opened)
          continue;          
      } else {
        if (session->selected_books.find (book) == session->selected_books.end ())
          continue;
      }
      // Assemble the line of text we have to search through.
      ustring line;
      switch (session->area_type) {
        case atRaw : 
          line = reader.ustring3[i];
          break;
        case atAll :
          line = reader.ustring4[i];
          line.append (" ");
          line.append (reader.ustring5[i]);
          line.append (" ");
          line.append (reader.ustring6[i]);
          line.append (" ");
          line.append (reader.ustring7[i]);
          line.append (" ");
          line.append (reader.ustring8[i]);
          line.append (" ");
          line.append (reader.ustring9[i]);
          line.append (" ");
          line.append (reader.ustring10[i]);
          line.append (" ");
          line.append (reader.ustring11[i]);
          break;
        case atSelection :
          if (session->area_id) {
            line.append (reader.ustring4[i]);
            line.append (" ");
          }
          if (session->area_intro) {
            line.append (reader.ustring5[i]);
            line.append (" ");
          }
          if (session->area_heading) {
            line.append (reader.ustring6[i]);
            line.append (" ");
          }
          if (session->area_chapter) {
            line.append (reader.ustring7[i]);
            line.append (" ");
          }
          if (session->area_study) {
            line.append (reader.ustring8[i]);
            line.append (" ");
          }
          if (session->area_notes) {
            line.append (reader.ustring9[i]);
            line.append (" ");
          }
          if (session->area_xref) {
            line.append (reader.ustring10[i]);
            line.append (" ");
          }
          if (session->area_verse) {
            line.append (reader.ustring11[i]);
          }
          break;
      }
      // Deal with wildcards / globbing or not.
      if (session->search_globbing && (session->search_page == 1)) {
        // Use glob-style pattern matching here.
        if (!g_pattern_match_string (gpatternspec, line.c_str()))
          continue;
      } else {
        if (line.find (searchword) == string::npos)
          continue;
      }
      // Store results for the first stage.
      includes.push_back (true);
      refs.push_back (book_chapter_verse_to_reference (booknumber, convert_to_int (reader.ustring1[i]), reader.ustring2[i]));
      lines.push_back (line);
    }
  }
  catch (exception & ex)
  {
    gw_critical (ex.what ());
    if (!progress->cancel) {
      ustring message;
      message = ex.what();
      message.append ("\n\nTry again");
      gtkw_dialog_error (NULL, message);
    }
  }
  // Close connection, if there is one.
  if (db)
    sqlite3_close (db);

  // If the user cancelled during stage one, hand back what was found so far.
  if (cancelled) {
    gw_critical ("Search cancelled");
    progress->finish();
    if (gpatternspec)
      g_pattern_spec_free (gpatternspec);
    return refs;
  }

  /*
  Deal with matching the start of a word and/or of the end.
  
  There are four cases here.
  1. Match word start only.
  2. Match word end only.
  3. Both match start and end of a word, which implies "whole word".
  4. No matching at all.
  
  Boundary resolution is handled by pango_break(). Textual boundaries such 
  as word boundaries and line boundaries are determined for each item.
  In most cases a general algorithm suffices for this process, but in some
  cases a language module will override the generic algorithm with a more
  specific one.
  It seems to be easier programming to use GtkTextIter and GtkTextBuffer,
  rather than pango_break() directly.
  */
  const bool match_start = session->search_start_word_match;
  const bool match_end = session->search_end_word_match;

  // Progress feedback.
  progress->set_iterate (0, 1, includes.size());
  progress->set_text ("Searching stage two...");

  // Create a textbuffer for determining word boundaries.
  GtkTextBuffer * textbuffer;
  textbuffer = gtk_text_buffer_new (NULL);
  // Iterators needed.
  GtkTextIter startiter;
  GtkTextIter enditer;
  
  // Compile a new glob-style pattern to compare with.
  // The pattern compiled depends on the begin/end word matching.
  // The stage one pattern does not exist if stage one failed early on.
  if (gpatternspec)
    g_pattern_spec_free (gpatternspec);
  gpatternspec = NULL;
  if (match_start && !match_end) {
    ustring patternword = searchword + "*";
    gpatternspec = g_pattern_spec_new (patternword.c_str());
  }
  if (!match_start && match_end) {
    ustring patternword = "*" + searchword;
    gpatternspec = g_pattern_spec_new (patternword.c_str());
  }
  if (match_start && match_end) {
    ustring patternword = searchword;
    gpatternspec = g_pattern_spec_new (patternword.c_str());
  }

  // Go through all results and deal with the word start/end matching.
  // But do this only for advanced searching.
  if (session->search_page == 1) {
  
    for (unsigned int i = 0; i < includes.size(); i++) {
    
      // Progress.
      progress->iterate ();
      if (progress->cancel) {
        progress->finish();
        g_object_unref (textbuffer);
        if (gpatternspec)
          g_pattern_spec_free (gpatternspec);
        return refs;
      }

      // Case four: Nothing to test, so the line matches as it is.
      if (!match_start && !match_end) {
        includes[i] = true;
        continue;
      }
    
      // Text to work on is placed in the buffer.
      // A space is added at the end, because gtk_text_iter_forward_word_end()
      // returns FALSE when it arrives at the end of the buffer, and so a 
      // word that ends the text would never be looked at.
      ustring buffertext = lines[i];
      buffertext.append (" ");
      gtk_text_buffer_set_text (textbuffer, buffertext.c_str(), -1);

      // Store segments of text to compare against.
      // The text that gtk_text_iter_get_text() returns is newly allocated, 
      // and has to be freed once it has been copied.
      vector<ustring> segments;
    
      if (match_start && !match_end) {
        // Case one: Match word start only.  
        // Collect all strings starting with a word: from each word start 
        // till the end of the text.
        gtk_text_buffer_get_start_iter (textbuffer, &startiter);
        gtk_text_buffer_get_end_iter (textbuffer, &enditer);
        while (gtk_text_iter_forward_word_end (&startiter)) {
          gtk_text_iter_backward_word_start (&startiter);    
          gchar * text = gtk_text_iter_get_text (&startiter, &enditer);
          segments.push_back (text);
          g_free (text);
          gtk_text_iter_forward_word_end (&startiter);
        }
      } else if (!match_start && match_end) {
        // Case two: Match word end only.  
        // Collect all strings ending with a word: from the start of the 
        // text till each word end.
        gtk_text_buffer_get_start_iter (textbuffer, &startiter);
        gtk_text_buffer_get_start_iter (textbuffer, &enditer);
        while (gtk_text_iter_forward_word_end (&enditer)) {
          gchar * text = gtk_text_iter_get_text (&startiter, &enditer);
          segments.push_back (text);
          g_free (text);
        }
      } else {
        // Case three: Match both word start and end.  
        // Interpreted as "match whole word".
        // Collect all whole words.
        gtk_text_buffer_get_start_iter (textbuffer, &enditer);
        while (gtk_text_iter_forward_word_end (&enditer)) {
          startiter = enditer;
          gtk_text_iter_backward_word_start (&startiter);    
          gchar * text = gtk_text_iter_get_text (&startiter, &enditer);
          segments.push_back (text);
          g_free (text);
        }
      }

      // See whether the word is in any of the segments.
      bool match = false;
      for (unsigned int i2 = 0; i2 < segments.size(); i2++) {
        const ustring& segment = segments[i2];
        bool hit;
        if (session->search_globbing) {
          // Glob-style pattern matching.
          hit = g_pattern_match_string (gpatternspec, segment.c_str());
        } else if (match_start && match_end) {
          // Straight compare: the whole word.
          hit = (segment == searchword);
        } else if (match_start) {
          // Straight compare: the segment starts with the word.
          hit = (segment.find (searchword) == 0);
        } else {
          // Straight compare: the segment ends with the word.
          // The last occurrence is looked for, because the word may also 
          // occur earlier on in the segment. The length check avoids
          // wrapping round of the unsigned subtraction.
          hit = (segment.length() >= searchword.length())
             && (segment.rfind (searchword) == segment.length() - searchword.length());
        }
        if (hit) {
          match = true;
          break;
        }
      }
    
      // Mark this reference for matching or not.
      includes[i] = match;
    
    }
  }

  // Free memory.
  g_object_unref (textbuffer);
  if (gpatternspec)
    g_pattern_spec_free (gpatternspec);
  
  {
    // Pick out references that matched in the second stage.
    vector<ustring> refs2;
    for (unsigned int i = 0; i < refs.size(); i++) {
      if (includes[i])
        refs2.push_back(refs[i]);
    }
    refs.clear();
    refs.assign (refs2.begin(), refs2.end());
  }
    
  // Progress end.
  progress->finish();
  
  // Return search results.
  return refs;
}


vector<ustring> search_in_bibletime (Configuration * configuration, Progress * progress, Session * session, BibleTime * bibletime)
/*
This handles the bibletime search functions.

The search word of the session is normalized and handed to BibleTime; where
BibleTime searches depends on session->searchbibletimetype.
Each result that comes back has everything up to and including the first 
"] " removed, and is then run through reference_discover(). Results in which
a reference is found are returned as "book chapter:verse"; the other ones 
are dropped.

The "configuration" parameter is not used at present.
*/
{
  progress->start();
  progress->set_fraction (0.5);
  progress->set_text ("Searching in BibleTime");

  // Storage for search results.
  vector<ustring> searchresults;

  // We need to normalize the search expression, as prescribed, when comparing strings.
  // It is this normalized word that is searched for. Before, it was worked
  // out but the unnormalized word was passed on.
  // NOTE(review): assumes BibleTime wants the normalized form - to be confirmed.
  ustring localsearchword;
  localsearchword = session->searchword;
  localsearchword = localsearchword.normalize ();

  // Do the actual search.
  switch (session->searchbibletimetype) {
    case sbttDefaultBible: 
      searchresults = bibletime->search_in_default_bible (localsearchword);
      break;
    case sbttOpenModules:
      searchresults = bibletime->search_in_open_modules (localsearchword);
      break;
    case sbttBible:
      searchresults = bibletime->search_in_module (session->search_bibletime_bible, localsearchword);
      break;
    case sbttCommentary:
      searchresults = bibletime->search_in_module (session->search_bibletime_commentary, localsearchword);
      break;
  }

  // Change the results to our format.
  vector<ustring> searchresults2;
  for (unsigned int i = 0; i < searchresults.size(); i++) {
    // Strip the leading part, up to and including the "] ".
    size_t position = searchresults[i].find ("] ");
    if (position != string::npos) {
      searchresults[i].erase (0, position + 2);
    }
    ustring newbook, newchapter, newverse;
    if (reference_discover ("", "", "", searchresults[i], newbook, newchapter, newverse)) {
      searchresults2.push_back (newbook + " " + newchapter + ":" + newverse);
    }
  }
  
  progress->finish();

  return searchresults2;
}


void search_load_references (vector<ustring>& searchresults, 
  GtkListStore * liststore, GtkWidget * listview, GtkTreeViewColumn * treeviewcolumn, 
  Progress * progress, Session * session, Configuration * configuration)
/*
Takes the results of a search and puts them in the reference area. 
Depending on what was chosen in the search dialog, the results replace the 
references that are there already, are added to them, are taken away from 
them, or only those that are in both are kept.
For load, add and subtract the outcome is also left in "searchresults".
*/
{
  // Set progressbar halfway.
  progress->start();
  progress->set_fraction (0.5);
  
  // Fetch the references that are in the editor now.
  References references (liststore, listview, treeviewcolumn);
  references.get_loaded ();
  vector<ustring> inputrefs;
  references.get_references (inputrefs);

  // The basic search (search page zero) always loads its results, 
  // whatever has been set in the dialog.
  SearchResultsType searchresultstype =
    (session->search_page == 0) ? sstLoad : session->searchresultstype;

  // Work out which list of references to put in the editor. 
  // It stays NULL if nothing is to be done.
  vector<ustring> sharedrefs;
  vector<ustring> * toload = NULL;
  switch (searchresultstype) {
    case sstLoad:
    {
      // The search results go in as they are.
      toload = &searchresults;
      break;
    }
    case sstAdd:
    {
      // The ones in the editor and the search results together, no doubles.
      set<string> merged;
      for (vector<ustring>::const_iterator it = inputrefs.begin(); it != inputrefs.end(); ++it)
        merged.insert (*it);
      for (vector<ustring>::const_iterator it = searchresults.begin(); it != searchresults.end(); ++it)
        merged.insert (*it);
      searchresults.assign (merged.begin(), merged.end());
      toload = &searchresults;
      break;
    }
    case sstSubtract:
    {
      // The ones in the editor, less those that the search came up with.
      set<string> remaining;
      for (vector<ustring>::const_iterator it = inputrefs.begin(); it != inputrefs.end(); ++it)
        remaining.insert (*it);
      for (vector<ustring>::const_iterator it = searchresults.begin(); it != searchresults.end(); ++it)
        remaining.erase (*it);
      searchresults.assign (remaining.begin(), remaining.end());
      toload = &searchresults;
      break;
    }
    case sstShare:
    {
      // Only the search results that are in the editor as well are kept.
      // All other ones are discarded.
      set<ustring> known;
      for (vector<ustring>::const_iterator it = inputrefs.begin(); it != inputrefs.end(); ++it)
        known.insert (*it);
      for (vector<ustring>::const_iterator it = searchresults.begin(); it != searchresults.end(); ++it) {
        if (known.count (*it))
          sharedrefs.push_back (*it);
      }
      toload = &sharedrefs;
      break;
    }
  }

  // Sort and load the references.
  if (toload) {
    sort_references (*toload);
    references.set_references (*toload, session, configuration);
    references.fill_store ();
  }
  
  // Finish progress system.
  progress->finish();
}


void search_string
  (Configuration * configuration, GtkListStore * liststore, GtkWidget * listview, 
   GtkTreeViewColumn * treeviewcolumn, Progress * progress, Session * session, BibleTime * bibletime)
/*
Runs the search that belongs to the page of the search dialog that is active
(0: basic, 1: advanced, 2: BibleTime), and loads the outcome in the 
reference area.
*/
{
  // Storage for results;
  vector<ustring> searchresults;
  
  if (session->search_page == 0) {
    // Basic search, with optionally searching in currently opened books only.
    // For the duration of the search the book selection consists of the book
    // that is opened; the selection is put back afterwards.
    vector<ustring> savedbooks;
    savedbooks.assign (session->selected_books.begin(), session->selected_books.end());
    session->selected_books.clear();
    session->selected_books.insert (configuration->book_opened);
    search_string (session, configuration->project, session->search_current_book, searchresults);
    session->selected_books.clear();
    session->selected_books.insert (savedbooks.begin(), savedbooks.end());
  } else if (session->search_page == 1) {
    // Advanced search.
    searchresults = search_in_bibledit (configuration, progress, session);
  } else if (session->search_page == 2) {
    // Search in bibletime.
    searchresults = search_in_bibletime (configuration, progress, session, bibletime);
  }

  // Load the references in the editor.
  search_load_references (searchresults, liststore, listview, treeviewcolumn, progress, session, configuration);
}
