/*
 * Copyright (C) 2000-2004 Chris Ross and various contributors
 * Copyright (C) 1999-2000 Chris Ross
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * o Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * o Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * o Neither the name of the ferite software nor the names of its contributors may
 *   be used to endorse or promote products derived from this software without
 *   specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "ferite.h"
#include <pcre.h> /* perl compatible regular expressions */

/**
 * !group Regular Expressions
 * !description Regular expressions play an important role within the ferite language. They can
 *              be used from native code using a very simple interface.
 */

/***************************************************************
 * The way some of this code operates (eg within the matches)  *
 * was semi borrowed from php, this is because the pcre module *
 * that is shipped with php is rather good.                    *
 *                                                             *
 * Those parts of the code borrowed were written by            *
 *   Andrei Zmievski <andrei@ispi.net> and are subject to the  *
 *   PHP 2.02 license - http://www.php.net/license/2_02.txt    *
 ***************************************************************/

/*
 * Memory functions so pcre uses ferite's memory management.
 *
 * These two pointers hold the allocator hooks PCRE had installed before
 * ferite_init_regex() replaced them (which it only does when
 * ferite_use_mm_with_pcre == 1); ferite_deinit_regex() restores
 * pcre_malloc / pcre_free from them.
 */
void *(*old_pcre_malloc)(size_t);
void  (*old_pcre_free)(void *);

/*
 * Allocation hook installed into pcre_malloc by ferite_init_regex().
 * Forwards the request for `size` bytes to fmalloc and hands back whatever
 * fmalloc produced.
 */
void *ferite_regex_malloc( size_t size)
{
    void *block = NULL;

    FE_ENTER_FUNCTION;
    block = fmalloc( size );
    FE_LEAVE_FUNCTION( block );
}

/*
 * Release hook installed into pcre_free by ferite_init_regex().
 * Passes `ptr` to ffree, the counterpart of the fmalloc call made by
 * ferite_regex_malloc().
 */
void ferite_regex_free( void *ptr )
{
    FE_ENTER_FUNCTION;
    ffree( ptr );
    FE_LEAVE_FUNCTION( NOWT );
}

/*
 * Set up the regex subsystem.
 *
 * When ferite_use_mm_with_pcre is 1, PCRE's current allocator hooks are
 * saved in old_pcre_malloc / old_pcre_free and replaced with
 * ferite_regex_malloc / ferite_regex_free. The PCRE version string is
 * reported through FUD in either case.
 */
void ferite_init_regex()
{
    FE_ENTER_FUNCTION;
    if( ferite_use_mm_with_pcre == 1 )
    {
        /* allocation hook: remember PCRE's, then install ours */
        old_pcre_malloc = pcre_malloc;
        pcre_malloc = ferite_regex_malloc;

        /* release hook: remember PCRE's, then install ours */
        old_pcre_free = pcre_free;
        pcre_free = ferite_regex_free;
    }
    FUD(( "REGEX, Using: PCRE %s\n", pcre_version() ));
    FE_LEAVE_FUNCTION( NOWT );
}

/*
 * Tear down the regex subsystem.
 *
 * When ferite_use_mm_with_pcre is 1, the allocator hooks saved by
 * ferite_init_regex() are written back into pcre_malloc / pcre_free.
 */
void ferite_deinit_regex()
{
    FE_ENTER_FUNCTION;
    if( ferite_use_mm_with_pcre == 1 )
    {
        /* hand PCRE its original hooks back */
        pcre_free = old_pcre_free;
        pcre_malloc = old_pcre_malloc;
    }
    FE_LEAVE_FUNCTION( NOWT );
}

/**
 * !function ferite_create_regex
 * !declaration FeriteRegex *ferite_create_regex()
 * !brief Allocate an empty regular expression structure
 * !description Every pointer member (pattern, compile and swap buffers, compiled
 *              expression, extra info) starts out as NULL and both option fields
 *              start out as 0.
 * !return A new FeriteRegex structure
 */
FeriteRegex *ferite_create_regex()
{
    FeriteRegex *rgx = NULL;

    FE_ENTER_FUNCTION;

    rgx = fmalloc( sizeof( FeriteRegex ) );

    /* no pattern text or working buffers yet */
    rgx->pattern = NULL;
    rgx->compile_buf = NULL;
    rgx->swap_buf = NULL;

    /* nothing has been compiled */
    rgx->compiled_re = NULL;
    rgx->extra_info = NULL;

    /* no option flags set */
    rgx->pcre_options = 0;
    rgx->fergx_options = 0;

    FE_LEAVE_FUNCTION( rgx );
}

/**
 * !function ferite_delete_regex
 * !declaration void ferite_delete_regex( FeriteRegex *rgx )
 * !brief Delete a structure created using ferite_create_regex()
 * !description Releases the pattern, the compiled expression, the compile buffer and
 *              the swap buffer (each only if set) and then the structure itself.
 *              Passing NULL is allowed and does nothing.
 * !param FeriteRegex *rgx The regex to delete
 */
void ferite_delete_regex( FeriteRegex *rgx )
{
    FE_ENTER_FUNCTION;

    if( rgx != NULL )
    {
        if( rgx->pattern != NULL )
        {
            ffree( rgx->pattern );
        }

        if( rgx->compiled_re != NULL )
        {
            /* the compiled expression is released with ffree when ferite's
             * memory manager is in use for pcre, and with pcre_free otherwise */
            if( ferite_use_mm_with_pcre )
            {
                ffree( rgx->compiled_re );
            }
            else
            {
                pcre_free( rgx->compiled_re );
            }
        }

        if( rgx->compile_buf != NULL )
        {
            ffree( rgx->compile_buf );
        }

        if( rgx->swap_buf != NULL )
        {
            ffree( rgx->swap_buf );
        }

        ffree( rgx );
    }
    FE_LEAVE_FUNCTION( NOWT );
}

/**
 * !function ferite_regex_dup
 * !declaration FeriteRegex *ferite_regex_dup( FeriteRegex *rgx )
 * !brief Duplicate a regular expression structure
 * !description The pattern, compile buffer and swap buffer are copied with fstrdup and
 *              both option fields are copied by value. The compiled expression and the
 *              extra info are not copied; they are set to NULL in the duplicate.
 * !param FeriteRegex *rgx The regex to duplicate
 * !return A copy of the regular expression, or NULL if rgx is NULL
 */
FeriteRegex *ferite_regex_dup( FeriteRegex *rgx )
{
    FeriteRegex *copy = NULL;

    FE_ENTER_FUNCTION;
    if( rgx != NULL )
    {
        copy = fmalloc(sizeof(FeriteRegex));

        /* NOTE(review): ferite_create_regex() leaves these three members NULL,
         * so this relies on fstrdup accepting a NULL argument - confirm. */
        copy->pattern = fstrdup( rgx->pattern );
        copy->compile_buf = fstrdup( rgx->compile_buf );
        copy->swap_buf = fstrdup( rgx->swap_buf );

        copy->pcre_options = rgx->pcre_options;
        copy->fergx_options = rgx->fergx_options;

        /* compiled state is deliberately left empty in the duplicate */
        copy->compiled_re = NULL;
        copy->extra_info = NULL;
    }
    FE_LEAVE_FUNCTION( copy );
}

/**
 * !function ferite_generate_regex
 * !declaration FeriteRegex *ferite_generate_regex( FeriteScript *script, char *pattern, char *flags )
 * !brief A convenience function to create and populate a regex structure
 * !description Each character of flags selects one option: x (extended), s (dot matches
 *              all), m (multi line), i (case insensitive), A (anchored) and D (dollar
 *              matches at the absolute end only) set PCRE options; g (global matching)
 *              and o (compile now and cache the result) set ferite options. Any other
 *              character produces a warning and is ignored. A NULL flags string is
 *              treated as "no flags".
 * !param FeriteScript *script The script
 * !param char *pattern The regular expression
 * !param char *flags The flags that affect the behavior of the regular expression.
 * !return A regular expression structure setup and populated correctly
 */
FeriteRegex *ferite_generate_regex( FeriteScript *script, char *pattern, char *flags )
{
    FeriteRegex *ptr = NULL;
    size_t i = 0;
    size_t flag_count = 0;

    FE_ENTER_FUNCTION;

    ptr = ferite_create_regex();
    ptr->pattern = fstrdup( pattern );

    /* walk the option characters; the length is taken once, and a NULL
     * flags string simply yields zero iterations */
    FUD(( "REGEX postfix's\n" ));
    flag_count = ( flags != NULL ? strlen( flags ) : 0 );
    for( i = 0; i < flag_count; i++ )
    {
        switch( flags[i] )
        {
            /* pcre supported perl'isms */
          case 'x': FUD(( "REGEX: `-> Extended Line\n" ));    ptr->pcre_options |= PCRE_EXTENDED; break;
          case 's': FUD(( "REGEX: `-> Single Line Mode\n" )); ptr->pcre_options |= PCRE_DOTALL; break;
          case 'm': FUD(( "REGEX: `-> Multi Line Mode\n" ));  ptr->pcre_options |= PCRE_MULTILINE; break;
          case 'i': FUD(( "REGEX: `-> Case Insensitive\n" )); ptr->pcre_options |= PCRE_CASELESS; break;
            /* pcre + ferite internal'isms */
          case 'g': FUD(( "REGEX: `-> Global Matching\n" ));  ptr->fergx_options |= F_RGX_GLOBAL; break;
          case 'o': FUD(( "REGEX: `-> Cache Compile\n" ));    ptr->fergx_options |= F_RGX_COMPILE; break;
            /* pcre's specific stuff */
          case 'A': FUD(( "REGEX: `-> Anchored\n" ));         ptr->pcre_options |= PCRE_ANCHORED; break;
          case 'D': FUD(( "REGEX: `-> Dollar @ ABS End\n" )); ptr->pcre_options |= PCRE_DOLLAR_ENDONLY; break;
          default:
            ferite_warning( script, "Regex Option %c - UNKNOWN, ignoring\n", flags[i] );
            break;
        }
    }

    /* setup compile buffers and, when the 'o' flag was given, pre-compile the
     * regex; without it compiled_re stays NULL */
    ptr->compile_buf = fstrdup( ptr->pattern );
    if( ptr->fergx_options & F_RGX_COMPILE )
    {
        FUD(( "REGEX: Compiling RGX: \"%s\"\n", ptr->compile_buf ));
        ptr->compiled_re = ferite_compile_regex( script, ptr->compile_buf, ptr->pcre_options );
    }
    FE_LEAVE_FUNCTION( ptr );
}

/**
 * !function ferite_compile_regex
 * !declaration void *ferite_compile_regex( FeriteScript *script, char *pattern, int options )
 * !brief Compile a regular expression to make things faster
 * !description The pattern is handed to pcre_compile. If compilation fails a warning
 *              carrying PCRE's error message and error offset is raised on the script.
 * !param FeriteScript *script The script
 * !param char *pattern The pattern to compile
 * !param int options The options that affect how it is compiled
 * !return A pointer to the in memory compiled regular expression, or NULL if the
 *         pattern could not be compiled
 */
void *ferite_compile_regex( FeriteScript *script, char *pattern, int options )
{
    void       *ptr = NULL;
    const char *error = NULL;
    int         erroroffset = 0;

    FE_ENTER_FUNCTION;
    ptr = pcre_compile( pattern, options, &error, &erroroffset, NULL );

    if( ptr == NULL )
    {
        ferite_warning( script, "Regex Compilation failed: %s at offset %d\n", error, erroroffset );
    }

    /* single exit: the failure case (ptr == NULL) also leaves through
     * FE_LEAVE_FUNCTION, like every other function in this file, instead of
     * a bare return that skips it */
    FE_LEAVE_FUNCTION( ptr );
}

FeriteVariable *ferite_execute_swap_regex( FeriteRegex *rgx, FeriteVariable *target, int global, FeriteNamespace *ns, FeriteScript *script )
{
    FeriteVariable *retv = NULL; /* rturn value */
    FeriteVariable *ptr = NULL;  /* so we can fudge with the arrays */
    FeriteVariable *ePtr = NULL, *ePtrRV = NULL;
    FeriteNamespaceBucket *nsb;
    size_t captured_str_cnt = 0; /* number of strings that will be captured in ()'s */
    int *offsets;         /* array of subpattern offsets */
    int size_offsets;     /* size of the array */
    int start_offset;     /* damn nation, this is the new start :) */
    int current_var = 0;  /* the global variable we need to update */
    int current_match = 0;
    int count = 0;        /* count the number of subexpressions */
    int i = 0, swaps = 0, target_backtick = 0;
    size_t walkpos = 0;
    size_t endOfLastMatch = 0;
    char buf[10], *match = NULL, *replace_buffer, *tmpbuf = NULL;
    char *newstr = NULL;
    char **stringlist = NULL /* the subparts () values */;
    int g_notempty = 0;
    size_t repbuf_len = 0;

    FE_ENTER_FUNCTION;

   /* if the regex is not compiled already -> compile it */
    if( rgx->compiled_re == NULL )
    {
        rgx->compiled_re = ferite_compile_regex( script, rgx->compile_buf, rgx->pcre_options );
        if( rgx->compiled_re == NULL )
        {
            retv = ferite_create_number_long_variable( script, "regex-exec-return", 0, FE_STATIC );
            FE_LEAVE_FUNCTION( retv );
        }
    }

    /* get the number of subparts */
    captured_str_cnt = pcre_info( rgx->compiled_re, NULL, NULL ) + 1;
    /* create an offset array */
    size_offsets = (int)(captured_str_cnt * 3);
    offsets = (int *)fmalloc(size_offsets * sizeof(int));

    /* we need to setup the global variable hash so that r1->r99 holds the number of subparts */
    for( i = 1; i <= (unsigned int)(captured_str_cnt > script->last_regex_count ? captured_str_cnt : script->last_regex_count); i++ )
    {
        memset( buf, '\0', 10 );
        sprintf( buf, "r%d", (int)i );
        nsb = ferite_namespace_element_exists( script, ns, buf );
        ptr = ( nsb == NULL ? NULL : nsb->data );
        if( ptr == NULL )
        {
            ptr = ferite_create_string_variable( script, buf, NULL, FE_ALLOC );
            MARK_VARIABLE_AS_FINALSET( ptr ); /* we make these global variables read only */
            ferite_register_ns_variable( script, ns, ptr );
        }
    }
    script->last_regex_count = (unsigned int)captured_str_cnt;

    start_offset = 0;
    current_var = 1;
    current_match = 0;

    newstr = fmalloc( 1 );
    *newstr = '\0';

    do
    {
        /* execute regex */
        FUD(( "REGEX: Executing against \"%s\"\n", FE_STR2PTR(target) ));
        count = pcre_exec( rgx->compiled_re, rgx->extra_info, FE_STR2PTR(target), (unsigned int)FE_STRLEN(target),
                           start_offset, g_notempty, offsets, size_offsets );

        /* check for too many substrings */
        if( count == 0 )
        {
            ferite_warning( script, "A match was found but too many substrings found.\n" );
            count = size_offsets / 3;
        }

        FUD(( "REGEX: match count = %d\n", count ));
        /* we matched something */
        if( count >= 0 )
        {
            match = FE_STR2PTR(target) + offsets[0];

            /* get the list of substrings */
            if( pcre_get_substring_list( FE_STR2PTR(target), offsets, (int)count, (const char ***)&stringlist) < 0 )
            {
                ffree(offsets);
                ferite_warning( script, "Unable to obtain captured strings in regular expression.\n");
                retv = ferite_create_number_long_variable( script, "regex-exec-return", 0, FE_STATIC );
                FE_LEAVE_FUNCTION( retv );
            }

     /* need to setup the r%% variables we do this before doing the string stuff as
      * it allows us to use it in the eval operator */
     /* 0 = total matched string 1++ is subparts */
            for( i = 1; i < count; i++ )
            {
                memset( buf, '\0', 10 );
                sprintf( buf, "r%d", (int)i /*current_var*/ );
                nsb = ferite_namespace_element_exists( script, ns, buf );
                ptr = nsb->data;
                ferite_str_set( VAS(ptr), (char *)stringlist[i], strlen((char *)stringlist[i]), FE_CHARSET_DEFAULT );
                FUD(( "Setting %s to %s\n", ptr->name, FE_STR2PTR(ptr) ));
                current_var++;
            }

     /* build the replace buffer */
            if( rgx->swap_buf != NULL )
            {
                repbuf_len = strlen(rgx->swap_buf) + (strlen(rgx->compile_buf) * 10) + 1;
                replace_buffer = fcalloc(repbuf_len, sizeof(char));
                for( i = 0, walkpos = 0; i < strlen( rgx->swap_buf ) && walkpos < repbuf_len; i++ )
                {
         /* check to see if we have a backtick */
                    if( rgx->swap_buf[i] == '\\' )
                    {
          /* account for escaped backticks */
                        if( rgx->swap_buf[i+1] == '\\' )
                        {
                            replace_buffer[walkpos++] = rgx->swap_buf[i];
                            i++;
                        }
                        else
                        {
           /* get the required backtick */
                            if( i < (strlen(rgx->swap_buf) - 1) )
                            {
                                target_backtick = rgx->swap_buf[i+1] - '0';
                                if( i < (strlen(rgx->swap_buf) - 2) )
                                  target_backtick = (rgx->swap_buf[i+2] <= '9' && rgx->swap_buf[i+2] >= '0' ? (target_backtick * 10) + rgx->swap_buf[i+2] - '0' : target_backtick);
                                if( target_backtick <= count )
                                {
                                    memcpy( replace_buffer + walkpos, stringlist[target_backtick], strlen( stringlist[target_backtick] ) );
                                    walkpos += strlen(stringlist[target_backtick]);
                                    if( target_backtick > 9 )
                                      i += 2;
                                    else
                                      i++;
                                }
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                    else
                    {
                        replace_buffer[walkpos++] = rgx->swap_buf[i];
                    }
                }
                replace_buffer[walkpos] = '\0'; /* terminate the string */
                FUD(("RGX: Walkpos %d, repbuf_len %d, strlen, %d, content: ``%s''\n", walkpos, repbuf_len, strlen( replace_buffer ), replace_buffer ));
            }
            else
            {
                replace_buffer = fstrdup("");
            }

     /*   printf( "Swap at index %d len %d\n", offsets[0], strlen(stringlist[0]) ); */

     /* ok this is what we do:
      *   setup the rx variables
      *   call eval
      *   use it's return as a string */
            if( rgx->fergx_options & F_RGX_EVAL_SWP )
            {
                FUD(( ">>>> Swap evaluation <<<< '%s' \n", replace_buffer ));
                ePtr = ferite_create_string_variable_from_ptr( script, "regex-eval", replace_buffer, strlen(replace_buffer), FE_CHARSET_DEFAULT, FE_STATIC );

      /* we dont need this any more */
                ffree( replace_buffer );
                ePtrRV = ferite_op_eval( script, ePtr );
                FUD(( "eval returned: %s, %s\n", ePtrRV->name, ferite_variable_id_to_str( script, ePtrRV->type ) ));
                if( ePtrRV->type != F_VAR_STR )
                {
                    ferite_warning( script, "Regular Expression Eval does not return a string - using \"\"\n" );
                    replace_buffer = fstrdup("");
                }
                else
                {
                    replace_buffer = fstrdup(FE_STR2PTR(ePtrRV));
                }
      /* clean up afterwards */
                ferite_variable_destroy( script, ePtr );
                ferite_variable_destroy( script, ePtrRV );
            }

     /* grow the buffer */
            FUD(( "New Str Length: %d\n", strlen( newstr ) + (offsets[0] - endOfLastMatch) + strlen( replace_buffer ) + 10 ));
            tmpbuf = fcalloc( strlen( newstr ) + (offsets[0] - endOfLastMatch) + strlen( replace_buffer ) + 10, sizeof(char) );
            strcpy( tmpbuf, newstr );
            ffree( newstr );

            newstr = tmpbuf;

     /* ok what we do here is find out the offset within the original string and then replace that with
      * the swap buffer */
            strncat( newstr, FE_STR2PTR(target) + endOfLastMatch, offsets[0] - endOfLastMatch );
            strcat( newstr, replace_buffer );

            FUD(( "Real Length: %d\n", strlen(newstr) ));
     /* new offset */
            endOfLastMatch = offsets[0] + strlen(stringlist[0]);
     /*   printf( "newstr: %s %d\n", newstr, endOfLastMatch );*/

            swaps++;

            ffree( replace_buffer );

            FUD(( "RGX: Captured String count: %d\n", captured_str_cnt ));
            for( i = 0; i < captured_str_cnt; i++ )
            {
                FUD(( "RGX: STRLIST: [%d] %s\n", i, stringlist[i] ));
            }
            if( ferite_use_mm_with_pcre )
            {
                ffree( stringlist );
            }
            else
              pcre_free( stringlist );
        }
        else /* we didn't match something */
        {
            match = "\0";
            if (g_notempty != 0 && start_offset < (int)FE_STRLEN(target) )
            {
                offsets[0] = start_offset;
                offsets[1] = start_offset + 1;
            }
            else
              break;
        }

        g_notempty = ( offsets[1] == offsets[0] ) ? PCRE_NOTEMPTY : 0;

      /* Advance to the position right after the last full match */
        start_offset = offsets[1];

    }
    while( global );

   /* cleaning up */
    if( endOfLastMatch < FE_STRLEN(target) )
    {
        tmpbuf = fcalloc( strlen( newstr ) + FE_STRLEN(target) + endOfLastMatch + 2, sizeof(char) );
        strcpy( tmpbuf, newstr );
        ffree( newstr );
        newstr = tmpbuf;
        strcat( newstr, FE_STR2PTR(target) + endOfLastMatch );
    }
    ferite_str_set( VAS(target), newstr, strlen(newstr), FE_CHARSET_DEFAULT );
    ffree( newstr );
    ffree( offsets );

    retv = ferite_create_number_long_variable( script, "regex-swap-exec-return", (long)swaps, FE_STATIC );
    MARK_VARIABLE_AS_DISPOSABLE( retv );
    FE_LEAVE_FUNCTION( retv );
}

/**
 * !end
 */
