#line 1 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
/* -*- c -*-
|| This file is part of Pike. For copyright information see COPYRIGHT.
|| Pike is distributed under GPL, LGPL and MPL. See the file COPYING
|| for more information.
|| $Id: unicode_module.cmod,v 1.6 2002/10/21 17:06:55 marcus Exp $
*/

#include "global.h"
#include "stralloc.h"
#include "global.h"
RCSID("$Id: unicode_module.cmod,v 1.6 2002/10/21 17:06:55 marcus Exp $");
#include "pike_macros.h"
#include "interpret.h"
#include "program.h"
#include "program_id.h"
#include "object.h"
#include "operators.h"
#include "module_support.h"
#include "array.h"

#include "config.h"
#include "normalize.h"
#include "split.h"
#include "buffer.h"

/*! @module Unicode
 */

/* Build a Pike array of strings from the word list w and push it on
 * the Pike stack.
 *
 * d  Base pointer of the character data. Each word's start field is
 *    used as an offset from this pointer and its size field as the
 *    length handed to make_shared_binary_string2().
 * w  Word list to convert. It is released with uc_words_free() before
 *    this function returns, so the caller must not touch it afterwards.
 */
static void push_words( int *d, struct words *w )
{
  struct array *result = allocate_array( w->size );
  unsigned int idx = 0;

  while( idx < w->size )
  {
    /* Tag the slot as a string first, then store the string itself. */
    result->item[idx].type = PIKE_T_STRING;
    result->item[idx].u.string =
      make_shared_binary_string2( d + w->words[idx].start,
                                  w->words[idx].size );
    idx++;
  }

  push_array( result );
  uc_words_free( w );
}

/*! @decl array(string) split_words(string input)
 *!
 *! @fixme
 *!   Document this function.
 */
#define f_split_words_defined
/* C entry point for Unicode.split_words(string input).
 *
 * Requires exactly one string argument on the Pike stack. A buffer is
 * built from the string, split into words, and the resulting word list
 * is pushed as an array of strings in place of the argument.
 */
void f_split_words(INT32 args)
{
#line 50 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  struct pike_string *input;
#line 50 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  if( args != 1 )
  {
    wrong_number_of_args_error("split_words",args,1);
  }
#line 50 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  if( Pike_sp[-1].type != PIKE_T_STRING )
  {
    SIMPLE_BAD_ARG_ERROR("split_words",1,"string");
  }
#line 50 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  debug_malloc_pass( input = Pike_sp[-1].u.string );
  {
    struct buffer *buf = uc_buffer_from_pikestring( input );
    struct words *found = unicode_split_words_buffer( buf );

    /* The argument is dropped before the result is pushed; from here
     * on only the buffer is read. */
    pop_n_elems( args );
    push_words( buf->data, found );   /* push_words() also frees found */
    uc_buffer_free( buf );
  }
}
/*! @decl array(string) split_words_and_normalize(string input)
 *!
 *! @fixme
 *!   Document this function.
 */
#define f_split_words_and_normalize_defined
/* C entry point for Unicode.split_words_and_normalize(string input).
 *
 * Requires exactly one string argument on the Pike stack. The string is
 * turned into a buffer, passed through unicode_decompose_buffer() with
 * COMPAT_BIT, split into words, and the words are pushed as an array of
 * strings in place of the argument.
 */
void f_split_words_and_normalize(INT32 args)
{
#line 66 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  struct pike_string *input;
#line 66 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  if( args != 1 )
  {
    wrong_number_of_args_error("split_words_and_normalize",args,1);
  }
#line 66 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  if( Pike_sp[-1].type != PIKE_T_STRING )
  {
    SIMPLE_BAD_ARG_ERROR("split_words_and_normalize",1,"string");
  }
#line 66 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  debug_malloc_pass( input = Pike_sp[-1].u.string );
  {
    struct buffer *buf;
    struct words *found;

    /* Only the buffer returned by the decomposition step is used (and
     * later freed) from here on. */
    buf = uc_buffer_from_pikestring( input );
    buf = unicode_decompose_buffer( buf, COMPAT_BIT );
    found = unicode_split_words_buffer( buf );

    pop_n_elems( args );
    push_words( buf->data, found );   /* push_words() also frees found */
    uc_buffer_free( buf );
  }
}
/*! @decl string normalize( string data, string method );
 *!
 *! Normalize the given unicode string according to the specified method.
 *! 
 *! The methods are:
 *!
 *!  NFC, NFD, NFKC and NFKD.
 *!  
 *! The methods are described in detail in the UAX #15 document, which
 *! can currently be found at
 *! http://www.unicode.org/unicode/reports/tr15/tr15-21.html
 *!
 *! A short description:
 *! 
 *! C and D specify whether to decompose (D) complex characters into
 *! their parts, or compose (C) single characters into complex ones.
 *!
 *! K specifies whether to do a canonical or a compatibility
 *! conversion. When K is present, compatibility transformations are
 *! performed as well as the canonical transformations.
 *!
 *! @i{In the following text, 'X' denotes the single character 'X', even
 *!  if there is more than one character inside the quotation marks. 
 *!  The reason is that it's somewhat hard to describe unicode in
 *!  iso-8859-1.@}
 *!
 *! The Unicode Standard defines two equivalences between characters:
 *! canonical equivalence and compatibility equivalence. Canonical
 *! equivalence is a basic equivalency between characters or
 *! sequences of characters. 
 *!
 *! 'A WITH RING ABOVE' (U+00C5)  and  'A' 'COMBINING RING ABOVE' (U+030A)
 *! are canonically equivalent.
 *!
 *! For round-trip compatibility with existing standards, Unicode has
 *! encoded many entities that are really variants of existing nominal
 *! characters. The visual representations of these characters are
 *! typically a subset of the possible visual representations of the
 *! nominal character. These are given compatibility decompositions in
 *! the standard. Because the characters are visually distinguished,
 *! replacing a character by a compatibility equivalent may lose
 *! formatting information unless supplemented by markup or styling.
 *!
 *! Examples of compatibility equivalences:
 *! @ul
 *!   @item
 *!     Font variants (thin, italic, extra wide characters etc)
 *!   @item
 *!     Circled and squared characters
 *!   @item
 *!     super/subscript ('SUPERSCRIPT TWO' (U+00B2) -> '2')
 *!   @item
 *!     Fractions       ('ONE HALF' (U+00BD) -> '1/2')
 *!   @item
 *!     Other composed characters ('fi' -> 'f' 'i',  'kg' -> 'k' 'g')
 *! @endul
 *!
 */
#define f_normalize_defined
/* C entry point for Unicode.normalize(string s, string flags).
 *
 * Requires exactly two string arguments on the Pike stack. Each 'K' in
 * flags sets bit value 1 and each 'C' sets bit value 2 in the mode word
 * handed to unicode_normalize(); every other character is ignored.
 * Both arguments are then popped and the string returned by
 * unicode_normalize() is pushed as the result.
 *
 * The flag characters are fetched with index_shared_string() so that a
 * flags string stored with wide characters (size_shift != 0) is
 * examined one character at a time. Indexing flags->str directly steps
 * through raw bytes, which can make one byte of a wide character look
 * like 'K' or 'C'.
 */
void f_normalize(INT32 args) {
#line 134 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
struct pike_string * s;
#line 134 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
struct pike_string * flags;
#line 134 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
if(args != 2) wrong_number_of_args_error("normalize",args,2);
#line 134 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
if(Pike_sp[0-2].type != PIKE_T_STRING) SIMPLE_BAD_ARG_ERROR("normalize",1,"string");
#line 134 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
debug_malloc_pass(s=Pike_sp[0-2].u.string);
#line 134 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
if(Pike_sp[1-2].type != PIKE_T_STRING) SIMPLE_BAD_ARG_ERROR("normalize",2,"string");
#line 134 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
debug_malloc_pass(flags=Pike_sp[1-2].u.string);
{
  int _flags=0, i;

  /* NOTE(review): 1 and 2 presumably correspond to COMPAT_BIT and a
   * compose bit from normalize.h -- confirm before replacing them with
   * the symbolic names. */
  for( i = 0; i<flags->len; i++ )
    switch( index_shared_string( flags, i ) )
    {
      case 'K': _flags|=1; break;
      case 'C': _flags|=2; break;
      default: break;   /* unknown flag characters are ignored */
    }

  /* Normalize while both arguments are still on the stack, then swap
   * them for the result. */
  do { struct pike_string * ret_=(unicode_normalize( s, _flags )); pop_n_elems(2); push_string(ret_); return; }while(0);
#line 146 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
}

}
/*! @decl int is_wordchar(int c)
 *!
 *! @fixme
 *!   Document this function.
 */
#define f_is_wordchar_defined
/* C entry point for Unicode.is_wordchar(int c).
 *
 * Requires exactly one integer argument on the Pike stack. The argument
 * is replaced by the integer returned from unicode_is_wordchar( c ).
 */
void f_is_wordchar(INT32 args)
{
#line 153 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  INT_TYPE c;
#line 153 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  if( args != 1 )
  {
    wrong_number_of_args_error("is_wordchar",args,1);
  }
#line 153 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  if( Pike_sp[-1].type != PIKE_T_INT )
  {
    SIMPLE_BAD_ARG_ERROR("is_wordchar",1,"int");
  }
  c = Pike_sp[-1].u.integer;
#line 154 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  {
    /* Compute the answer first, then swap the argument for it. */
    INT_TYPE is_word = unicode_is_wordchar( c );

    pop_stack();
    push_int( is_word );
    return;
#line 156 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  }
}
/*! @endmodule
 */

#line 161 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
/* Module initialisation block.
 *
 * Passes each C function defined in this file to ADD_FUNCTION2 together
 * with its Pike-level name, a type description and the flags
 * OPT_EXTERNAL_DEPEND|OPT_SIDE_EFFECT, and finally calls
 * unicode_normalize_init().
 *
 * Every ADD_FUNCTION2 call is guarded by the f_*_defined macro that is
 * #defined right before the corresponding function, so a function is
 * only added when its definition is present in this file.
 */
PIKE_MODULE_INIT
{
  
#ifdef f_split_words_defined
#line 50 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  ADD_FUNCTION2("split_words", f_split_words, tFunc(tString,tArr(tString)), 0, OPT_EXTERNAL_DEPEND|OPT_SIDE_EFFECT);

#endif /* f_split_words_defined */

#ifdef f_split_words_and_normalize_defined
#line 66 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  ADD_FUNCTION2("split_words_and_normalize", f_split_words_and_normalize, tFunc(tString,tArr(tString)), 0, OPT_EXTERNAL_DEPEND|OPT_SIDE_EFFECT);

#endif /* f_split_words_and_normalize_defined */

#ifdef f_normalize_defined
#line 134 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  ADD_FUNCTION2("normalize", f_normalize, tFunc(tString tString,tString), 0, OPT_EXTERNAL_DEPEND|OPT_SIDE_EFFECT);

#endif /* f_normalize_defined */

/* NOTE(review): the two quoted byte strings given to tFunc below look
 * like a pre-encoded int type (the @decl for is_wordchar is int -> int,
 * and the bytes spell 0x80000000 and 0x7fffffff) -- confirm against the
 * precompiler's type encoding before editing them. */
#ifdef f_is_wordchar_defined
#line 153 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
  ADD_FUNCTION2("is_wordchar", f_is_wordchar, tFunc("\10\200\0\0\0\177\377\377\377","\10\200\0\0\0\177\377\377\377"), 0, OPT_EXTERNAL_DEPEND|OPT_SIDE_EFFECT);

#endif /* f_is_wordchar_defined */
/* Runs after all ADD_FUNCTION2 calls; unicode_normalize_init() is
 * defined outside this file, so what it sets up is not visible here. */
#line 164 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
unicode_normalize_init();
}

/* Module exit block. The body contains no statements, so no cleanup is
 * performed here. */
PIKE_MODULE_EXIT
{
  
#line 170 "/tmp/pikedeb.5a120871e4/7.4/src/post_modules/Unicode/unicode_module.cmod"
}

