#include "ctype.h"
#include "malloc.h"
#include "string.h"
#include "ruby.h"
#include "intern.h"
#include "util.h"
/*
 * Web escape CopyRight (c)2002 MoonWolf <moonwolf@moonwolf.com>
 */

/* The Ruby `Web` module object. Init_escape_ext() assigns it, and
 * webescape_html_unescape() reads the HTMLEntity constant through it. */
VALUE mWeb;

/*
 * Per-byte classification table, indexed by an unsigned char value.
 *
 *   FLAGS[c] & 1   non-zero when the URL escaper emits the byte as a single
 *                  character (space, '-', '.', '_', the digits 0-9 and the
 *                  ASCII letters); any other byte is written as %XX.
 *   FLAGS[c] >> 1  number of EXTRA output bytes the HTML escaper needs for
 *                  the byte: 5 for '"' (&quot;), 4 for '&' (&amp;) and for
 *                  '\'' (&#39;), 3 for '<' (&lt;) and '>' (&gt;), else 0.
 *
 * The entry for '9' used to be 0, which made the URL escaper percent-encode
 * that one digit; it is now 1 like every other digit.
 *
 * Rows 0x80-0xff are not listed: the array has 256 elements and the
 * unlisted ones are zero-initialized.
 */
int FLAGS[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 1, 0,10, 0, 0, 0, 8, 8, 0, 0, 0, 0, 0, 1, 1, 0,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 6, 0, 6, 0,
  /* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
  /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };

/*
 * Reverse hex lookup, indexed by an unsigned char value: a hexadecimal
 * digit maps to its numeric value PLUS ONE ('0' -> 1 ... '9' -> 10,
 * 'A'/'a' -> 11 ... 'F'/'f' -> 16) so that 0 can mean "not a hex digit".
 *
 * Rows 0x70-0xff are not listed: the array has 256 elements and the
 * unlisted ones are zero-initialized.
 */
int HEXR[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0,11,12,13,14,15,16, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0,11,12,13,14,15,16, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

/* Upper-case hex digits indexed by nibble value. The array is exactly 16
 * bytes, so the string literal's terminating NUL is not stored. */
unsigned char HEX[16] = "0123456789ABCDEF";



/*
 * Web.escape(str) -> String
 *
 * URL-encodes +str+ (converted with rb_obj_as_string first).  A byte whose
 * FLAGS entry has bit 0 set is copied through, except that a space is
 * written as '+'; every other byte becomes '%' followed by two upper-case
 * hex digits taken from HEX.  The result inherits the taint of the input.
 *
 * The output is written straight into a Ruby string allocated at its final
 * size.  The previous version used an unchecked malloc() scratch buffer,
 * which dereferenced NULL when the allocation failed, and kept the lengths
 * in int although RSTRING()->len is a long.
 */
static VALUE webescape_url_escape(VALUE self, VALUE str) {
  long i, arglen, retlen;
  const unsigned char *argstr;
  unsigned char *p;
  unsigned char c;
  VALUE retval;

  str = rb_obj_as_string(str);
  arglen = RSTRING(str)->len;
  argstr = (const unsigned char *)RSTRING(str)->ptr;

  /* Pass 1: size of the encoded text (1 byte as-is, 3 bytes for %XX). */
  retlen = 0;
  for (i = 0; i < arglen; i++) {
    retlen += (FLAGS[argstr[i]] & 1) ? 1 : 3;
  }

  /* A NULL source makes rb_str_new allocate retlen bytes without copying;
   * pass 2 below fills every one of them. */
  retval = rb_str_new(0, retlen);
  p = (unsigned char *)RSTRING(retval)->ptr;

  /* Pass 2: encode. */
  for (i = 0; i < arglen; i++) {
    c = argstr[i];
    if (FLAGS[c] & 1) {
      *p++ = (c == ' ') ? '+' : c;
    } else {
      *p++ = '%';
      *p++ = HEX[c >> 4];
      *p++ = HEX[c & 0xf];
    }
  }

  OBJ_INFECT(retval, str);
  return retval;
}



/*
 * Web.unescape(str) -> String
 *
 * Decodes URL-encoded text (+str+ is converted with rb_obj_as_string
 * first).  "%XX" with two hex digits becomes the byte 0xXX, '+' becomes a
 * space, and everything else - including a '%' that is not followed by two
 * hex digits - is copied unchanged.  The result inherits the taint of the
 * input.
 *
 * The output is written straight into a Ruby string allocated at its final
 * size.  The previous version used an unchecked malloc() scratch buffer,
 * which dereferenced NULL when the allocation failed, and kept the lengths
 * in int although RSTRING()->len is a long.
 */
static VALUE webescape_url_unescape(VALUE self, VALUE str) {
  long i, arglen, retlen;
  int hh, hl;
  const unsigned char *argstr;
  unsigned char *p;
  unsigned char c;
  VALUE retval;

  str = rb_obj_as_string(str);
  arglen = RSTRING(str)->len;
  argstr = (const unsigned char *)RSTRING(str)->ptr;

  /* Pass 1: every step yields exactly one output byte; a valid %XX
   * consumes three input bytes, anything else consumes one. */
  retlen = 0;
  i = 0;
  while (i < arglen) {
    if (i < arglen - 2 && argstr[i] == '%'
        && HEXR[argstr[i + 1]] && HEXR[argstr[i + 2]]) {
      i += 3;
    } else {
      i += 1;
    }
    retlen += 1;
  }

  /* A NULL source makes rb_str_new allocate retlen bytes without copying;
   * pass 2 below fills every one of them. */
  retval = rb_str_new(0, retlen);
  p = (unsigned char *)RSTRING(retval)->ptr;

  /* Pass 2: decode. */
  i = 0;
  while (i < arglen) {
    c = argstr[i];
    if (i < arglen - 2 && c == '%'
        && (hh = HEXR[argstr[i + 1]]) && (hl = HEXR[argstr[i + 2]])) {
      /* HEXR stores digit value + 1, so 16*(hh-1) + (hl-1) is
       * (hh << 4) + hl - 17. */
      *p++ = (unsigned char)(((hh << 4) + hl) - 17);
      i += 3;
    } else {
      *p++ = (c == '+') ? ' ' : c;
      i += 1;
    }
  }

  OBJ_INFECT(retval, str);
  return retval;
}



/*
 * Web.escapeHTML(str) -> String
 *
 * Replaces the HTML-special bytes of +str+ (converted with
 * rb_obj_as_string first): '"' -> &quot;, '&' -> &amp;, '\'' -> &#39;,
 * '<' -> &lt;, '>' -> &gt;.  Which bytes are special, and how many extra
 * output bytes each needs, comes from FLAGS[c] >> 1.  The result inherits
 * the taint of the input.
 *
 * The output is written straight into a Ruby string allocated at its final
 * size.  The previous version used an unchecked malloc() scratch buffer,
 * which dereferenced NULL when the allocation failed, and kept the lengths
 * in int although RSTRING()->len is a long.
 */
static VALUE webescape_html_escape(VALUE self, VALUE str) {
  long i, arglen, retlen;
  const unsigned char *argstr;
  unsigned char *p;
  unsigned char c;
  VALUE retval;

  str = rb_obj_as_string(str);
  arglen = RSTRING(str)->len;
  argstr = (const unsigned char *)RSTRING(str)->ptr;

  /* Pass 1: input length plus the extra bytes of every replacement. */
  retlen = arglen;
  for (i = 0; i < arglen; i++) {
    retlen += (FLAGS[argstr[i]] >> 1);
  }

  /* A NULL source makes rb_str_new allocate retlen bytes without copying;
   * pass 2 below fills every one of them. */
  retval = rb_str_new(0, retlen);
  p = (unsigned char *)RSTRING(retval)->ptr;

  /* Pass 2: copy, substituting the entity text for flagged bytes. */
  for (i = 0; i < arglen; i++) {
    c = argstr[i];
    if (FLAGS[c] >> 1) {
      switch (c) {
      case '"':
        memcpy(p, "&quot;", 6);
        p += 6;
        break;
      case '&':
        memcpy(p, "&amp;", 5);
        p += 5;
        break;
      case '\'':
        memcpy(p, "&#39;", 5);
        p += 5;
        break;
      case '<':
        memcpy(p, "&lt;", 4);
        p += 4;
        break;
      case '>':
        memcpy(p, "&gt;", 4);
        p += 4;
        break;
      default:
        break;
      }
    } else {
      *p++ = c;
    }
  }

  OBJ_INFECT(retval, str);
  return retval;
}



static VALUE webescape_html_unescape(int argc, VALUE *argv, VALUE self) {
  int i, j, k, arglen, retlen, reflen;
  unsigned char *argstr, *retstr, *p, *retp;
  /* unsigned char kcode; */
  unsigned char c;
  unsigned long uv;
  VALUE str;
  VALUE htmlentity;
  VALUE entity;
  VALUE retval;
  ID id_hex;
  
  id_hex = rb_intern("hex");
  /* kcode = (unsigned char)( tolower( (int)(rb_get_kcode()[0]) ) ); */
  
  
  switch(argc) {
  case 0:
    str = self;
    htmlentity = rb_const_get(mWeb, rb_intern("HTMLEntity"));
  case 1:
    str = argv[0];
    htmlentity = rb_const_get(mWeb, rb_intern("HTMLEntity"));
    break;
  case 2:
    str = argv[0];
    htmlentity = argv[1];
    break;
  }
  
  Check_Type(htmlentity, T_HASH);
  /* Check_Type(str, T_STRING); */
  str = rb_obj_as_string(str);
  arglen = RSTRING(str)->len;
  argstr = RSTRING(str)->ptr;
  
  /* get length */
  retlen = 0;
  for(i=0; i<arglen; i++) {
    c = argstr[i];
    if (c=='&' && i<arglen-2) {
      for(j=i+1; j<arglen; j++) {
        if (argstr[j]==';') break;
      }
      if (j<arglen) {
        reflen = j-i-1;
        p = argstr+i+1;
        if (reflen==4 && memcmp(p, "quot", 4)==0) {
          retlen += 1;
        } else if (reflen==3 && memcmp(p, "amp", 3)==0) {
          retlen += 1;
        } else if (reflen==2 && memcmp(p, "lt", 2)==0) {
          retlen += 1;
        } else if (reflen==2 && memcmp(p, "gt", 2)==0) {
          retlen += 1;
        } else {
          if (reflen>=2 && p[0]=='#') {
            if (p[1]=='x') {
              /* hex */
              reflen -= 2;
              p += 2;
              if (reflen<1) {
                rb_raise(rb_eRangeError, "character entity reference error");
                return Qnil;
              }
              for(k=0 ; k<reflen ; k++) {
                if (!isxdigit(p[k])) {
                  rb_raise(rb_eRangeError, "character entity reference error");
                  return Qnil;
                }
              }
              uv = NUM2ULONG(rb_funcall(rb_str_new(p,reflen), id_hex, 0));
            } else {
              /* decimal */
              reflen -= 1;
              p += 1;
              for(k=0 ; k<reflen ; k++) {
                if (!isdigit(p[k])) {
                  rb_raise(rb_eRangeError, "character entity reference error");
                  return Qnil;
                }
              }
              uv = NUM2ULONG(rb_Integer(rb_str_new(p,reflen)));
            }
            if (uv<0x80) {
              retlen += 1;
            } else {
              /*
              if (kcode!='u') {
                rb_raise(rb_eRangeError, "character entity reference error");
              }
               */
              if (uv<0x00000800) {
                retlen += 2;
              } else if (uv<0x00010000) {
                retlen += 3;
              } else if (uv<0x00200000) {
                retlen += 4;
              } else if (uv<0x04000000) {
                retlen += 5;
              } else if (uv<0x80000000) {
                /*
                if (kcode!='u') {
                  rb_raise(rb_eRangeError, "character entity reference error");
                }
                 */
                retlen += 6;
              } else {
                rb_raise(rb_eRangeError, "character entity reference error");
                return Qnil;
              }
            }
          } else {
            entity = rb_hash_aref(htmlentity, rb_str_new(p, reflen));
            if (NIL_P(entity)) {
              rb_raise(rb_eRangeError, "character entity reference error");
              return Qnil;
            } else {
              /*
              if (kcode!='u') {
                rb_raise(rb_eRangeError, "character entity reference error");
              }
                */
              retlen += RSTRING(entity)->len;
            }
          }
        }
      } else {
        rb_raise(rb_eRangeError, "character entity reference error");
        return Qnil;
      }
      i = j;
    } else {
      retlen += 1;
    }
  }

  /* alloc */
  retstr = malloc(retlen);

  /* convert */
  retp = retstr;
  for(i=0; i<arglen; i++) {
    c = argstr[i];
    if (c=='&' && i<arglen-2) {
      for(j=i+1; j<arglen; j++) {
        if (argstr[j]==';') break;
      }
      if (j<arglen) {
        reflen = j-i-1;
        p = argstr+i+1;
        if (reflen==4 && memcmp(p, "quot", 4)==0) {
          *retp++ = 0x22;
        } else if (reflen==3 && memcmp(p, "amp", 3)==0) {
          *retp++ = '&';
        } else if (reflen==2 && memcmp(p, "lt", 2)==0) {
          *retp++ = '<';
        } else if (reflen==2 && memcmp(p, "gt", 2)==0) {
          *retp++ = '>';
        } else {
          if (reflen>=2 && p[0]=='#') {
            if (p[1]=='x') {
              /* hex */
              reflen -= 2;
              p += 2;
              if (reflen<1) {
                rb_raise(rb_eRangeError, "character entity reference error");
                return Qnil;
              }
              for(k=0 ; k<reflen ; k++) {
                if (!isxdigit(p[k])) {
                  rb_raise(rb_eRangeError, "character entity reference error");
                  return Qnil;
                }
              }
              uv = NUM2ULONG(rb_funcall(rb_str_new(p,reflen), id_hex, 0));
            } else {
              /* decimal */
              reflen -= 1;
              p += 1;
              for(k=0 ; k<reflen ; k++) {
                if (!isdigit(p[k])) {
                  rb_raise(rb_eRangeError, "character entity reference error");
                  return Qnil;
                }
              }
              uv = NUM2ULONG(rb_Integer(rb_str_new(p,reflen)));
            }
            /* to utf-8 */
            if (uv < 0x80) {
              *retp++ = uv;
            } else if (uv < 0x00000800) {
              *retp++ = ((uv>>6)  & 0xff) | 0xc0;
              *retp++ = (uv       & 0x3f) | 0x80;
            } else if (uv < 0x00010000) {
              *retp++ = ((uv>>12) & 0xff) | 0xe0;
              *retp++ = ((uv>>6)  & 0x3f) | 0x80;
              *retp++ = (uv       & 0x3f) | 0x80;
            } else if (uv < 0x00200000) {
              *retp++ = ((uv>>18) & 0xff) | 0xf0;
              *retp++ = ((uv>>12) & 0x3f) | 0x80;
              *retp++ = ((uv>>6)  & 0x3f) | 0x80;
              *retp++ = (uv       & 0x3f) | 0x80;
            } else if (uv < 0x04000000) {
              *retp++ = ((uv>>24) & 0xff) | 0xf8;
              *retp++ = ((uv>>18) & 0x3f) | 0x80;
              *retp++ = ((uv>>12) & 0x3f) | 0x80;
              *retp++ = ((uv>>6)  & 0x3f) | 0x80;
              *retp++ = (uv       & 0x3f) | 0x80;
            } else {
              *retp++ = ((uv>>30) & 0xff) | 0xfc;
              *retp++ = ((uv>>24) & 0x3f) | 0x80;
              *retp++ = ((uv>>18) & 0x3f) | 0x80;
              *retp++ = ((uv>>12) & 0x3f) | 0x80;
              *retp++ = ((uv>>6)  & 0x3f) | 0x80;
              *retp++ = (uv       & 0x3f) | 0x80;
            }
          } else {
            entity = rb_hash_aref(htmlentity, rb_str_new(p, reflen));
            if (NIL_P(entity)) {
              rb_raise(rb_eRangeError, "character entity reference error");
              return Qnil;
            } else {
              memcpy(retp, RSTRING(entity)->ptr,RSTRING(entity)->len);
              retp += RSTRING(entity)->len;
            }
          }
        }
      } else {
        rb_raise(rb_eRangeError, "character entity reference error");
        return Qnil;
      }
      i = j;
    } else {
      *retp++ = c;
    }
  }

  retval = rb_str_new(retstr,retlen);
  OBJ_INFECT(retval, str);
  free(retstr);

  return retval;
}




/*
 * Extension entry point.  Reuses the top-level Web module if the constant
 * already exists (otherwise defines it), then sets Web::ESCAPE to "C",
 * registers the four module functions and fills Web::HTMLEntity with the
 * named-entity -> UTF-8 replacement table below.
 */
void Init_escape_ext() {
  /* Entity name and its UTF-8 encoding, in registration order. */
  static const struct {
    const char *name;
    const char *utf8;
  } entities[] = {
    { "Aacute",   "\303\201" },
    { "aacute",   "\303\241" },
    { "Acirc",    "\303\202" },
    { "acirc",    "\303\242" },
    { "acute",    "\302\264" },
    { "AElig",    "\303\206" },
    { "aelig",    "\303\246" },
    { "Agrave",   "\303\200" },
    { "agrave",   "\303\240" },
    { "alefsym",  "\342\204\265" },
    { "Alpha",    "\316\221" },
    { "alpha",    "\316\261" },
    { "and",      "\342\210\247" },
    { "ang",      "\342\210\240" },
    { "apos",     "'" },
    { "Aring",    "\303\205" },
    { "aring",    "\303\245" },
    { "asymp",    "\342\211\210" },
    { "Atilde",   "\303\203" },
    { "atilde",   "\303\243" },
    { "Auml",     "\303\204" },
    { "auml",     "\303\244" },
    { "bdquo",    "\342\200\236" },
    { "Beta",     "\316\222" },
    { "beta",     "\316\262" },
    { "brvbar",   "\302\246" },
    { "bull",     "\342\200\242" },
    { "cap",      "\342\210\251" },
    { "Ccedil",   "\303\207" },
    { "ccedil",   "\303\247" },
    { "cedil",    "\302\270" },
    { "cent",     "\302\242" },
    { "Chi",      "\316\247" },
    { "chi",      "\317\207" },
    { "circ",     "\313\206" },
    { "clubs",    "\342\231\243" },
    { "cong",     "\342\211\205" },
    { "copy",     "\302\251" },
    { "crarr",    "\342\206\265" },
    { "cup",      "\342\210\252" },
    { "curren",   "\302\244" },
    { "dagger",   "\342\200\240" },
    { "Dagger",   "\342\200\241" },
    { "darr",     "\342\206\223" },
    { "dArr",     "\342\207\223" },
    { "deg",      "\302\260" },
    { "Delta",    "\316\224" },
    { "delta",    "\316\264" },
    { "diams",    "\342\231\246" },
    { "divide",   "\303\267" },
    { "Eacute",   "\303\211" },
    { "eacute",   "\303\251" },
    { "Ecirc",    "\303\212" },
    { "ecirc",    "\303\252" },
    { "Egrave",   "\303\210" },
    { "egrave",   "\303\250" },
    { "empty",    "\342\210\205" },
    { "emsp",     "\342\200\203" },
    { "ensp",     "\342\200\202" },
    { "Epsilon",  "\316\225" },
    { "epsilon",  "\316\265" },
    { "equiv",    "\342\211\241" },
    { "Eta",      "\316\227" },
    { "eta",      "\316\267" },
    { "ETH",      "\303\220" },
    { "eth",      "\303\260" },
    { "Euml",     "\303\213" },
    { "euml",     "\303\253" },
    { "euro",     "\342\202\254" },
    { "exist",    "\342\210\203" },
    { "fnof",     "\306\222" },
    { "forall",   "\342\210\200" },
    { "frac12",   "\302\275" },
    { "frac14",   "\302\274" },
    { "frac34",   "\302\276" },
    { "frasl",    "\342\201\204" },
    { "Gamma",    "\316\223" },
    { "gamma",    "\316\263" },
    { "ge",       "\342\211\245" },
    { "harr",     "\342\206\224" },
    { "hArr",     "\342\207\224" },
    { "hearts",   "\342\231\245" },
    { "hellip",   "\342\200\246" },
    { "Iacute",   "\303\215" },
    { "iacute",   "\303\255" },
    { "Icirc",    "\303\216" },
    { "icirc",    "\303\256" },
    { "iexcl",    "\302\241" },
    { "Igrave",   "\303\214" },
    { "igrave",   "\303\254" },
    { "image",    "\342\204\221" },
    { "infin",    "\342\210\236" },
    { "int",      "\342\210\253" },
    { "Iota",     "\316\231" },
    { "iota",     "\316\271" },
    { "iquest",   "\302\277" },
    { "isin",     "\342\210\210" },
    { "Iuml",     "\303\217" },
    { "iuml",     "\303\257" },
    { "Kappa",    "\316\232" },
    { "kappa",    "\316\272" },
    { "Lambda",   "\316\233" },
    { "lambda",   "\316\273" },
    { "lang",     "\342\214\251" },
    { "laquo",    "\302\253" },
    { "larr",     "\342\206\220" },
    { "lArr",     "\342\207\220" },
    { "lceil",    "\342\214\210" },
    { "ldquo",    "\342\200\234" },
    { "le",       "\342\211\244" },
    { "lfloor",   "\342\214\212" },
    { "lowast",   "\342\210\227" },
    { "loz",      "\342\227\212" },
    { "lrm",      "\342\200\216" },
    { "lsaquo",   "\342\200\271" },
    { "lsquo",    "\342\200\230" },
    { "macr",     "\302\257" },
    { "mdash",    "\342\200\224" },
    { "micro",    "\302\265" },
    { "middot",   "\302\267" },
    { "minus",    "\342\210\222" },
    { "Mu",       "\316\234" },
    { "mu",       "\316\274" },
    { "nabla",    "\342\210\207" },
    { "nbsp",     "\302\240" },
    { "ndash",    "\342\200\223" },
    { "ne",       "\342\211\240" },
    { "ni",       "\342\210\213" },
    { "not",      "\302\254" },
    { "notin",    "\342\210\211" },
    { "nsub",     "\342\212\204" },
    { "Ntilde",   "\303\221" },
    { "ntilde",   "\303\261" },
    { "Nu",       "\316\235" },
    { "nu",       "\316\275" },
    { "Oacute",   "\303\223" },
    { "oacute",   "\303\263" },
    { "Ocirc",    "\303\224" },
    { "ocirc",    "\303\264" },
    { "OElig",    "\305\222" },
    { "oelig",    "\305\223" },
    { "Ograve",   "\303\222" },
    { "ograve",   "\303\262" },
    { "oline",    "\342\200\276" },
    { "Omega",    "\316\251" },
    { "omega",    "\317\211" },
    { "Omicron",  "\316\237" },
    { "omicron",  "\316\277" },
    { "oplus",    "\342\212\225" },
    { "or",       "\342\210\250" },
    { "ordf",     "\302\252" },
    { "ordm",     "\302\272" },
    { "Oslash",   "\303\230" },
    { "oslash",   "\303\270" },
    { "Otilde",   "\303\225" },
    { "otilde",   "\303\265" },
    { "otimes",   "\342\212\227" },
    { "Ouml",     "\303\226" },
    { "ouml",     "\303\266" },
    { "para",     "\302\266" },
    { "part",     "\342\210\202" },
    { "permil",   "\342\200\260" },
    { "perp",     "\342\212\245" },
    { "Phi",      "\316\246" },
    { "phi",      "\317\206" },
    { "Pi",       "\316\240" },
    { "pi",       "\317\200" },
    { "piv",      "\317\226" },
    { "plusmn",   "\302\261" },
    { "pound",    "\302\243" },
    { "prime",    "\342\200\262" },
    { "Prime",    "\342\200\263" },
    { "prod",     "\342\210\217" },
    { "prop",     "\342\210\235" },
    { "Psi",      "\316\250" },
    { "psi",      "\317\210" },
    { "radic",    "\342\210\232" },
    { "rang",     "\342\214\252" },
    { "raquo",    "\302\273" },
    { "rarr",     "\342\206\222" },
    { "rArr",     "\342\207\222" },
    { "rceil",    "\342\214\211" },
    { "rdquo",    "\342\200\235" },
    { "real",     "\342\204\234" },
    { "reg",      "\302\256" },
    { "rfloor",   "\342\214\213" },
    { "Rho",      "\316\241" },
    { "rho",      "\317\201" },
    { "rlm",      "\342\200\217" },
    { "rsaquo",   "\342\200\272" },
    { "rsquo",    "\342\200\231" },
    { "sbquo",    "\342\200\232" },
    { "Scaron",   "\305\240" },
    { "scaron",   "\305\241" },
    { "sdot",     "\342\213\205" },
    { "sect",     "\302\247" },
    { "shy",      "\302\255" },
    { "Sigma",    "\316\243" },
    { "sigma",    "\317\203" },
    { "sigmaf",   "\317\202" },
    { "sim",      "\342\210\274" },
    { "spades",   "\342\231\240" },
    { "sub",      "\342\212\202" },
    { "sube",     "\342\212\206" },
    { "sum",      "\342\210\221" },
    { "sup",      "\342\212\203" },
    { "sup1",     "\302\271" },
    { "sup2",     "\302\262" },
    { "sup3",     "\302\263" },
    { "supe",     "\342\212\207" },
    { "szlig",    "\303\237" },
    { "Tau",      "\316\244" },
    { "tau",      "\317\204" },
    { "there4",   "\342\210\264" },
    { "Theta",    "\316\230" },
    { "theta",    "\316\270" },
    { "thetasym", "\317\221" },
    { "thinsp",   "\342\200\211" },
    { "THORN",    "\303\236" },
    { "thorn",    "\303\276" },
    { "tilde",    "\313\234" },
    { "times",    "\303\227" },
    { "trade",    "\342\204\242" },
    { "Uacute",   "\303\232" },
    { "uacute",   "\303\272" },
    { "uarr",     "\342\206\221" },
    { "uArr",     "\342\207\221" },
    { "Ucirc",    "\303\233" },
    { "ucirc",    "\303\273" },
    { "Ugrave",   "\303\231" },
    { "ugrave",   "\303\271" },
    { "uml",      "\302\250" },
    { "upsih",    "\317\222" },
    { "Upsilon",  "\316\245" },
    { "upsilon",  "\317\205" },
    { "Uuml",     "\303\234" },
    { "uuml",     "\303\274" },
    { "weierp",   "\342\204\230" },
    { "Xi",       "\316\236" },
    { "xi",       "\316\276" },
    { "Yacute",   "\303\235" },
    { "yacute",   "\303\275" },
    { "yen",      "\302\245" },
    { "yuml",     "\303\277" },
    { "Yuml",     "\305\270" },
    { "Zeta",     "\316\226" },
    { "zeta",     "\316\266" },
    { "zwj",      "\342\200\215" },
    { "zwnj",     "\342\200\214" }
  };
  ID web_id;
  VALUE table;
  unsigned int n;

  /* Reuse an existing top-level Web constant, otherwise create the module. */
  web_id = rb_intern("Web");
  if (rb_const_defined(rb_cObject, web_id)) {
    mWeb = rb_const_get(rb_cObject, web_id);
  } else {
    mWeb = rb_define_module("Web");
  }

  rb_define_const(mWeb, "ESCAPE", rb_str_new2("C"));

  rb_define_module_function(mWeb, "escape", webescape_url_escape, 1);
  rb_define_module_function(mWeb, "unescape", webescape_url_unescape, 1);
  rb_define_module_function(mWeb, "escapeHTML", webescape_html_escape, 1);
  rb_define_module_function(mWeb, "unescapeHTML", webescape_html_unescape, -1);

  table = rb_hash_new();
  rb_define_const(mWeb, "HTMLEntity", table);

  for (n = 0; n < sizeof(entities) / sizeof(entities[0]); n++) {
    rb_hash_aset(table, rb_str_new2(entities[n].name), rb_str_new2(entities[n].utf8));
  }

  /* Web is not mixed into String (rb_include_module is left disabled). */
}
