/*+++++++++++++++++
  refdbxp.c - expands or contracts citations in SGML/XML documents

  markus@mhoenicka.de 2002-10-8
  $Id: refdbxp.c,v 1.7.2.5 2005/11/08 21:39:50 mhoenicka Exp $

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

   ++++++++++++++++++++++++*/

/* ToDo:
   Use a command line option to define a default database. Then include the
   database in the linked list elements and write out the expanded form in
   all cases.
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "getopt.h"
#include "strfncs.h"

/* size of one fgets() read; the line buffers start at this size and the
   input buffer grows in steps of this size */
#define LINE_SIZE 65536
/* maximum allowable length of an attribute value. This value is taken
   from docbook.dcl (4.1), but xml.dcl doesn't specify a value so I'm
   not sure whether we're on the safe side */
#define MAX_ATTR_LEN 256
/* values of Lilicit.nis_subseq: first vs. subsequent occurrence of a
   reference in the document */
#define FIRST 0
#define SUBSEQ 1

/* structures used in the linked lists */

/*
  liliment is used in a linked list to manage the chunks of a line
  separated by refdb citation element starts. Each chunk starts at the
  beginning of the line or a few characters into the end tag of the
  previous citation (main() restarts scanning 5 characters after the
  start of the end tag). Within each chunk we keep pointers to the
  start of the citation start tag, the data proper (i.e. a string
  containing one or more references in either short or long format),
  and the start of the end tag. The contents of the original line
  could be recreated by piecing together the chunks in the correct
  order.
*/
typedef struct liliment {
  char* chunkstart; /* ptr to start of the current chunk */
  char* elstartstart; /* ptr to first char of start tag */
  char* elstartend; /* ptr to the first char after the end of the start
		       tag, i.e. to the start of the element data */
  char* elendstart; /* ptr to first char of end tag, NULL if element
		       is not closed */
  char* eldata; /* ptr to allocated string containing element data;
		   freed by delete_all_liliment() */
  struct liliment *ptr_next; /* pointer to the next element in the list */
  struct liliment *ptr_last; /* pointer to the last element in the list;
				add_liliment() maintains this on the
				sentinel only */
} Liliment;

/*
  lilicit is used in a linked list to manage lists of references:
  one global list of all references seen so far in the document, and
  one short-lived local list per citation element
 */
typedef struct lilicit {
  char* cit; /* ptr to reference string (a copy made by add_lilicit()) */
  int nis_subseq; /* if 1 (SUBSEQ), subsequent citation, if 0 (FIRST),
		     first citation */
  int n_citstylespec; /* 0 = regular, 1 = authoronly, 2 = yearonly */
  struct lilicit *ptr_next; /* pointer to the next element in the list */
  struct lilicit *ptr_last; /* pointer to the last element in the list;
			       add_lilicit() maintains this on the
			       sentinel only */
} Lilicit;


/* global variables */
int n_create_full = 1; /* by default, create expanded output; the -s
			  command line option sets this to 0 */
int n_intype = 0; /* 0 = db31; 1 = db31x; 2 = teix; set by the -t
		     command line option */
int n_multicit_count = 0; /* this counter is used to create pseudo-unique
			     ID values for multiple citations, i.e. IMXXX */
char typesep = '-'; /* separator for A,Q,Y etc suffix */


/* prototypes */

/* citation parsing and output */
int handle_full_style(Liliment *ptr_first, Lilicit *ptr_first_cit);
int handle_short_style(Liliment *ptr_first, Lilicit *ptr_first_cit);
int write_full(const char* citation, int nis_subseq, int n_citstylespec);
int write_short(const char* citation, int n_citstylespec);
int write_multixref(Lilicit* ptr_cit_sentinel, int n_citestylespec, int* ptr_multicit_count);

/* input reading and scanning */
char* fget_line(char** linebuffer, size_t* lb_size, const char* start_tag, const char* end_tag, FILE* infp);
char* find_start(char* scan_start, char* start_tag, char* attrname_refdb, char* attrvalue_refdb, char** cit_startend, int n_intype);

/* linked list of the citation elements of the current line */
Liliment* add_liliment(Liliment* ptr_first, const char* el_startstart, const char* el_startend, const char* chunkstart);
Liliment* get_next_liliment(Liliment* ptr_first);
int delete_all_liliment(Liliment* ptr_first);
int count_liliment(Liliment* ptr_first);

/* linked list of references */
Lilicit* add_lilicit(Lilicit *ptr_first, char* cit);
Lilicit* get_next_lilicit(Lilicit* ptr_first);
Lilicit* get_lilicit_byname(Lilicit* ptr_first, const char* name);
int delete_all_lilicit(Lilicit *ptr_first);
int count_lilicit(Lilicit* ptr_first);

/* declaration of the svn version function */
const char* svn_version(void);


/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  main(): the one and only main function. Reads the document from
          stdin one logical line at a time (see fget_line()), locates
          the RefDB citation elements on each line, and writes the
          line to stdout with the contents of each citation element
          rewritten in the requested (full or short) style. Lines
          without citation elements are passed through unaltered.

  int main exits with 0 if ok, 1 if out of memory

  int argc number of command line arguments

  char** argv ptr to array of command line arguments; the options
          -h, -s, -t type, and -v are recognized

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int main(int argc, char** argv) {
  char *linebuffer;
  char *new_linebuffer;
  char *uplinebuffer = NULL;
  char *outbuffer;
  char *new_outbuffer;
  char *result;
  char *cit_start;
  char *cit_startend = NULL;
  char *cit_end;
  char *scan_start;
  char *real_start;
  char start_tag[11];
  char end_tag[11];
  char attrname_refdb[5];
  char attrvalue_refdb[16];
  int n_opt;
  int n_status = 0;
  int n_handle_result;
  size_t len;
  size_t lb_len = LINE_SIZE;
  size_t ob_len = LINE_SIZE;
  FILE *infp = stdin;
  Liliment sentinel;
  Liliment *ptr_curr;
  Lilicit cit_sentinel;

  /* initialize linked lists */
  sentinel.ptr_next = NULL;
  sentinel.ptr_last = &sentinel;
  sentinel.eldata = NULL;

  cit_sentinel.ptr_next = NULL;
  cit_sentinel.ptr_last = &cit_sentinel;
  cit_sentinel.cit = NULL;

  /* allocate memory for input and output */
  linebuffer = malloc(lb_len);
  if (linebuffer == NULL) {
    fprintf(stderr, "out of memory\n");
    exit(1);
  }

  linebuffer[0] = '\0';

  outbuffer = malloc(ob_len);
  if (outbuffer == NULL) {
    fprintf(stderr, "out of memory\n");
    free(linebuffer);
    exit(1);
  }

  /* parse command line options */
  while ((n_opt = getopt(argc, argv, "hst:v")) != -1) {
    switch (n_opt) {
    case 'h':
      fprintf(stderr, "Expands or contracts citations in SGML/XML documents\nInput is read from stdin, output is sent to stdout\nUsage: refdbxp [-h] [-s] [-t type]\nOptions: -h print help and exit\n         -s create short citations (default: long)\n         -t specify input type (db31, db31x, teix)\n         -v print version info and exit\n");
      exit(0);
      break;
    case 's':
      n_create_full = 0;
      break;
    case 't':
      if (!strcmp(optarg, "db31x")) {
        n_intype = 1;
      }
      else if (!strcmp(optarg, "teix")) {
        n_intype = 2;
      }
      /* else: for db31 and anything else n_intype remains 0 */
      break;
    case 'v':
      printf("refdbxp %s built from svn revision %s markus@mhoenicka.de\nYou may redistribute and modify this software under the terms of the GNU General Public License.\n", VERSION, svn_version());
      exit (0);
      break;
    case ':':
      fprintf(stderr, "Usage: refdbxp [-h] [-s] [-t type]\nOptions: -h print help and exit\n         -s create short citations (default: long)\n         -t specify input type (db31, db31x, teix)\n");
      break;
    case '?':
      fprintf(stderr, "unknown option %c: use refdbxp -h to display usage\n", optopt);
      break;
    default:
      break;
    }
  }

  /* set search strings to match input type */
  switch (n_intype) {
  case 0: /* db31 */
    strcpy(start_tag, "<CITATION");
    strcpy(end_tag, "</CITATION");
    strcpy(attrname_refdb, "ROLE");
    strcpy(attrvalue_refdb, "\"REFDB\"");
    break;
  case 1: /* db31x */
    strcpy(start_tag, "<citation");
    strcpy(end_tag, "</citation");
    strcpy(attrname_refdb, "role");
    strcpy(attrvalue_refdb, "\"REFDB\"");
    break;
  case 2: /* teix */
    strcpy(start_tag, "<seg");
    strcpy(end_tag, "</seg");
    strcpy(attrname_refdb, "type");
    strcpy(attrvalue_refdb, "\"REFDBCITATION\"");
    break;
  default:
    break;
  }

  /* strategy:
     - loop over all lines of input
     - locate <citation>/<seg> start tags with appropriate attributes
       and add them to linked list
     - walk through linked list of citations (fifo)
     - decide whether citation has short or full style
     - full style: add references to linked list, correct first/subseq
       stuff if necessary
     - short style: add references to linked list, expand reference
       by inserting appropriate markup, take care of first/subseq issue
  */

  /* loop over all logical lines of the input */
  while ((result = fget_line(&linebuffer, &lb_len, start_tag, end_tag, infp)) != NULL) {

    /* get sizes of linebuffer and outbuffer in sync if necessary */
    if (lb_len > ob_len) {
      if ((new_outbuffer = (char*)realloc(outbuffer, lb_len)) == NULL) {
        goto out_of_memory;
      }
      outbuffer = new_outbuffer;
      ob_len = lb_len;
    }

    /* if we're looking at SGML data, create a copy of the input
       string and uppercase it */
    if (!n_intype) {
      uplinebuffer = strdup(linebuffer);
      if (!uplinebuffer) {
        goto out_of_memory;
      }
      strup(uplinebuffer);
      scan_start = uplinebuffer;
    }
    else { /* we're looking at XML data */
      scan_start = linebuffer;
    }

    /* real_start is a ptr to the real data regardless of uppercasing
       scan_start = real_start for XML data. Only if we use SGML data,
       scan_start points to the uppercased string. We do all string
       comparisons with the uppercased (for SGML data) string, but use
       pointer offsets to grab the corresponding chunks of interest
       from the original string. That is, all pointers in the linked
       list elements are pointers to the original string meaning
       they're never uppercased. Functions doing string comparisons
       with these fragments need to uppercase the fragments again in
       the case of SGML data
    */

    real_start = linebuffer;

    /* look for start tag */
    while ((cit_start = find_start(scan_start, start_tag, attrname_refdb, attrvalue_refdb, &cit_startend, n_intype)) != NULL) {
      /* look for the end tag before committing the element to the
         list. An element without end tag has no data we could
         rewrite, and no later element on this line can have an end
         tag either (we would have found it here), so stop scanning.
         The unclosed element is then written out unaltered as part
         of the remainder of the line */
      if ((cit_end = strstr(cit_startend, end_tag)) == NULL) {
        break;
      }

      /* add the element to the list of elements of the current line */
      if ((ptr_curr = add_liliment(&sentinel, real_start+(cit_start-scan_start), real_start+(cit_startend-scan_start), real_start)) == NULL) {
        goto out_of_memory;
      }

      /* keep a private copy of the element data */
      ptr_curr->elendstart = real_start+(cit_end-scan_start);
      len = ptr_curr->elendstart - ptr_curr->elstartend;
      ptr_curr->eldata = malloc(len + 1);
      if (!ptr_curr->eldata) {
        goto out_of_memory;
      }
      memcpy(ptr_curr->eldata, ptr_curr->elstartend, len);
      (ptr_curr->eldata)[len] = '\0';

      /* 5 is the minimum length of an end tag (</seg for teix), so this
         is where the next round should look for a new start tag at the
         earliest */
      scan_start += (ptr_curr->elendstart+5)-real_start;
      real_start = ptr_curr->elendstart+5;
    }

    if (sentinel.ptr_next) {
      ptr_curr = &sentinel;

      /* all citations of the current line are in the linked list
         starting at sentinel. Loop over all list members to create
         the requested output
      */
      while ((ptr_curr = get_next_liliment(ptr_curr)) != NULL) {
        /* text preceding the element plus the start tag itself */
        len = ptr_curr->elstartend - ptr_curr->chunkstart;
        strncpy(outbuffer, ptr_curr->chunkstart, len);
        outbuffer[len] = '\0';
        fputs(outbuffer, stdout); /* don't use printf for chunks that could
                                     contain '%' */

        /* check first char after closing delimiter */
        if (*(ptr_curr->eldata) == '<') {
          /* full style */
          n_handle_result = handle_full_style(ptr_curr, &cit_sentinel);
        }
        else {
          /* short style */
          n_handle_result = handle_short_style(ptr_curr, &cit_sentinel);
        }

        if (n_handle_result) {
          /* the handlers fail only if they run out of memory */
          goto out_of_memory;
        }

        /* after the element is written, we have to print the data
           until the next element starts or the remainder of the
           line if this was the last element */
        if (ptr_curr->ptr_next) {
          len = (ptr_curr->ptr_next)->chunkstart - ptr_curr->elendstart;
          strncpy(outbuffer, ptr_curr->elendstart, len);
          outbuffer[len] = '\0';
          fputs(outbuffer, stdout);
        }
        else {
          fputs(ptr_curr->elendstart, stdout);
        }
      }
      /* delete list of citations of the current line */
      delete_all_liliment(&sentinel);
    }
    else { /* line doesn't contain citation - pass through unaltered */
      fputs(result, stdout);
    }

    /* shrink an oversized input buffer back to the default size. If
       the shrink fails the old, larger buffer is still valid, so we
       simply keep using it */
    if (lb_len > LINE_SIZE) {
      new_linebuffer = (char*)realloc(linebuffer, LINE_SIZE);
      if (new_linebuffer) {
        linebuffer = new_linebuffer;
        lb_len = LINE_SIZE;
      }
    }

    /* only used for SGML data, otherwise NULL */
    free(uplinebuffer);
    uplinebuffer = NULL;

    /* fget_line() relies on an empty buffer to detect end of input */
    linebuffer[0] = '\0';
  }

  goto cleanup;

 out_of_memory:
  fprintf(stderr, "out of memory\n");
  n_status = 1;

 cleanup:
  /* delete list of citations of the current line, if any is left */
  delete_all_liliment(&sentinel);

  /* delete global list of references */
  delete_all_lilicit(&cit_sentinel);

  /* free allocated memory */
  free(uplinebuffer);
  free(linebuffer);
  free(outbuffer);
  exit(n_status);
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  handle_full_style(): parses and creates output from a full-style
                       citation. The element data is split at '<'
                       into the individual xref/ptr elements, the ID
                       and the type suffix of each are extracted, and
                       the references are written to stdout in the
                       requested (full or short) style. References
                       not seen before are added to the global list.

  int handle_full_style returns 0 if ok, 1 if error (out of memory)

  Liliment *ptr_first ptr to structure containing the current citation
           data. The eldata string is modified by the parser

  Lilicit *ptr_first_cit ptr to sentinel of global citation list

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int handle_full_style(Liliment *ptr_first, Lilicit *ptr_first_cit) {
  /* ToDo: look for multixref stuff and ignore */
  int citstylespec = 0; /* 0 is regular, 1 is authoronly, 2 is yearonly */
  int len;
  int nis_subseq;
  int n_result = 0;
  char *token;
  char *idstring;
  char *idend;
  char idbuffer[MAX_ATTR_LEN];
  const char *att_role = "\"MULTIXREF\"";
  const char *att_linkend;
  Lilicit cit_sentinel;
  Lilicit *ptr_curr_cit;

  cit_sentinel.ptr_next = NULL;
  cit_sentinel.ptr_last = &cit_sentinel;
  cit_sentinel.cit = NULL;

  /* a missing or empty data string means there is nothing to do */
  if (!ptr_first || !ptr_first->eldata || !*(ptr_first->eldata)) {
    return 0;
  }

  /* the attribute holding the ID depends on SGML or XML data */
  switch (n_intype) {
  case 1:
    att_linkend = "linkend";
    break;
  case 2:
    att_linkend = "target";
    break;
  case 0:
    /* fall through */
  default:
    att_linkend = "LINKEND";
    break;
  }

  /* parse data and create list of references */

  for (token = strtok(ptr_first->eldata, "<"); token; token = strtok(NULL, "<")) {
    if (!n_intype) {
      strup(token);
    }

    /* extract ID from token */
    if (strstr(token, att_role)) {
      /* ignore multixref stuff */
      continue;
    }

    idstring = strstr(token, att_linkend);
    if (!idstring) {
      /* should never happen */
      continue;
    }

    while (*idstring && *idstring != '=') {
      idstring++;
    }
    if (!*idstring) {
      /* attribute name without '=', should never happen */
      continue;
    }

    idstring = strstr(idstring, "ID");
    if (!idstring) {
      /* should never happen */
      continue;
    }

    idstring += 2;
    idend = strstr(idstring, "\""); /* quotation mark terminates attribute
                                       value */
    if (!idend) {
      /* should never happen */
      continue;
    }

    /* use one char less to eliminate trailing type specifier */
    len = idend - idstring - 1;

    /* NOTE(review): this also skips IDs shorter than 5 chars (incl.
       separator) - confirm that such IDs cannot occur */
    if (len < 5) {
      /* incomplete citation */
      continue;
    }

    /* skip the type suffix separator */
    if (*(idend-2) == typesep) {
      len--;
    }

    /* prevent buffer overflow */
    len = (len >= MAX_ATTR_LEN) ? MAX_ATTR_LEN-1 : len;

    /* keep copy of ID string */
    memcpy(idbuffer, idstring, len);
    idbuffer[len] = '\0';

    /* check reference type. An unknown suffix leaves the style of
       the previous reference in effect */
    switch ((int)*(idend-1)) {
    case (int)'X':
      /* fall through */
    case (int)'S':
      citstylespec = 0;
      break;
    case (int)'A':
      /* fall through */
    case (int)'Q':
      citstylespec = 1;
      break;
    case (int)'Y':
      citstylespec = 2;
      break;
    default:
      /* should never happen */
      break;
    }

    /* a reference already in the global list is a subsequent
       occurrence, anything else is a first occurrence and has to be
       added to the global list first */
    nis_subseq = (get_lilicit_byname(ptr_first_cit, idbuffer)) ? SUBSEQ : FIRST;

    if (nis_subseq == FIRST && !add_lilicit(ptr_first_cit, idbuffer)) {
      n_result = 1;
      goto cleanup;
    }

    /* all references go into the local reference list */
    if ((ptr_curr_cit = add_lilicit(&cit_sentinel, idbuffer)) == NULL) {
      n_result = 1;
      goto cleanup;
    }

    ptr_curr_cit->nis_subseq = nis_subseq;
    ptr_curr_cit->n_citstylespec = citstylespec;
  } /* end for */

  /* write requested output full/short. Start with MULTIXREF stuff
     if applicable */
  write_multixref(&cit_sentinel, citstylespec, &n_multicit_count);

  ptr_curr_cit = &cit_sentinel;

  /* loop over all references in the current citation */
  while ((ptr_curr_cit = get_next_lilicit(ptr_curr_cit)) != NULL) {
    if (n_create_full) {
      if (write_full(ptr_curr_cit->cit, ptr_curr_cit->nis_subseq, ptr_curr_cit->n_citstylespec)) {
        n_result = 1;
        goto cleanup;
      }
    }
    else {
      write_short(ptr_curr_cit->cit, ptr_curr_cit->n_citstylespec);
    }
  } /* end while */

 cleanup:
  /* delete local list of references of the current citation, also
     if we bail out early */
  delete_all_lilicit(&cit_sentinel);

  return n_result;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  handle_short_style(): parses and creates output from a short-style
                       citation. The element data is an optional
                       "A:" (authoronly) or "Y:" (yearonly) prefix
                       followed by references separated by ';'. The
                       references are written to stdout in the
                       requested (full or short) style. References
                       not seen before are added to the global list.

  int handle_short_style returns 0 if ok, 1 if error (out of memory)

  Liliment *ptr_first ptr to structure containing the current citation
           data. The eldata string is modified by the parser

  Lilicit *ptr_first_cit ptr to sentinel of global citation list

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int handle_short_style(Liliment *ptr_first, Lilicit *ptr_first_cit) {
  int citstylespec = 0; /* 0 is regular, 1 is authoronly, 2 is yearonly */
  int nis_subseq;
  int n_result = 0;
  char *mydata;
  char *token;
  Lilicit cit_sentinel;
  Lilicit *ptr_curr_cit;

  cit_sentinel.ptr_next = NULL;
  cit_sentinel.ptr_last = &cit_sentinel;
  cit_sentinel.cit = NULL;

  /* a missing or empty data string means there is nothing to do */
  if (!ptr_first || !ptr_first->eldata || !*(ptr_first->eldata)) {
    return 0;
  }

  /* first look for citation style specifier */
  if (!strncmp(ptr_first->eldata, "A:", 2)) {
    /* authoronly */
    citstylespec = 1;
  }
  else if (!strncmp(ptr_first->eldata, "Y:", 2)) {
    /* yearonly */
    citstylespec = 2;
  }

  /* parse remaining data and create list of references */
  mydata = (citstylespec) ? ptr_first->eldata+2 : ptr_first->eldata;

  for (token = strtok(mydata, ";"); token; token = strtok(NULL, ";")) {
    /* a reference already in the global list is a subsequent
       occurrence, anything else is a first occurrence and has to be
       added to the global list first */
    nis_subseq = (get_lilicit_byname(ptr_first_cit, token)) ? SUBSEQ : FIRST;

    if (nis_subseq == FIRST && !add_lilicit(ptr_first_cit, token)) {
      n_result = 1;
      goto cleanup;
    }

    /* all references go into the local reference list */
    if ((ptr_curr_cit = add_lilicit(&cit_sentinel, token)) == NULL) {
      n_result = 1;
      goto cleanup;
    }

    ptr_curr_cit->nis_subseq = nis_subseq;
    ptr_curr_cit->n_citstylespec = citstylespec;
  } /* end for */

  /* write requested output full/short. Start with MULTIXREF stuff
     if applicable */
  write_multixref(&cit_sentinel, citstylespec, &n_multicit_count);

  ptr_curr_cit = &cit_sentinel;

  while ((ptr_curr_cit = get_next_lilicit(ptr_curr_cit)) != NULL) {
    if (n_create_full) {
      if (write_full(ptr_curr_cit->cit, ptr_curr_cit->nis_subseq, ptr_curr_cit->n_citstylespec)) {
        n_result = 1;
        goto cleanup;
      }
    }
    else {
      write_short(ptr_curr_cit->cit, ptr_curr_cit->n_citstylespec);
    }
  } /* end while */

 cleanup:
  /* delete local list of references of the current citation, also
     if we bail out early */
  delete_all_lilicit(&cit_sentinel);

  return n_result;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  write_full(): writes a full-style citation 

  int write_full returns 0 if ok, 1 on error

  const char* citation ptr to citation string

  int nis_subseq 1 if citation is a subsequent one, 0 if first

  int n_citestylespec 0 = regular citation, 1 = authoronly, 2 = yearonly

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int write_full(const char* citation, int nis_subseq, int n_citstylespec) {
  char modifier[3] = {'-', '\0', '\0'};

  switch (n_citstylespec) {
  case 0:
    modifier[1] = (nis_subseq) ? 'S' : 'X';
    break;
  case 1:
    modifier[1] = (nis_subseq) ? 'Q' : 'A';
    break;
  case 2:
    modifier[1] = 'Y';
    break;
  default:
    break;
  }

  switch (n_intype) {
  case 0: /* db31 */
    printf("<xref linkend=\"ID%s%s\">", citation, modifier);
    break;
  case 1: /* db31x */
    printf("<xref linkend=\"ID%s%s\"/>", citation, modifier);
    break;
  case 2: /* teix */
    printf("<ptr targOrder=\"U\" target=\"ID%s%s\" TEIform=\"ptr\"/>", citation, modifier);
    break;
  default:
    break;
  }

  return 0;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  write_short(): writes one reference of a short-style citation to
                 stdout, i.e. the citation string followed by the
                 ';' separator

  int write_short returns 0

  const char* citation ptr to citation string

  int n_citstylespec 0 = regular citation, 1 = authoronly, 2 = yearonly.
                 Not used here; the style prefix of a short-style
                 citation is written by write_multixref()

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int write_short(const char* citation, int n_citstylespec) {
  (void)n_citstylespec; /* accepted for symmetry with write_full() */

  printf("%s;", citation);

  return 0;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  write_multixref(): writes whatever has to precede the references of
                a citation. For full-style output this is the
                MULTIXREF element (only if the reference list is not
                empty), for short-style output this is the "A:" or
                "Y:" citation style prefix, if any.

  int write_multixref returns 0

  Lilicit* ptr_cit_sentinel ptr to start of linked list with references

  int n_citestylespec 0 = regular citation, 1 = authoronly, 2 = yearonly

  int* ptr_multicit_count ptr to counter for multiple citations; will be
       incremented whenever full-style output is requested for a
       non-empty reference list

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int write_multixref(Lilicit* ptr_cit_sentinel, int n_citestylespec, int* ptr_multicit_count) {
  if (!n_create_full) {
    /* short style: all we need is the citation style prefix */
    if (n_citestylespec == 1) {
      printf("A:");
    }
    else if (n_citestylespec == 2) {
      printf("Y:");
    }
    /* else: no modifier necessary */

    return 0;
  }

  /* full style: an empty citation does not get a multixref element */
  if (!count_lilicit(ptr_cit_sentinel)) {
    return 0;
  }

  if (n_intype == 0) { /* db31 */
    printf("<xref endterm=\"IM%d\" linkend=\"ID%s\" role=\"MULTIXREF\">", *ptr_multicit_count, (ptr_cit_sentinel->ptr_next)->cit);
  }
  else if (n_intype == 1) { /* db31x */
    printf("<xref endterm=\"IM%d\" linkend=\"ID%s\" role=\"MULTIXREF\"/>", *ptr_multicit_count, (ptr_cit_sentinel->ptr_next)->cit);
  }
  else if (n_intype == 2) { /* teix */
    printf("<ptr type=\"MULTIXREF\" targOrder=\"U\" target=\"IM%d\" TEIform=\"ptr\"/>", *ptr_multicit_count);
  }

  /* the counter advances even if the input type is unknown */
  *ptr_multicit_count += 1;

  return 0;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  fgl_scan_text(): returns the string that fget_line() should use for
               tag comparisons. For SGML data (n_intype == 0) this is
               an allocated, uppercased copy of the text, for XML
               data this is the text itself.

  char* fgl_scan_text returns ptr to the string to scan or NULL if
               out of memory. Release with fgl_release_scan_text()

  char* text ptr to the original text

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static char* fgl_scan_text(char* text) {
  char *copy;

  if (n_intype) {
    return text;
  }

  copy = strdup(text);
  if (copy) {
    strup(copy);
  }
  return copy;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  fgl_release_scan_text(): releases a string obtained from
               fgl_scan_text(). Only SGML data use an allocated copy

  char* scan_text ptr to the string to release

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static void fgl_release_scan_text(char* scan_text) {
  if (!n_intype) {
    free(scan_text);
  }
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  fget_line(): reads a full line until a newline is encountered,
               basically a workaround for the fixed buffer size
               problem of fgets, but in addition this fn makes sure
               that we read each citation element completely even if
               it is spread over several \n-terminated lines. A
               citation element is considered open if the last tag
               on a line is a citation start tag.

  char* fget_line returns ptr to complete string or NULL if there is
               no more data or if we're out of memory

  char** linebuffer address of ptr to allocated string to contain
               the full line. **linebuffer must hold at least LINE_SIZE
               chars initially and may grow as needed. The string
               must be empty on entry, as a non-empty string at the
               end of input is taken as data to return

  size_t* lb_size ptr to size of *linebuffer, will be updated if the
               buffer grows

  const char* start_tag ptr to string containing start tag of citation
               element

  const char* end_tag ptr to string containing end tag of citation
               element

  FILE* infp ptr to file descriptor of data source

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
char* fget_line(char** linebuffer, size_t* lb_size, const char* start_tag, const char* end_tag, FILE* infp) {
  char *result;
  char *my_linebuffer;
  char *new_linebuffer;
  char *last_tag;
  char *scan_text;
  size_t len;
  size_t total_len = 0;
  int n_have_cit = 0;
  int n_line_done;

  my_linebuffer = *linebuffer;

  /* loop until we've got one logical line or no more data */
  while (1) {
    if ((result = fgets(my_linebuffer, LINE_SIZE, infp)) == NULL) {
      /* assume no more data available (EOF) */
      if (**linebuffer) { /* if we have some data, return what we have */
        break;
      }
      return NULL;
    }

    /* check whether last char is a newline, i.e. whether we've got
       one physical line. len is 0 if the data start with a NUL byte */
    len = strlen(result);
    total_len += len;
    if (len > 0 && result[len-1] == '\n') {
      if (!n_have_cit) {
        /* now we need to check whether the current line contains
           the start of an unfinished citation element. If so, we
           need to loop until we find the matching end tag */
        last_tag = strrchr(result, (int)'<');
        if (!last_tag) {
          /* no tag at all, so no open citation: the line is complete.
             Without this check runs of plain text lines would pile
             up in an ever growing buffer */
          break;
        }

        scan_text = fgl_scan_text(last_tag);
        if (!scan_text) {
          fprintf(stderr, "out of memory\n");
          return NULL;
        }
        n_have_cit = (strstr(scan_text, start_tag) != NULL);
        fgl_release_scan_text(scan_text);

        if (!n_have_cit) {
          break; /* last tag is no citation start tag, line is finished */
        }
      }
      else {
        /* a previous line contained a citation start tag. Now we
           need to keep on reading until we find a matching end tag */
        scan_text = fgl_scan_text(result);
        if (!scan_text) {
          fprintf(stderr, "out of memory\n");
          return NULL;
        }

        n_line_done = 0;
        if (strstr(scan_text, end_tag)) {
          n_have_cit = 0;
          /* to make matters worse, some nasty author could have
             started another citation element on the very same line.
             So we need to check again whether we really can safely
             stop reading here */
          last_tag = strrchr(scan_text, (int)'<');
          if (last_tag) {
            if (strstr(last_tag, start_tag)) {
              n_have_cit = 1; /* new open start tag, need to read more */
            }
            else {
              n_line_done = 1; /* no new start tag, line is finished */
            }
          }
          /* else: no '<', should not happen as we have at least
             the end tag */
        }
        /* else: keep on reading, no citation end tag yet */

        fgl_release_scan_text(scan_text);

        if (n_line_done) {
          break;
        }
      }
    }

    /* increase size of buffer to make room for the next chunk */
    new_linebuffer = (char*)realloc(*linebuffer, *lb_size + LINE_SIZE);
    if (new_linebuffer == NULL) {
      /* *linebuffer and *lb_size still describe the old buffer */
      fprintf(stderr, "out of memory\n");
      return NULL;
    }
    *lb_size += LINE_SIZE;
    *linebuffer = new_linebuffer;
    my_linebuffer = *linebuffer + total_len; /* add next chunk to the end
                                                of the existing data */
  }

  return *linebuffer;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  find_start(): finds the start of a RefDB citation element in a
                string. A start tag qualifies if the text between
                the tag name and the closing '>' contains the given
                attribute name, followed somewhere by a '=', optional
                spaces, and the given attribute value. The string is
                modified temporarily while scanning but is restored
                before the function returns.

  char* find_start returns ptr to the first char of the start tag of
                the first RefDB citation element, or NULL if there
                is none or if a start tag lacks the closing '>'

  char* scan_start ptr to string to scan

  char* start_tag ptr to string containing the start tag search string

  char* attrname_refdb ptr to string containing the attribute name
                used to check for a RefDB citation

  char* attrvalue_refdb ptr to string containing the attribute value
                (including quotation marks) used to check for a RefDB
                citation

  char** cit_startend address of ptr to the first char after the
                end of the start tag. Will be updated with the proper
                value or will contain NULL if nothing was found

  int n_intype the type of input (not used by this function)

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
char* find_start(char* scan_start, char* start_tag, char* attrname_refdb, char* attrvalue_refdb, char** cit_startend, int n_intype) {
  char *candidate;
  char *tag_close;
  char *value;
  int n_is_refdb;

  (void)n_intype;

  /* visit each start tag in turn; after a mismatch the search
     resumes at the closing '>' of the rejected tag */
  for (candidate = strstr(scan_start, start_tag); candidate != NULL; candidate = strstr(tag_close, start_tag)) {
    tag_close = strchr(candidate, (int)'>');
    if (tag_close == NULL) {
      /* incomplete start tag. This is allowed by SGML, but can't
         be handled properly here -> bail out */
      break;
    }

    /* confine the attribute search to the start tag */
    *tag_close = '\0';

    value = strstr(candidate, attrname_refdb);
    if (value != NULL) {
      value = strchr(value, (int)'=');
    }

    n_is_refdb = 0;
    if (value != NULL) {
      /* step over the '=' and any spaces that follow it */
      do {
        value++;
      } while (*value == ' ');

      n_is_refdb = (strncmp(value, attrvalue_refdb, strlen(attrvalue_refdb)) == 0);
    }

    /* restore original string */
    *tag_close = '>';

    if (n_is_refdb) {
      *cit_startend = tag_close+1;
      return candidate;
    }
  }

  /* nothing suitable found */
  *cit_startend = NULL;
  return NULL;
}

/* 
   Liliment defines a linked list for string chunks, each containing
   up to one citation element. New members are added to the end of the
   list, so the first member after the sentinel is the first member
   that was added. For this to work the sentinel has to keep a ptr to
   the next member and a ptr to the last member.
*/

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  add_liliment(): adds a new member to the end of the Liliment linked
              list. The new member has neither element data nor an
              end tag (eldata and elendstart are NULL) until the
              caller provides them.

  Liliment* add_liliment returns ptr to the new member or NULL if
              out of memory

  Liliment* ptr_first ptr to the sentinel of the linked list

  const char* el_startstart ptr to the start of the start tag
              of the citation element in the current chunk

  const char* el_startend ptr to the first char after the end of the
              start tag of the citation element in the current
              chunk

  const char* chunkstart ptr to the start of the current chunk

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
Liliment* add_liliment(Liliment* ptr_first, const char* el_startstart, const char* el_startend, const char* chunkstart) {
  Liliment* ptr_new;

  /* allocate memory for a new list member */
  ptr_new = malloc(sizeof *ptr_new);

  if (!ptr_new) {
    return NULL; /* out of memory */
  }

  /* set provided values */
  ptr_new->chunkstart = (char*)chunkstart;
  ptr_new->elstartstart = (char*)el_startstart;
  ptr_new->elstartend = (char*)el_startend;

  /* set initial values, will be changed by the app later. Don't
     leave any member uninitialized, the element counts as not
     closed until the app has found the end tag */
  ptr_new->elendstart = NULL;
  ptr_new->eldata = NULL;
  ptr_new->ptr_next = NULL;
  ptr_new->ptr_last = NULL; /* only the sentinel tracks the last member */

  /* link new member into the list */
  (ptr_first->ptr_last)->ptr_next = ptr_new;
  ptr_first->ptr_last = ptr_new;

  return ptr_new;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  get_next_liliment(): steps to the following member of the Liliment
                       linked list

  Liliment* get_next_liliment returns ptr to the member that follows
              the given one, or NULL if the given one is the last

  Liliment* ptr_first ptr to the sentinel of the linked list (to
              start a walk through the list) or ptr to the member
              returned by the previous call (to continue the walk)

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
Liliment* get_next_liliment(Liliment* ptr_first) {
  Liliment* ptr_successor = ptr_first->ptr_next;

  return ptr_successor;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  delete_all_liliment(): empties the Liliment linked list. All
                         members and their element data strings are
                         freed, and the sentinel is reset so the
                         list can be used again

  int delete_all_liliment returns 0

  Liliment* ptr_first ptr to the sentinel of the linked list. The
                         sentinel itself is not freed

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int delete_all_liliment(Liliment *ptr_first) {
  /* the sentinel is not part of the data, start right after it */
  Liliment* ptr_victim = ptr_first->ptr_next;

  while (ptr_victim != NULL) {
    /* grab the successor before the member goes away */
    Liliment* ptr_following = ptr_victim->ptr_next;

    free(ptr_victim->eldata); /* may be NULL, which free() ignores */
    free(ptr_victim);

    ptr_victim = ptr_following;
  }

  /* the list is empty again */
  ptr_first->ptr_last = ptr_first;
  ptr_first->ptr_next = NULL;

  return 0;
}

/*
  Lilicit defines a linked list for reference strings. New members are
  added to the end of the list, so the first member after the sentinel
  is the first member that was added to the list.
*/

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  add_lilicit(): adds a new member to the end of the Lilicit linked
                 list

  Lilicit* add_lilicit returns a ptr to the new member, or NULL if
              either the member or the copy of the reference string
	      could not be allocated. In the latter case the list is
	      left unchanged

  Lilicit* ptr_first ptr to the sentinel of the linked list

  char* cit ptr to string containing the reference. The string is
              duplicated, so the caller keeps ownership of the
	      original. The copy is freed by delete_all_lilicit()

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
Lilicit* add_lilicit(Lilicit *ptr_first, char* cit) {
  Lilicit* ptr_new;

  /* allocate memory for a new list member */
  ptr_new = malloc(sizeof(Lilicit));

  if (!ptr_new) {
    return NULL; /* out of memory */
  }

  /* the list keeps its own copy of the reference string */
  ptr_new->cit = strdup(cit);

  if (!ptr_new->cit) {
    /* out of memory: do not link a member without a reference string
       into the list, report the failure just like above */
    free(ptr_new);
    return NULL;
  }

  ptr_new->ptr_next = NULL;

  /* link new member into the list behind the current last member */
  (ptr_first->ptr_last)->ptr_next = ptr_new;
  ptr_first->ptr_last = ptr_new;

  return ptr_new;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  get_next_lilicit(): returns the member that follows the given one
                      in the Lilicit linked list

  Lilicit* get_next_lilicit returns a ptr to the following member,
              or NULL if the given member is the last one

  Lilicit* ptr_first ptr to the member to start from. Pass the
              sentinel to obtain the first real member, and pass the
	      previously returned member to walk on through the list

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
Lilicit* get_next_lilicit(Lilicit* ptr_first) {
  Lilicit* successor = ptr_first->ptr_next;

  return successor;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  get_lilicit_byname(): searches the Lilicit linked list for the
                        first member whose reference string is equal
			to the given string

  Lilicit* get_lilicit_byname returns a ptr to the first matching
              member, or NULL if no member matches

  Lilicit* ptr_first ptr to the member after which the search starts
              (usually the sentinel). The member passed in is not
	      itself compared

  const char* name ptr to string with reference string to match. The
              comparison is an exact, case-sensitive strcmp()

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
Lilicit* get_lilicit_byname(Lilicit* ptr_first, const char* name) {
  Lilicit* node;

  for (node = ptr_first->ptr_next; node != NULL; node = node->ptr_next) {
    if (node->cit == NULL) {
      /* members without a reference string can never match */
      continue;
    }

    if (strcmp(node->cit, name) == 0) {
      return node;
    }
  }

  /* ran off the end of the list without a match */
  return NULL;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  count_lilicit(): counts the members of the Lilicit linked list

  int count_lilicit returns the number of members (the sentinel does
                   not count) minus one, so that a single reference
		   yields 0 and multiple references yield a non-zero
		   value. An empty list yields 0 as well, never -1

  Lilicit* ptr_first ptr to the sentinel of the linked list

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int count_lilicit(Lilicit* ptr_first) {
  int total = 0;
  Lilicit *node;

  /* walk the chain behind the sentinel and tally every member */
  for (node = ptr_first->ptr_next; node != NULL; node = node->ptr_next) {
    total++;
  }

  /* report "members minus one", but do not go below zero */
  return (total > 0) ? total - 1 : 0;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  delete_all_lilicit(): removes every member behind the sentinel and
                        releases the memory of each member as well as
			of its reference string

  int delete_all_lilicit always returns 0

  Lilicit* ptr_first ptr to the sentinel of the linked list. The
              sentinel itself is not freed; it is reset so that it
	      describes an empty list again

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int delete_all_lilicit(Lilicit *ptr_first) {
  Lilicit* victim;
  Lilicit* remaining = ptr_first->ptr_next; /* first member behind the sentinel */

  while (remaining != NULL) {
    /* detach the head of the remaining chain before it is destroyed */
    victim = remaining;
    remaining = victim->ptr_next;

    /* cit may be NULL; free() accepts that */
    free(victim->cit);
    free(victim);
  }

  /* an empty list: no successor, and the sentinel is its own tail */
  ptr_first->ptr_last = ptr_first;
  ptr_first->ptr_next = NULL;

  return 0;
}

