/* -*-Mode: C++;-*-
 * $Id: alloc.cc 1.4 Wed, 16 May 2001 03:33:56 +0400 jmacd $
 *
 * Copyright (C) 1999, 2000, Joshua P. MacDonald <jmacd@CS.Berkeley.EDU>
 * and The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 *    Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *
 *    Neither name of The University of California nor the names of
 *    its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "xdfs_cpp.h"

#include <unistd.h>
#include <errno.h>

int
DXN::allocate_major (const SAREA &area,
		     XNUM        &next,
		     int          flagsX)
{
    int ret;
    int maskflag = (flagsX & DBFS_ALLOC_MASK);

    if (maskflag == DBFS_ALLOC_MASK) {
	DBFS_ERROR ("DBFS_ALLOC flags are exclusive");
	return EINVAL;
    } else if (maskflag == 0) {
	maskflag = DBFS_ALLOC_DEFAULT;
    }

    SAREA_DESC    &desc = const_cast<SAREA_DESC&> (area.ref ());
    XSEQNO         cseq;
    RECDBT<XSEQNO> cseq_dbt (cseq);
    NULLDBT        null;
    CKEY           ckey (sizeof (XSEQNO));
    bool           search = true;

  again:

    if (maskflag == DBFS_ALLOC_RANDOM) {

	cseq = XSEQNO (random ());

    } else if (search) {

	search = false;

	if ((ret = desc.locc.move_pos (ckey, null, DB_LAST, DBFS_RMW)) && (ret != DBFS_NOTFOUND)) {
	    PROP_ERROR (ret) ("move_pos");
	    return ret;
	}

	if (ret == DBFS_NOTFOUND) {
	    cseq = XSEQNO (0);
	} else {
	    ckey.template get_rec_prefix<XSEQNO> (cseq);
	}
    }

    // cseq is set to the last valid major key, pick the next
    // rounded-off interval: use big endian for computation.
    // @@ Would be much better to use endian-inspecific data
    // format altogether.  The issue only exists when
    // arithmetic (comparisons) are done...  See also the
    // allocation of short cseqs.
    XSEQNO be (XNTOHL (cseq));

    be += DBFS_CONT_ID_INTERVAL;
    be &= ~DBFS_CONT_ID_MASK;

    cseq = XHTONL (be);

    next = XNUM (desc.area_id, cseq);

    // Check whether it was allocated in this transaction already
    MAJOR_MAP::iterator exist = _txn._major_map.find (next.key ());

    if (exist != _txn._major_map.end ()) {
	DEBUG_ALLOC (area) ("allocate major node allocated: ") (cseq);
	goto again;
    }

    if ((ret = desc.locc.get_or_notfound (cseq_dbt, null, DBFS_RMW)) && (ret != DBFS_NOTFOUND)) {
	PROP_ERROR (ret) ("get_or_notfound");
	return ret;
    }

    // Break when DBFS_NOTFOUND
    if (ret == 0) {
	g_assert (maskflag == DBFS_ALLOC_RANDOM);
	DEBUG_ALLOC (area) ("allocate major node exists: ") (cseq);
	goto again;
    }

    DEBUG_ALLOC (area) ("allocate major node ID ") (cseq);

    return 0;
}

int
DXN::create_short (const NODEC   &node,
		   const guint8  *buf,
		   const XSIZ    &length,
		   int            flags)
{
    int            ret;
    XSEQNO         cseq = node.majorc ().number ().cont_seq ();
    RECDBT<XSEQNO> key_dbt (cseq);
    NULLDBT        null;

    if ((ret = node.pre_overwrite_now (flags))) {
	return ret;
    }

    g_assert (length.key () <= DBFS_FS_SHORT_THRESH);

    if (length.key () > 0) {
	// This scan looks for a free XSEQNO in the container DB
	// immediately above NODE.
	do {

	    XSEQNO  n (XNTOHL (cseq));

	    n += 1;

	    if ((n.key () & DBFS_CONT_ID_MASK) == 0) {
		// @@ Check this sometime: need to catch
		// over-allocation of shorts, i.e., this algorithm
		// breaks down if the DBFS_CONT_ID_INTERVAL runs out
		// of space
		n += 1;
	    }

	    cseq = XHTONL (n);

	    // Pass DB_RMW since we're likely to write-lock the first page touched
	    if ((ret = node.locc ().get_or_notfound (key_dbt, null, DBFS_RMW)) && (ret != DBFS_NOTFOUND)) {
		PROP_ERROR (ret) ("get_or_notfound");
		return ret;
	    }
	    // Break when ret == DBFS_NOTFOUND
	} while (ret == 0);

	// Found a slot
	if ((ret = node.locc ().put_overwrite (key_dbt, BYTEDBT (buf, length.key ())))) {
	    PROP_ERROR (ret) ("put_overwrite");
	    return ret;
	}
    }

    MINOR_DESC &desc = const_cast<MINOR_DESC&> (node.ref ());

    desc.rec.type   = XTYPE_SHORTSEG;
    desc.rec.length = length;

    if (length.key () > 0) {
	desc.rec.cont = XNUM (node.sarea ().area_id (), cseq);
    } else {
	desc.rec.cont = XNUM (XAREA (0), XSEQNO (0));
    }

    desc.set_dirty ();

    return 0;
}

// Take one file id from the _alloc_fids queue and return it in FID.
//
// When the queue reports DBFS_NOTFOUND, a new base id is obtained
// from create_base, DBFS_FID_BASE_ALLOC consecutive ids starting at
// that base are appended to the queue, and the consume is retried.
// On success a TXN_DEALLOC record for FID is pushed on the
// transaction's _dealloc list.
//
// Returns 0 on success or the first error encountered.
int
DXN::allocate_fid (XFID &fid)
{
    int     ret;
    XSEQNO  seqno;

    while ((ret = dbfs ()._alloc_fids.template consume_T<XFID> (_txn, seqno, fid)) != 0) {

	if (ret != DBFS_NOTFOUND) {
	    PROP_ERROR (ret) ("alloc_fids_consume");
	    return ret;
	}

	// Queue underflow: refill it from a fresh base.
	XFID fid_base;

	ret = dbfs ().create_base (fid_base);

	if (ret != 0) {
	    PROP_ERROR (ret) ("create_base");
	    return ret;
	}

	DEBUG_FREELIST ("allocate_fid underflow");

	for (uint offset = 0; offset < DBFS_FID_BASE_ALLOC; offset += 1) {

	    XSEQNO   fill_seqno;
	    XFID     alloc_fid (fid_base + offset);

	    ret = dbfs ()._alloc_fids.template append_T<XFID> (_txn, fill_seqno, alloc_fid);

	    if (ret != 0) {
		PROP_ERROR (ret) ("alloc_fids_append");
		return ret;
	    }

	    DEBUG_FREELIST ("allocate_fid append fid ") () << alloc_fid << " [" << fill_seqno << "]";
	}
    }

    DEBUG_FREELIST ("allocate_fid ") () << fid << " [" << seqno << "]";

    _txn._dealloc.push_front (new TXN_DEALLOC (fid));

    return 0;
}

// Queue FID, tagged with TYPE, on the _clean_fids list within this
// transaction.  Returns 0 on success or the append error.
int
DXN::deallocate_fid (const XFID  &fid,
		     const XTYPE &type)
{
    XSEQNO   seqno;
    XFREEELT free_elt (fid, type);

    int ret = dbfs ()._clean_fids.template append_T<XFREEELT> (_txn, seqno, free_elt);

    if (ret != 0) {
	PROP_ERROR (ret) ("clean_fids_append");
	return ret;
    }

    DEBUG_FREELIST ("deallocate fid ") () << fid;

    return 0;
}

// Drain the _dealloc list after an abort.  For each recorded fid,
// try to remove it from _shared_fds; if it is not found there, try
// _shared_dbs instead.  Failures other than DBFS_NOTFOUND are
// reported through PROP_ERROR but do not stop the loop.  Every
// record is deleted once processed.
void
TXN::post_abort ()
{
    DEBUG_SHARED ("post_abort ") (*this);

    while (! _dealloc.empty ()) {
	TXN_DEALLOC *deal = & _dealloc.pop_front ();

	int ret = dbfs ()._shared_fds.remove (deal->_fid);

	if (ret == DBFS_NOTFOUND) {

	    // Not a shared fd: fall back to the shared DB table.
	    ret = dbfs ()._shared_dbs.remove (deal->_fid);

	    if (ret != 0 && ret != DBFS_NOTFOUND) {
		PROP_ERROR (ret) ("shared_dbs_remove");
	    }

	} else if (ret != 0) {
	    PROP_ERROR (ret) ("shared_fds_remove");
	}

	delete deal;
    }
}

int
TXN::post_commit ()
{
    int      ret;
    XSEQNO   alloc_seqno;
    XSEQNO   clean_seqno;
    XFREEELT clean_elt;
    DBCREF   clean_curs;
    STXN     stxn;

    // These are only used in post_abort
    TXN_DEALLOC *deal;
    while (! _dealloc.empty ()) {
	deal = & _dealloc.pop_front ();
	delete deal;
    }

    if ((ret = stxn.begin (dbfs (), DBFS_TXN_NOSYNC_INTERNAL))) {
	PROP_ERROR (ret) ("post_commit_stxn");
	return ret;
    }

    if ((ret = dbfs ()._clean_fids.cursor (stxn, clean_curs))) {
	PROP_ERROR (ret) ("clean_fids_cursor");
	return ret;
    }

    while ((ret = clean_curs.template consume_T<XFREEELT> (clean_seqno, clean_elt)) == 0) {

	DEBUG_FREELIST ("clean_fids consume [") () << clean_seqno << " " << clean_elt.fid
						   << " " << dbfs_xtype_to_string (clean_elt.type) << "]";

	switch ((FileType) clean_elt.type.key ()) {
	case FT_NotPresent:
	    break;

	case FT_DirHash:
	case FT_DirBtree:
	case FT_DirSeq: {

	    string  name;
	    Db     *dbp = new Db (dbfs ()._env, 0);

	    dbfs ().relative_fname (clean_elt.fid, name);

	    DEBUG_FREELIST ("clean_fids unlink db ") () << name;

	    if ((ret = dbp->remove (name.c_str (), NULL, 0))) {
		DB_ERROR (ret) ("db_remove");
		return ret;
	    }

	    break;
	}

	case FT_LongSeg: {

	    string  name;

	    dbfs ().absolute_fname (clean_elt.fid, name);

	    DEBUG_FREELIST ("clean_fids unlink fd ") () << name;

	    if (unlink (name.c_str ()) && errno != ENOENT) {
		ret = errno;
		SYS_ERROR (ret) ("fs_unlink");
		return ret;
	    }

	    break;
	}

	default:
	    g_assert_not_reached ();
	    break;
	}

	if ((ret = dbfs ()._alloc_fids.template append_T<XFID> (stxn, alloc_seqno, clean_elt.fid))) {
	    PROP_ERROR (ret) ("alloc_fids_append");
	    return ret;
	}

	DEBUG_FREELIST ("alloc append [") () << alloc_seqno << " " << clean_elt.fid << "]";
    }

    if (ret != DBFS_NOTFOUND) {
	PROP_ERROR (ret) ("clean_fids_consume");
	return ret;
    }

    if ((ret = clean_curs.close ())) {
	PROP_ERROR (ret) ("clean_curs_close");
	return ret;
    }

    if ((ret = stxn.commit ())) {
	PROP_ERROR (ret) ("stxn_commit");
	return ret;
    }

    return 0;
}
