/*
 *   (C) Copyright IBM Corp. 2001, 2005
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: mdregmgr
 * File: raid5_mgr.c
 *
 * Description: This file contains all of the required engine-plugin APIs
 *              for the Raid5 MD region manager.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <plugin.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <malloc.h>

#include "md.h"
#include "xor.h"
#include "raid5_mgr.h"

#define my_plugin_record raid5_plugin

/* Global variables */

/*
 * Head of a list of kill_sectors_t entries.  It is not referenced in the
 * portion of the file reviewed here; presumably it tracks pending
 * kill-sector requests -- TODO confirm against the rest of the file.
 */
kill_sectors_t * kill_sector_list_head = NULL;

/*
 * raid5_expand_shrink_list receives the original md_volume_t of a region
 * whenever an expand, shrink, unwind-expansion or resume-shrinking
 * operation clones the volume.  Both lists are allocated lazily by
 * raid5_setup_evms_plugin().
 * NOTE(review): the consumers of raid5_delay_kill_sector_list are not
 * visible in this part of the file.
 */
static list_anchor_t raid5_expand_shrink_list = NULL;
static list_anchor_t raid5_delay_kill_sector_list = NULL;

/* Forward references */

/* Volume-level read; defined later in the file. */
static int raid5_volume_read (
	md_volume_t *volume,
	lsn_t lsn,
	sector_count_t count,
	void *buffer);

/* Volume-level write; defined later in the file. */
static int raid5_volume_write (
	md_volume_t *volume,
	lsn_t lsn,
	sector_count_t count,
	void *buffer);

/* Region-level write; defined later in the file. */
static int raid5_write (
	storage_object_t * region,
	lsn_t lsn,
	sector_count_t count,
	void * buffer );


/*
 * Release the raid5_conf_t attached to volume->private_data, together with
 * its disks array and its stripe chunk array, then clear the pointer.
 * When the volume carries no private data a warning is logged and nothing
 * else happens.
 */
static void raid5_free_private_data(md_volume_t * volume)
{
	raid5_conf_t *r5conf = (raid5_conf_t *)volume->private_data;

	LOG_ENTRY();

	if (r5conf != NULL) {
		if (r5conf->disks != NULL) {
			EngFncs->engine_free(r5conf->disks);
			r5conf->disks = NULL;
		}

		if (r5conf->stripe.chunks != NULL) {
			EngFncs->engine_free(r5conf->stripe.chunks);
			r5conf->stripe.chunks = NULL;
		}

		/* Finally drop the configuration structure itself. */
		EngFncs->engine_free(volume->private_data);
		volume->private_data = NULL;
	} else {
		LOG_WARNING("Nothing to free!!!.\n");
	}

	LOG_EXIT_VOID();
}

/*
 * Function: create_raid5_conf
 *
 *  Populate the raid5_conf_t that the caller has already allocated and
 *  attached to vol->private_data.  The per-disk table and the stripe chunk
 *  table are allocated here, every non-stale member is classified as
 *  faulty, active or spare, and the volume is flagged MD_DEGRADED (one raid
 *  disk missing) or MD_CORRUPT (more than one missing, or the missing disk
 *  cannot be identified).
 *
 *  Returns:
 *    0       the configuration was built; a degraded or corrupt array is
 *            reported through vol->flags, not through the return code
 *    EFAULT  vol has no private data
 *    ENOMEM  a table could not be allocated; the private data has been
 *            released and vol->private_data is NULL on return
 */
static int create_raid5_conf (md_volume_t * vol)
{
	raid5_conf_t * conf = mdvol_to_conf(vol);
	disk_info_t * conf_disk;
	int disk_index;
	md_member_t *member;
	storage_object_t *obj;
	list_element_t iter;
	md_super_info_t info;
	mdu_disk_info_t d;
	int i, count, length = 0;
	int rc = 0;

	LOG_ENTRY();

	if (!conf) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	md_volume_get_super_info(vol, &info);
	conf->mddev = vol;
	conf->failed_disk_index = -1;

	LOG_DEBUG("%s: info.raid_disks: %d, info.nr_disks: %d.\n", vol->name, info.raid_disks, info.nr_disks);

	/* Size the tables for whichever count is larger. */
	conf->stripe.nr_disks = (info.raid_disks > info.nr_disks) ? info.raid_disks : info.nr_disks;
	conf->disks = EngFncs->engine_alloc(sizeof(disk_info_t) * conf->stripe.nr_disks);
	conf->stripe.chunks = EngFncs->engine_alloc(sizeof(chunk_t) * conf->stripe.nr_disks);

	/* Set the volume to which the stripe belongs. */
	conf->stripe.volume = vol;

	if (!conf->disks || !conf->stripe.chunks) {
		rc = ENOMEM;
		goto out;
	}

	LIST_FOR_EACH(vol->members, iter, member) {
		obj = member->obj;
		if (obj && !(member->flags & MD_MEMBER_STALE)) {

			vol->sb_func->get_sb_disk_info(member, &d);

			disk_index = d.raid_disk;

			/*
			 * During discovery, raid_disk is set to -1 for spare devices.
			 * Therefore, we can not use -1; use the disk number instead.
			 */
			if (disk_index == -1) {
				disk_index = d.number;
			}

			LOG_DEBUG("RAID5 Configuring object(%s) raid_disk(%d).\n",
				  obj->name, disk_index);

			/*
			 * NOTE(review): disk_index is not range-checked against
			 * conf->stripe.nr_disks before it is used as an index.
			 */
			conf_disk = &conf->disks[disk_index];

			if (d.state & (1 << MD_DISK_FAULTY)) {
				LOG_WARNING("Disabled device %s (errors detected)\n",
					    obj->name);
				conf_disk->number = d.number;
				conf_disk->raid_disk = disk_index;
				conf_disk->dev = member;

				conf_disk->operational = 0;
				conf_disk->write_only = 0;
				conf_disk->spare = 0;
				conf_disk->used_slot = 1;

				/*
				 * Save the failed disk index if the disk is
				 * part of the RAID and we don't already have
				 * a failed disk.
				 */
				if (disk_index < info.raid_disks) {
					if (conf->failed_disk_index < 0) {
						conf->failed_disk_index = disk_index;
					}
				}
				conf->failed_disks++;
				continue;
			}

			if (d.state & (1 << MD_DISK_ACTIVE)) {
				if (!(d.state & (1 << MD_DISK_SYNC))) {
					LOG_WARNING("Disabled device %s (not in sync)\n",
						    obj->name);
					continue;
				}
				if (disk_index > info.raid_disks) {
					LOG_WARNING("Disabled device %s (inconsistent descriptor)\n",
						    obj->name);
					continue;
				}
				if (conf_disk->operational) {
					LOG_WARNING("Disabled device %s (device %d already operational)\n",
						    obj->name, disk_index);
					continue;
				}
				if (disk_index >= info.raid_disks) {
					LOG_WARNING("Active device %s is outside of the array\n",
						    obj->name);
					continue;
				}
				LOG_DEBUG("Device %s operational as raid disk %d.\n",
					  obj->name, disk_index);

				conf_disk->number = d.number;
				conf_disk->raid_disk = disk_index;
				conf_disk->dev = member;
				conf_disk->operational = 1;
				conf_disk->used_slot = 1;

				conf->active_disks++;
			} else {
				/*
				 * Must be a spare disk.
				 */
				LOG_DEBUG("spare disk %s\n", obj->name);
				conf_disk->number = d.number;
				conf_disk->raid_disk = disk_index;
				conf_disk->dev = member;

				conf_disk->operational = 0;
				conf_disk->write_only = 0;
				conf_disk->spare = 1;
				conf_disk->used_slot = 1;

				/* Remember the first spare encountered. */
				if (conf->spare.used_slot == 0) {
					conf->spare = *conf_disk;
				}
				conf->spare_disks++;
			}
		}
	}

	conf->chunksize    = info.chunksize;
	conf->size         = info.size;
	conf->level        = info.level;
	conf->algorithm    = info.layout;
	conf->raid_disks   = info.raid_disks;
	conf->failed_raid_disks = conf->raid_disks - conf->active_disks;

	if (conf->failed_raid_disks != 0) {
		if (conf->failed_raid_disks == 1) {
			vol->flags |= MD_DEGRADED;

			if (conf->failed_disk_index < 0) {

				/* Find the missing disk. */
				for (i = 0; (i <conf->raid_disks) && (conf->failed_disk_index < 0); i++) {
					if (conf->disks[i].used_slot == 0) {
						conf->failed_disk_index = i;
					}
				}

				if (conf->failed_disk_index >= 0) {
					/*
					 * Find the entry for the failed disk in
					 * the super block so that we can fill
					 * in as much information as possible
					 * about the missing disk.
					 */
					for (i = 0; i < MAX_DISKS(vol); i++) {
						d.number = i;
						vol->sb_func->get_sb_disk_info_for_index(vol->sb, &d);

						if (d.raid_disk == conf->failed_disk_index) {
							conf_disk = &conf->disks[conf->failed_disk_index];
							conf_disk->number = d.number;
							conf_disk->raid_disk = conf->failed_disk_index;
							conf_disk->dev = NULL;

							conf_disk->operational = 0;
							conf_disk->write_only = 0;
							conf_disk->spare = 0;
							conf_disk->used_slot = 1;
							break;
						}
					}
				} else {
					/*
					 * One of the required RAID disks is not
					 * active, yet we could not find the
					 * failed disk and thus run in degrade
					 * mode.  Mark the volume corrupt.
					 */
					vol->flags &= ~MD_DEGRADED;
					vol->flags |= MD_CORRUPT;
					LOG_CRITICAL("%s is corrupt (%s:%d)\n", vol->name, __FILE__, __LINE__);
					length = sprintf(message_buffer,
							 _("RAID5 region %s is corrupt."
							   "  The number of raid disks for a full functional array is %d."
							   "  The number of active disks is %d."
							   "  However, the missing disk entry can not be determined." ),
							 vol->name, conf->raid_disks, conf->active_disks);
					goto queue_corrupt_message;

				}
			}

			if (vol->flags & MD_DEGRADED) {

				sprintf(message_buffer,
					_("RAID%d array %s is missing the member %s with RAID index %d.  "
					  "The array is running in degrade mode."),
					conf->level, vol->name,
					(conf->disks[conf->failed_disk_index].dev != NULL) ? conf->disks[conf->failed_disk_index].dev->obj->name : "",
					conf->failed_disk_index);

				if (vol->region && md_is_recovery_running(vol->region)) {
					strcat(message_buffer,
					       _("  The MD recovery process is running, please wait..."));
				}
				MESSAGE("%s\n", message_buffer);
			}

		} else {
			/* Too many failed disks in the RAID. */
			vol->flags |= MD_CORRUPT;
			LOG_CRITICAL("%s is corrupt (%s:%d)\n", vol->name, __FILE__, __LINE__);
			length = sprintf(message_buffer,
					 _("RAID5 region %s is corrupt."
					   "  The number of raid disks for a full functional array is %d."
					   "  The number of active disks is %d."
					   "  The are %d missing or failed raid disks."),
					 vol->name, conf->raid_disks, conf->active_disks, conf->failed_raid_disks);
			goto queue_corrupt_message;
		}
	}

queue_corrupt_message:
	if ((vol->flags & MD_CORRUPT) && (length > 0)) {
		/*
		 * We don't want to confuse the user with many corrupt messages
		 * for the same volume.
		 */
		count = md_count_corrupt_messages(vol);
		if (count == 0) {
			md_queue_corrupt_message(vol, message_buffer, length);
		} else {
			LOG_WARNING("Number of corrupt message had been queued for %s: %d.\n",
				    vol->name, count);
		}
	}
out:
	if (rc) {
		raid5_free_private_data(vol);
	}
	/*
	 * Propagate the real status.  Returning 0 here would tell callers
	 * that the configuration exists after it has just been freed.
	 */
	LOG_EXIT_INT(rc);
	return rc;
}

/* Function: raid5_setup_evms_plugin
 *
 *  Called by the Engine shortly after the plugin is loaded, before the
 *  first discovery pass.  Records the engine function table, registers the
 *  MD name space and creates the two plugin-wide lists if they do not
 *  exist yet.
 *
 *  Returns EINVAL when no function table is supplied, otherwise the result
 *  of md_register_name_space().
 */
static int raid5_setup_evms_plugin(engine_functions_t * functions) {
	int status;

	/* Without the engine services nothing else can be done. */
	if (functions == NULL) {
		return EINVAL;
	}

	EngFncs = functions;
	my_plugin = raid5_plugin;

	LOG_ENTRY();

	status = md_register_name_space();
	if (status) {
		LOG_SERIOUS("Failed to register the MD name space.\n");
	}

	/* The lists are created once and kept across repeated setup calls. */
	if (!raid5_expand_shrink_list) {
		raid5_expand_shrink_list = EngFncs->allocate_list();
	}

	if (!raid5_delay_kill_sector_list) {
		raid5_delay_kill_sector_list = EngFncs->allocate_list();
	}

	LOG_EXIT_INT(status);
	return status;
}


/****** Region Checking Functions ******/


/* All of the following md_can_ functions return 0 if they are able to
 * perform the specified action, or non-zero if they cannot.
 */


/* Function: raid5_can_delete
 *
 *  Reports whether the given MD region may be deleted.  No checks are
 *  performed: the answer is always 0 (yes).
 */
static int raid5_can_delete( storage_object_t * region ) {
	int rc = 0;

	LOG_ENTRY();

	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * Function: raid5_can_replace_child
 *
 *  Thin wrapper: selects this plugin as the current one and forwards the
 *  question to the common md_can_replace_child(), returning its result.
 */
static int raid5_can_replace_child(storage_object_t *region,
				   storage_object_t *child,
				   storage_object_t *new_child)
{
	int result;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	result = md_can_replace_child(region, child, new_child);

	LOG_EXIT_INT(result);
	return result;
}

/*
 * Function: add_active_disk
 *
 *  Wrap new_disk in a freshly allocated member, place it in an empty slot
 *  of vol as a new, active, pending disk and link the region to the object.
 *
 *  Returns 0 on success, EINVAL when the volume's disk count disagrees
 *  with its superblock, ENOMEM when no member can be allocated, or the
 *  error reported by the slot search / member insertion.  The member is
 *  freed again on any failure after it was allocated.
 */
static int add_active_disk(md_volume_t *vol, storage_object_t *new_disk)
{
	md_super_info_t sb_info;
	md_member_t *new_member = NULL;
	int rc = 0;

	LOG_ENTRY();

	md_volume_get_super_info(vol, &sb_info);
	if (vol->nr_disks != sb_info.nr_disks) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto done;
	}

	new_member = md_allocate_member(new_disk);
	if (new_member == NULL) {
		rc = ENOMEM;
		goto done;
	}

	rc = md_volume_find_empty_slot(vol, &new_member->dev_number);
	if (rc == 0) {
		new_member->flags |= (MD_MEMBER_NEW | MD_MEMBER_DISK_ACTIVE | MD_MEMBER_DISK_PENDING);
		new_member->data_size = md_object_usable_size(new_disk, &vol->sb_ver, vol->chunksize);
		rc = md_volume_add_new_member(vol, new_member);
	}

	if (rc == 0) {
		md_append_region_to_object(vol->region, new_disk);
	} else {
		/* The member never made it into the volume; give it back. */
		md_free_member(new_member);
	}

done:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: remove_active_disk
 *
 *  Locate the member of vol that sits on child, remove it from the volume
 *  and free it.
 *
 *  Returns 0 on success, EINVAL when child is not a member of vol, or the
 *  error from md_volume_remove_member().  The child object itself stays
 *  bound to the MD region; only the member bookkeeping is released.
 */
static int remove_active_disk(md_volume_t *vol, storage_object_t *child)
{
	md_member_t *candidate;
	md_member_t *target = NULL;
	list_element_t iter;
	int rc;

	LOG_ENTRY();

	LIST_FOR_EACH(vol->members, iter, candidate) {
		if (candidate->obj == child) {
			target = candidate;
			break;
		}
	}

	if (target == NULL) {
		LOG_MD_BUG();
		rc = EINVAL;
	} else {
		rc = md_volume_remove_member(target, TRUE);
		if (rc == 0) {
			/*
			 * Just in case of error, hold on to the child for now.
			 * Don't unbind the child from the MD region.
			 */
			md_free_member(target);
		}
	}

	LOG_EXIT_INT(rc);
	return (rc);
}

/*
 * Function: raid5_expand
 *
 *  Expand the RAID5 region by adding every object in input_objects as a
 *  new active disk.  The work is done on a clone of the current volume;
 *  the original volume is parked on raid5_expand_shrink_list and the
 *  region is switched to the clone only when everything succeeded.
 *
 *  expand_object and options are not used by this implementation.
 *
 *  Returns:
 *    0       success; the region is marked dirty and the clone carries the
 *            MD_RAID5_EXPAND_PENDING / MD_ARRAY_RESIZE_PENDING flags
 *    EINVAL  the region is not offline
 *    EBUSY   MD recovery is running on the region
 *    ENOMEM  clone, list insertion or conf allocation failed
 *    other   error from can_expand_by(), add_active_disk() or
 *            create_raid5_conf()
 */
static int raid5_expand( storage_object_t    * region,
                         storage_object_t    * expand_object,
                         list_anchor_t         input_objects,
                         option_array_t      * options )
{
	int rc = 0;
	list_element_t iter;
	list_element_t li = NULL;
	storage_object_t *obj;
	md_volume_t *org_vol = (md_volume_t *)region->private_data;
	md_volume_t *new_vol = NULL;
	logical_volume_t *evms_volume;
	u_int64_t add_size = 0;
	md_member_t *member;

	my_plugin = raid5_plugin;
	
	LOG_ENTRY();

	/* Don't allow expanding if volume is mounted */
	if (EngFncs->is_offline(region, &evms_volume) == FALSE) {
		LOG_WARNING("Hmm... %s is mounted.\n", evms_volume->name);
		LOG_EXIT_INT(EINVAL);
		return(EINVAL);
	}

	/* Don't allow expanding if the region is syncing */
	if (md_is_recovery_running(region) == TRUE) {
		LOG_EXIT_INT(EBUSY);
		return(EBUSY);
	}
	
	/* Ask the engine if it's ok to add these objects. */
	/* The requested growth is the sum of the raw sizes of the new objects. */
	LIST_FOR_EACH(input_objects, iter, obj) {
		add_size += obj->size;
	}
	rc = EngFncs->can_expand_by(region, &add_size);
	if (rc) {
		LOG_ERROR("Expand of region %s rejectd by the engine.\n",
			  region->name);
		LOG_EXIT_INT(rc);
		return rc;
	}

	/* Make a copy of the original MD volume */
	new_vol = md_clone_volume(org_vol);
	if (!new_vol) {
		rc = ENOMEM;
		goto out;
	}
	
	/* Park the original volume; li is kept so a failure can undo this. */
	li = EngFncs->insert_thing(raid5_expand_shrink_list, org_vol, INSERT_AFTER, NULL);
	if (!li) {
		rc = ENOMEM;
		goto out;
	}

	/* All new disks go into the clone, never into the original. */
	LIST_FOR_EACH(input_objects, iter, obj) {
		
		rc = add_active_disk(new_vol, obj);
		if (rc) {
			goto out;
		}
	}

	/* Build a new raid5_conf */
	new_vol->private_data = EngFncs->engine_alloc(sizeof (raid5_conf_t));
	if (!new_vol->private_data) {
		rc = ENOMEM;
		goto out;
	}

	rc = create_raid5_conf(new_vol);
	if (!rc) {
		/* recalculate size */
		new_vol->flags |= MD_NEEDS_UPDATE_SIZE;
		region->private_data = new_vol;
		region->size = md_volume_calc_size(new_vol);

		region->flags |= SOFLAG_DIRTY;
		/* An active region is flagged for both deactivation and activation. */
		if (region->flags & SOFLAG_ACTIVE)
			region->flags |= (SOFLAG_NEEDS_DEACTIVATE | SOFLAG_NEEDS_ACTIVATE);
		new_vol->region_mgr_flags |= MD_RAID5_EXPAND_PENDING;
		new_vol->flags |= MD_ARRAY_RESIZE_PENDING;
	}

out:
	if (rc) {
		if (new_vol) {
			if (new_vol->private_data) {
				raid5_free_private_data(new_vol);
			}
			/* Error, unwinding... */
			/* Detach the region from objects that exist only in the clone. */
			LIST_FOR_EACH(new_vol->members, iter, member) {
				if (!md_volume_find_object(org_vol, member->obj)) {
					md_remove_region_from_object(region, member->obj);
				}
			}
			md_free_volume(new_vol);
		}

		/* Put the region back on the original volume. */
		region->size = md_volume_calc_size(org_vol);
		region->private_data = org_vol;
		if (li) {
			EngFncs->delete_element(li);
		}
	}
	
	LOG_EXIT_INT(rc);
	return(rc);
}

/* Function: raid5_can_expand
 *
 *  Decide whether the region can grow by adding disks and, if so, append
 *  an expand_object_info_t describing the maximum growth to
 *  expansion_points.
 *
 *  Every available object that is at least as large as the current
 *  per-disk size (conf->size) contributes conf->size sectors, as long as
 *  the total stays within expand_limit and the array stays within
 *  MAX_DISKS(vol) disks.
 *
 *  Returns:
 *    0       the check completed (an expansion point is added only when
 *            at least one usable object was found)
 *    EINVAL  region degraded/corrupt, not offline, or already holding the
 *            maximum number of disks
 *    EBUSY   changes are pending or MD recovery is running
 *    ENOMEM  the expansion point could not be allocated or queued
 *    other   error from get_object_list()
 */
static int raid5_can_expand(
	storage_object_t * region,
	u_int64_t expand_limit,
	list_anchor_t expansion_points )
{
	int rc = 0;
	md_volume_t *vol = (md_volume_t *)region->private_data;
	expand_object_info_t * expand_object;
	list_anchor_t acceptable_objects = NULL;
	list_element_t iter, li=NULL;
	storage_object_t *obj;
	sector_count_t expand_size;
	logical_volume_t *evms_volume;
	sector_count_t size;
	int disk_count;
	raid5_conf_t *conf = mdvol_to_conf(vol);

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/* Don't allow expanding if the region is corrupt or degraded. */
	if (vol->flags & (MD_DEGRADED | MD_CORRUPT)) {
		LOG_EXIT_INT(EINVAL);
		return (EINVAL);
	}

	/* Don't allow expanding if volume is mounted */
	if (EngFncs->is_offline(region, &evms_volume) == FALSE) {
		LOG_EXIT_INT(EINVAL);
		return(EINVAL);
	}

	/* Don't allow expanding if changes are pending */
	if (region->flags & SOFLAG_DIRTY) {
		LOG_EXIT_INT(EBUSY);
		return(EBUSY);
	}

	/* Don't allow expanding if the region is syncing */
	if (md_is_recovery_running(region) == TRUE) {
		LOG_EXIT_INT(EBUSY);
		return(EBUSY);
	}

	/*
	 * This region can expand only if nr_disks < max_disks.  An array that
	 * already holds MAX_DISKS members has no free slot left, so the
	 * comparison must reject equality as well.
	 */
	if ( vol->nr_disks >= MAX_DISKS(vol)) {
		/* TODO: We should allow the last child to expand */
		rc = EINVAL;
		LOG_EXIT_INT(rc);
		return (rc);
	}

	/* Calculate maximum expansion size */
	rc = EngFncs->get_object_list(DISK | SEGMENT | REGION,
				DATA_TYPE,
				NULL,
				region->disk_group,
				VALID_INPUT_OBJECT | NO_DISK_GROUP,
				&acceptable_objects);

	if (rc) {
		LOG_WARNING("Error getting available object list.\n");
		LOG_EXIT_INT(rc);
		return rc;
	}

	if (!acceptable_objects) {
		goto out;
	}

	/* Remove all parents of this MD region from acceptable list */
	remove_parent_regions_from_list(acceptable_objects, region);

	if (EngFncs->list_count(acceptable_objects) == 0) {
		goto out;
	}

	expand_size = 0;
	disk_count = vol->nr_disks;
	LIST_FOR_EACH(acceptable_objects, iter, obj) {
		if ( obj != region ) {
			/*
			 * disk_count is the number of disks the array would
			 * have so far; another one fits only while it is
			 * strictly below the maximum.
			 */
			if ( disk_count < MAX_DISKS(vol)) {
				size = md_object_usable_size(obj, &vol->sb_ver, conf->chunksize);

				/* Only consider objects which are larger than the smallest MD object */
				if (size >= conf->size) {
					if (expand_size + conf->size > expand_limit) {
						break;
					}
					expand_size += conf->size;
					disk_count++;
				}
			}
		}
	}

	if (expand_size) {

		expand_object = (expand_object_info_t *) EngFncs->engine_alloc( sizeof(expand_object_info_t) );
		if (expand_object) {
			expand_object->object          = region;
			expand_object->max_expand_size = expand_size;

			li = EngFncs->insert_thing(expansion_points,
						   expand_object,
						   INSERT_AFTER,
						   NULL);

			if (!li) {
				EngFncs->engine_free( expand_object );
				rc = ENOMEM;
			}
		} else {
			rc = ENOMEM;
		}
	}

out:
	if (acceptable_objects) {
		EngFncs->destroy_list(acceptable_objects);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_unwind_expansion
 *
 *  Called during RAID5 discovery when an interrupted expansion is found.
 *  A clone of the discovered volume is created, the disks recorded in the
 *  saved expand information are removed from the clone, and the region is
 *  switched to the clone so that it describes the "original" md region
 *  again.  The discovered volume is parked on raid5_expand_shrink_list.
 *
 *  Returns 0 on success.  On any failure the region is left on the
 *  discovered volume and both are flagged corrupt; the error is ENOMEM for
 *  allocation failures, EINVAL for missing/inconsistent expand
 *  information, or whatever remove_active_disk()/create_raid5_conf()
 *  reported.
 */
int raid5_unwind_expansion( storage_object_t *region)
{
	md_volume_t *new_vol = NULL;
	md_volume_t *volume = (md_volume_t *)region->private_data;
	md_member_t *saved_member = NULL;
	md_saved_info_t *info = NULL;
	/* Element of raid5_expand_shrink_list that holds the parked volume. */
	list_element_t li = NULL;
	/* Scratch handle for insertions into remove_list; kept separate so
	 * that the error path below still deletes the right element. */
	list_element_t remove_li;
	list_element_t iter;
	int i, rc = 0;
	list_anchor_t remove_list = NULL;
	storage_object_t *obj;
	md_member_t *member;

	LOG_ENTRY();

	/* Make a copy of the orignal MD volume */
	new_vol = md_clone_volume(volume);
	if (!new_vol) {
		rc = ENOMEM;
		goto out;
	}

	li = EngFncs->insert_thing(raid5_expand_shrink_list, volume, INSERT_AFTER, NULL);
	if (!li) {
		rc = ENOMEM;
		goto out;
	}

	/*
	 * Find out which child object's saved area
	 * has the interrupted expansion information.
	 */

	if (md_check_for_expand_shrink_in_progress(volume, &saved_member)) {
		info = saved_member->saved_info;
	} else {
		LOG_ERROR("%s: Internal error: No expand info.\n", region->name);
		rc = EINVAL;
		goto out;
	}

	/*
	 * Check the saved expand info for the new objects that were added to the region.
	 * Put these objects in a list to remove.
	 */
	remove_list = EngFncs->allocate_list();
	if (!remove_list) {
		rc = ENOMEM;
		goto out;
	}
	for (i=0; !rc && i<info->expand_shrink_cnt; i++) {
		int idx = info->expand_shrink_devs[i];
		member = md_volume_find_member(volume, idx);
		if (member) {
			remove_li = EngFncs->insert_thing(remove_list, member->obj, INSERT_AFTER, NULL);
			if (!remove_li) {
				rc = ENOMEM;
			}
		} else {
			LOG_MD_BUG();
			rc = EINVAL;
		}
	}

	if (rc) {
		goto out;
	}

	if (EngFncs->list_count(remove_list) == 0) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	/* Remove all objects in the list from the region */
	LIST_FOR_EACH(remove_list, iter, obj) {
		rc = remove_active_disk(new_vol, obj);
		if (rc) {
			goto out;
		}
	}


	/* Build a new raid5_conf */
	new_vol->private_data = EngFncs->engine_alloc(sizeof (raid5_conf_t));
	if (!new_vol->private_data) {
		rc = ENOMEM;
		goto out;
	}

	rc = create_raid5_conf(new_vol);
	if (!rc) {
		/* recalculate size */
		new_vol->flags |= MD_NEEDS_UPDATE_SIZE;
		region->private_data = new_vol;
		region->size = md_volume_calc_size(new_vol);

		region->flags |= SOFLAG_DIRTY;
		if (region->flags & SOFLAG_ACTIVE)
			region->flags |= (SOFLAG_NEEDS_DEACTIVATE | SOFLAG_NEEDS_ACTIVATE);
		new_vol->region_mgr_flags |= MD_RAID5_UNWIND_EXPANSION_PENDING;
	}

	if (!rc) {
		/*
		 * "expanded" is the size of the discovered (still expanded)
		 * volume; "original" is the size the region has just been
		 * reset to.
		 */
		LOG_DEFAULT("%s: expanded size: %"PRIu64", original size: %"PRIu64".\n",
			  region->name, md_volume_calc_size(volume), region->size);
	}
out:
	if (rc) {
		if (new_vol) {
			if (new_vol->private_data) {
				raid5_free_private_data(new_vol);
			}
			md_free_volume(new_vol);
		}
		/* Take the parked volume off raid5_expand_shrink_list again. */
		if (li) {
			EngFncs->delete_element(li);
		}
		region->private_data = volume;
		region->size = md_volume_calc_size(volume);
		volume->flags |= MD_CORRUPT;
		region->flags |= SOFLAG_CORRUPT;
	}

	if (remove_list) {
		EngFncs->destroy_list(remove_list);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/* Function: raid5_shrink
 *
 *  Shrink the RAID5 region by removing every object in input_objects.
 *  The removal is performed on a clone of the current volume; the original
 *  volume is parked on raid5_expand_shrink_list and the region is switched
 *  to the clone only when everything succeeded.
 *
 *  shrink_object and options are not used by this implementation.
 *
 *  Returns:
 *    0       success; the region is marked dirty and the clone carries the
 *            MD_RAID5_SHRINK_PENDING / MD_ARRAY_RESIZE_PENDING flags
 *    EINVAL  the region is not offline
 *    EBUSY   MD recovery is running on the region
 *    ENOMEM  clone, list insertion or conf allocation failed
 *    other   error from can_shrink_by(), remove_active_disk() or
 *            create_raid5_conf()
 */
static int raid5_shrink( storage_object_t    * region,
                         storage_object_t    * shrink_object,
                         list_anchor_t         input_objects,
                         option_array_t      * options )
{
	int rc = 0;
	list_element_t iter;
	list_element_t li = NULL;
	storage_object_t *obj;
	md_volume_t *org_vol = (md_volume_t *)region->private_data;
	md_volume_t *new_vol = NULL;
	logical_volume_t *evms_volume;
	u_int64_t shrink_size;
	md_member_t *member;

	my_plugin = raid5_plugin;
	
	LOG_ENTRY();

	/* Don't allow shrinking if volume is mounted */
	if (EngFncs->is_offline(region, &evms_volume) == FALSE) {
		LOG_WARNING("Hmm... %s is mounted.\n", evms_volume->name);
		LOG_EXIT_INT(EINVAL);
		return(EINVAL);
	}

	/* Don't allow shrinking if the region is syncing */
	if (md_is_recovery_running(region) == TRUE) {
		LOG_EXIT_INT(EBUSY);
		return(EBUSY);
	}

	/* Calculate how much the region will be shrunk by */
	/* Objects that are not members of the volume contribute nothing here. */
	shrink_size = 0;
	LIST_FOR_EACH(input_objects, iter, obj) {
		member = md_volume_find_object(org_vol, obj);
		if (member) {
			shrink_size += member->data_size;
		}
	}
	
	/* Ask the engine if it's ok to remove these objects. */
	rc = EngFncs->can_shrink_by(region, &shrink_size);
	if (rc) {
		LOG_ERROR("Shrink of region %s rejected by the engine.\n",
			  region->name);
		LOG_EXIT_INT(rc);
		return rc;
	}

	/* Make a backup copy of the orignal MD volume */
	new_vol = md_clone_volume(org_vol);
	if (!new_vol) {
		rc = ENOMEM;
		goto out;
	}

	/* Park the original volume; li is kept so a failure can undo this. */
	li = EngFncs->insert_thing(raid5_expand_shrink_list, org_vol, INSERT_AFTER, NULL);
	if (!li) {
		rc = ENOMEM;
		goto out;
	}

	/* Disks are removed from the clone only; the original stays intact. */
	LIST_FOR_EACH(input_objects, iter, obj) {
		rc = remove_active_disk(new_vol, obj);
		if (rc) {
			goto out;
		}
	}
	
	/* Build a new raid5_conf */
	new_vol->private_data = EngFncs->engine_alloc(sizeof (raid5_conf_t));
	if (!new_vol->private_data) {
		rc = ENOMEM;
		goto out;
	}

	rc = create_raid5_conf(new_vol);
	if (!rc) {
		/* recalculate size */
		new_vol->flags |= MD_NEEDS_UPDATE_SIZE;
		region->private_data = new_vol;
		region->size = md_volume_calc_size(new_vol);
		region->flags |= SOFLAG_DIRTY;
		/* An active region is flagged for both deactivation and activation. */
		if (region->flags & SOFLAG_ACTIVE)
			region->flags |= (SOFLAG_NEEDS_DEACTIVATE | SOFLAG_NEEDS_ACTIVATE);
		new_vol->region_mgr_flags |= MD_RAID5_SHRINK_PENDING;
		new_vol->flags |= MD_ARRAY_RESIZE_PENDING;
	}

out:
	if (rc) {
		if (new_vol) {
			if (new_vol->private_data) {
				raid5_free_private_data(new_vol);
			}
			md_free_volume(new_vol);
		}
		/* Put the region back on the original volume. */
		region->size = md_volume_calc_size(org_vol);
		region->private_data = org_vol;
		if (li) {
			EngFncs->delete_element(li);
		}
	}
	
	LOG_EXIT_INT(rc);
	return (rc);
}

/*
 * Function: raid5_can_shrink
 *
 *  Decide whether the region can shrink by dropping disks and, if so,
 *  append a shrink_object_info_t with the maximum shrink size to
 *  shrink_points.  The maximum is a whole number of per-disk sizes: as
 *  many disks as can go while keeping RAID5_MIN_RAID_DISKS, reduced until
 *  it no longer exceeds shrink_limit.
 *
 *  Returns 0 when the check completed (with or without a shrink point),
 *  EINVAL when the region is degraded, corrupt or not offline, EBUSY when
 *  changes are pending or recovery is running, ENOMEM when the shrink
 *  point cannot be allocated or queued.
 */
static int raid5_can_shrink(
	storage_object_t * region,
	u_int64_t shrink_limit,
	list_anchor_t shrink_points )
{
	md_volume_t *vol;
	shrink_object_info_t *point;
	u_int64_t per_disk;
	u_int64_t max_shrink;
	logical_volume_t *evms_volume;
	int rc = 0;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	vol = (md_volume_t *)region->private_data;

	/* A degraded or corrupt array must not lose any more disks. */
	if ((vol->flags & (MD_DEGRADED | MD_CORRUPT)) != 0) {
		LOG_EXIT_INT(EINVAL);
		return (EINVAL);
	}

	/* Don't allow shrinking if volume is mounted */
	if (EngFncs->is_offline(region, &evms_volume) == FALSE) {
		LOG_EXIT_INT(EINVAL);
		return(EINVAL);
	}

	/* Don't allow shrinking if changes are pending */
	if ((region->flags & SOFLAG_DIRTY) != 0) {
		LOG_EXIT_INT(EBUSY);
		return(EBUSY);
	}

	/* Don't allow shrinking if the region is syncing */
	if (md_is_recovery_running(region) == TRUE) {
		LOG_EXIT_INT(EBUSY);
		return(EBUSY);
	}

	/* Already at the minimum disk count: nothing to offer. */
	if (vol->raid_disks <= RAID5_MIN_RAID_DISKS) {
		LOG_EXIT_INT(0);
		return (0);
	}

	per_disk = mdvol_to_conf(vol)->size;

	/* If the shrink limit is less than size of 1 child, we can't shrink */
	if (per_disk > shrink_limit) {
		LOG_EXIT_INT(0);
		return 0;
	}

	/*
	 * Start from the most optimistic value (shrink all the way down to
	 * RAID5_MIN_RAID_DISKS) and give back one disk at a time until the
	 * result fits within the limit.
	 */
	for (max_shrink = per_disk * (vol->raid_disks - RAID5_MIN_RAID_DISKS);
	     max_shrink > shrink_limit;
	     max_shrink -= per_disk) {
		/* nothing */
	}

	point = (shrink_object_info_t *) EngFncs->engine_alloc( sizeof(shrink_object_info_t) );
	if (point == NULL) {
		rc = ENOMEM;
	} else {
		point->object = region;
		point->max_shrink_size = max_shrink;

		if (EngFncs->insert_thing(shrink_points, point, INSERT_AFTER, NULL) == NULL) {
			EngFncs->engine_free( point );
			rc = ENOMEM;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_resume_shrinking
 *
 *  Called during RAID5 discovery when an interrupted shrink is found.
 *  A clone of the discovered volume is created, the disks recorded in the
 *  saved shrink information are removed from the clone, and the region is
 *  switched to the clone so the shrink can be continued.  The discovered
 *  volume is parked, unmodified, on raid5_expand_shrink_list.
 *
 *  Returns 0 on success.  On any failure the volume and the region are
 *  flagged corrupt; the error is ENOMEM for allocation failures, EINVAL
 *  for missing/inconsistent shrink information, or whatever
 *  remove_active_disk()/create_raid5_conf() reported.
 */
int raid5_resume_shrinking( storage_object_t *region)
{
	md_volume_t *new_vol = NULL;
	md_volume_t *volume = (md_volume_t *)region->private_data;
	md_member_t *saved_member = NULL;
	md_saved_info_t *info = NULL;
	/* Element of raid5_expand_shrink_list that holds the parked volume. */
	list_element_t li = NULL;
	/* Scratch handle for insertions into remove_list; kept separate so
	 * that the error path below still deletes the right element. */
	list_element_t remove_li;
	list_element_t iter;
	int i, rc = 0;
	list_anchor_t remove_list = NULL;
	storage_object_t *obj;
	md_member_t *member;

	LOG_ENTRY();

	/* Make a copy of the orignal MD volume */
	new_vol = md_clone_volume(volume);
	if (!new_vol) {
		rc = ENOMEM;
		goto out;
	}

	li = EngFncs->insert_thing(raid5_expand_shrink_list, volume, INSERT_AFTER, NULL);
	if (!li) {
		rc = ENOMEM;
		goto out;
	}

	/*
	 * Find out which child object's saved area
	 * has the interrupted SHRINK information.
	 */
	if (md_check_for_expand_shrink_in_progress(volume, &saved_member)) {
		info = saved_member->saved_info;
	} else {
		LOG_ERROR("%s: Internal error: No shrink info.\n", region->name);
		rc = EINVAL;
		goto out;
	}

	/*
	 * Check the saved shrink info for the objects that were removed
	 * from the region.  Put these objects in a list to remove.
	 */
	remove_list = EngFncs->allocate_list();
	if (!remove_list) {
		rc = ENOMEM;
		goto out;
	}
	for (i=0; !rc && i<info->expand_shrink_cnt; i++) {
		int idx = info->expand_shrink_devs[i];
		member = md_volume_find_member(volume, idx);
		if (member) {
			remove_li = EngFncs->insert_thing(remove_list, member->obj, INSERT_AFTER, NULL);
			if (!remove_li) {
				rc = ENOMEM;
			}
		} else {
			LOG_MD_BUG();
			rc = EINVAL;
		}
	}

	if (rc) {
		goto out;
	}

	if (EngFncs->list_count(remove_list) == 0) {
		LOG_MD_BUG();
		rc = EINVAL;
		goto out;
	}

	/*
	 * Remove all objects in the list from the region.  The removal must
	 * act on the clone: the discovered volume is the backup and has to
	 * stay intact, and it is the clone that becomes the shrunk region.
	 */
	LIST_FOR_EACH(remove_list, iter, obj) {
		rc = remove_active_disk(new_vol, obj);
		if (rc) {
			goto out;
		}
	}

	/* Build a new raid5_conf */
	new_vol->private_data = EngFncs->engine_alloc(sizeof (raid5_conf_t));
	if (!new_vol->private_data) {
		rc = ENOMEM;
		goto out;
	}

	rc = create_raid5_conf(new_vol);
	if (!rc) {
		/* recalculate size */
		new_vol->flags |= MD_NEEDS_UPDATE_SIZE;
		region->private_data = new_vol;
		region->size = md_volume_calc_size(new_vol);
		region->flags |= SOFLAG_DIRTY;
		if (region->flags & SOFLAG_ACTIVE)
			region->flags |= (SOFLAG_NEEDS_DEACTIVATE | SOFLAG_NEEDS_ACTIVATE);
		new_vol->region_mgr_flags |= MD_RAID5_RESUME_SHRINKING_PENDING;
	}

	if (!rc) {
		LOG_DEFAULT("%s: shrunk size: %"PRIu64", original size: %"PRIu64".\n",
			  region->name, region->size, md_volume_calc_size(volume));
	}

out:
	if (rc) {
		if (new_vol) {
			if (new_vol->private_data) {
				raid5_free_private_data(new_vol);
			}
			md_free_volume(new_vol);
		}
		/* Take the parked volume off raid5_expand_shrink_list again. */
		if (li) {
			EngFncs->delete_element(li);
		}
		volume->flags |= MD_CORRUPT;
		region->flags |= SOFLAG_CORRUPT;
	}

	if (remove_list) {
		EngFncs->destroy_list(remove_list);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_show_degraded
 *
 *  When the volume is flagged MD_DEGRADED, tell the user how many spare
 *  devices (raid_disks - active_disks) are needed to restore it.  Does
 *  nothing for a volume that is not degraded.
 */
static void raid5_show_degraded(md_volume_t *vol)
{
	int missing;
	char *noun;

	if (!(vol->flags & MD_DEGRADED)) {
		return;
	}

	missing = vol->raid_disks - vol->active_disks;

	/* The same singular/plural word is used in both places of the text. */
	if (missing > 1) {
		noun = "devices";
	} else {
		noun = "device";
	}

	MESSAGE(_("Region %s is currently in degraded mode.  "
		  "To bring it back to normal state, add %d new spare %s"
		  " to replace the faulty or missing %s.\n"),
		vol->name, missing, noun, noun);
}

/*
 * Function: raid5_show_stale_disks
 *
 *  If the volume has stale members, build a space-separated list of their
 *  object names in message_buffer and show the user a message asking for
 *  their removal.  Does nothing when there are no stale disks.
 */
static void raid5_show_stale_disks(md_volume_t *vol)
{
	md_member_t *m;
	list_element_t iter;

	if (md_volume_count_stale_disks(vol) == 0) {
		return;
	}

	/* Start from an empty string, then append "name " per stale member. */
	message_buffer[0] = '\0';
	LIST_FOR_EACH(vol->members, iter, m) {
		if (!(m->flags & MD_MEMBER_STALE)) {
			continue;
		}
		strcat(message_buffer, m->obj->name);
		strcat(message_buffer, " ");
	}

	MESSAGE(_("Region %s : MD superblocks found in object(s) [%s] are not valid.  "
		  "[%s] will not be activated and should be removed from the region.\n"),
		vol->name, message_buffer, message_buffer);
}


/*
 * Function: raid5_init_region
 *
 *  Bind a newly allocated region to an MD volume: link every member
 *  object to the region, fill in the region fields, analyze the volume
 *  (on the final discovery call) or mark it for later validation, and
 *  build the RAID5 configuration unless the volume is corrupt.
 *
 *  A corrupt volume leaves the region with size 0 and SOFLAG_CORRUPT.
 *
 *  Returns 0 on success, ENOMEM when the configuration structure cannot be
 *  allocated, or the error from md_analyze_active_region() /
 *  create_raid5_conf().
 */
static int raid5_init_region(md_volume_t *vol, storage_object_t *region, boolean final_call)
{
	md_member_t *m;
	list_element_t iter;
	mdu_array_info_t kernel_info;
	int rc = 0;

	LOG_ENTRY();

	LIST_FOR_EACH(vol->members, iter, m) {
		if (!m->obj) {
			LOG_MD_BUG();
			continue;
		}
		md_append_region_to_object(region, m->obj);
	}

	region->size = md_volume_calc_size(vol);
	region->data_type = DATA_TYPE;
	region->plugin = raid5_plugin;
	region->private_data = (void *)vol;
	region->dev_major = MD_MAJOR;
	region->dev_minor = vol->md_minor;

	vol->flags |= MD_DISCOVERED;
	vol->region = region;

	md_get_kernel_info(region, &kernel_info);

	if (!final_call) {
		/* Needs to validate later */
		vol->flags |= MD_NEEDS_VALIDATE;
	} else if (region->flags & SOFLAG_ACTIVE) {
		rc = md_analyze_active_region(vol);
	} else {
		md_analyze_volume(vol);
		md_fix_dev_major_minor(vol, TRUE);
	}

	/* A corrupt volume gets no RAID5 configuration at all. */
	if (!(vol->flags & MD_CORRUPT)) {
		vol->private_data = EngFncs->engine_alloc(sizeof (raid5_conf_t));
		if (vol->private_data == NULL) {
			LOG_CRITICAL("Error allocating memory for raid5 configuration structure.\n");
			rc = ENOMEM;
			vol->flags |= MD_CORRUPT;
		} else {
			rc = create_raid5_conf(vol);
		}
	}

	if (vol->flags & MD_CORRUPT) {
		region->size = 0;
		region->flags |= SOFLAG_CORRUPT;
	}

	LOG_DETAILS("Region [%s] has been created (%s, %s, %s)\n",
		    region->name,
		    (vol->flags & MD_DISCOVERED) ? "discovered" : "BUG: not discovered",
		    (region->flags & SOFLAG_ACTIVE) ? "active" : "inactive",
		    (vol->flags & MD_DEGRADED) ? "degraded" : ((vol->flags & MD_CORRUPT) ? "corrupt" : "normal"));

	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * Function: raid5_create_region
 *
 *	Allocate and initialize the EVMS region for a discovered RAID4/5
 *	volume and add it to the output list.  If the saved info of a member
 *	shows an interrupted expand or shrink, the expansion is unwound or
 *	the shrink is resumed.
 *
 * PARAMETERS:
 *	vol : MD volume to export; must have a superblock
 *	output_list : list that receives the new region
 *	final_call : FALSE - region creation is delayed (0 is returned)
 *		when more than one raid disk is still missing
 *
 * RETURN:
 *	EINVAL - the volume has no superblock
 *	0 - region created, or creation delayed
 *	other - from allocate_region / md_volume_get_alternative_name /
 *		raid5_init_region / unwind / resume
 */
static int raid5_create_region(md_volume_t * vol, list_anchor_t output_list, boolean final_call)
{
	int rc = 0;
	storage_object_t * region = NULL;
	md_member_t *saved_member = NULL;
	md_saved_info_t *saved_info = NULL;
	md_super_info_t info;

	LOG_ENTRY();

	if (!vol->sb) {
		LOG_MD_BUG();
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	md_volume_get_super_info(vol, &info);

	if (!final_call && (vol->nr_disks != info.nr_disks)) {
		int missing;
		
		missing = info.raid_disks - vol->active_disks;
		
		/*
		 * It's not final discovery call and we have not found all the disks.
		 * If we have all active disks, create the region.
		 * Also, try  to create the region in "degraded" mode if possible.
		 */
		switch (missing) {
		case 0:
			LOG_DEBUG("We have all active disks to create region %s.\n",
				  vol->name);
			break;
		case 1:
			LOG_WARNING("About to create region %s in degraded mode.\n",
				    vol->name);
			break;
		default:
			LOG_DEBUG("Volume %s currently does not have enough active disks,"
				  " found %d active out of %d raid disks.  delaying discovery.\n",
				  vol->name, vol->active_disks, info.raid_disks);
			goto out;
			break;
		}
	}

	rc = EngFncs->allocate_region(vol->name, &region);
	if (rc) {
		LOG_WARNING("Region %s is already created, try new name.\n", vol->name);
		rc = md_volume_get_alternative_name(vol, 255);
		if (rc) {
			/*
			 * No region was allocated, so there is nothing to
			 * initialize or export; continuing would use an
			 * uninitialized region pointer.
			 */
			LOG_ERROR("No alternative name available for region %s (rc=%d).\n",
				  vol->name, rc);
			goto out;
		}
		LOG_WARNING("Trying tnew region name: %s...\n", vol->name);
		rc = EngFncs->allocate_region(vol->name, &region);
		if (rc) {
			LOG_ERROR("Give up.\n");
			goto out;
		}
		LOG_WARNING("OK. got it.\n");
	}

	rc = raid5_init_region(vol, region, final_call);

	if (!rc) {

		/*
		 * Check EXPAND/SHRINK flag in the "saved" superblock.
		 * If EXPAND_IN_PROGRESS, we will need to unwind the expansion.
		 * If SHRINK_IN_PROGRESS, we will need to resume the shrinking.
		 */
		if (md_check_for_expand_shrink_in_progress(vol, &saved_member)) {
			saved_info = saved_member->saved_info;
			if (saved_info->sector_mark > 0) {
				if (saved_info->flags & MD_SAVED_INFO_EXPAND_IN_PROGRESS) {
					rc = raid5_unwind_expansion(region);
					if (!rc) {
						MESSAGE(_("The process to expand region %s was interrupted.  "
							  "The orginal configuration will be restored."),
							region->name);
					}
				} else
				if (saved_info->flags & MD_SAVED_INFO_SHRINK_IN_PROGRESS) {
					rc = raid5_resume_shrinking(region);
					if (!rc) {
						MESSAGE(_("The process to shrink region %s was interrupted.  "
							  "The process will be resumed."),
							region->name);
					}
				} else {
					LOG_MD_BUG();
				}
			} else {
				LOG_WARNING("%s: The sector mark is 0.\n",
					    region->name);
			}
		}
	}	
	
	/* The region is exported even when its initialization reported an error. */
	md_add_object_to_list(region, output_list);
out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_rediscover_region
 *
 *	Throw away the volume currently attached to a region and rebuild it
 *	by running MD volume discovery again on the region's child objects.
 *	Nothing is done while the array is syncing; MD_ARRAY_SYNCING is set
 *	on the volume instead.
 *
 * RETURN:
 *	EFAULT - region is NULL
 *	ENODEV - no matching volume was found again; the region is freed
 *	other - from raid5_init_region()
 */
static int raid5_rediscover_region( storage_object_t * region, boolean final_call)
{
	md_volume_t *cur;
	md_member_t *m;
	storage_object_t *child;
	list_anchor_t child_objs;
	list_anchor_t scratch_output;
	list_element_t le;
	int saved_minor;
	int rc = 0;

	LOG_ENTRY();

	if (!region) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	cur = region->private_data;

	if (md_is_recovery_running(region)) {
		/*
		 * A syncing array is left alone; the flag requests
		 * rediscovery once the sync has finished.
		 */
		LOG_DEBUG("MD array %s is syncing, skipping rediscovery.\n", cur->name);
		cur->flags |= MD_ARRAY_SYNCING;
		LOG_EXIT_INT(0);
		return 0;
	}

	LOG_DEBUG("About to rediscover volume %s.\n", cur->name);

	/* Remember the minor so the rebuilt volume can be recognized. */
	saved_minor = cur->md_minor;

	child_objs = EngFncs->allocate_list();
	scratch_output = EngFncs->allocate_list();
	md_clear_child_list(region, child_objs);

	/* Make sure every member object takes part in the new discovery. */
	LIST_FOR_EACH(cur->members, le, m) {
		if (m->obj) {
			EngFncs->insert_thing(child_objs, m->obj, INSERT_AFTER | EXCLUSIVE_INSERT, NULL);
		}
	}

	/* The old volume is released here; 'cur' is reused below. */
	raid5_free_private_data(cur);
	md_free_volume(cur);

	LIST_FOR_EACH(child_objs, le, child) {
		LOG_DEBUG("   Rediscover on this object: %s.\n", child->name);
	}
	md_discover_volumes(child_objs, scratch_output);
	region->private_data = NULL;

	/* Attach the region to each undiscovered RAID5 volume with our minor. */
	for (cur = volume_list_head; cur != NULL; cur = cur->next) {
		if (cur->flags & MD_DISCOVERED) {
			continue;
		}
		if (cur->personality != RAID5) {
			continue;
		}
		if (cur->md_minor != saved_minor) {
			continue;
		}
		region->flags &= ~(SOFLAG_DIRTY | SOFLAG_CORRUPT | SOFLAG_NEEDS_ACTIVATE | SOFLAG_NEEDS_DEACTIVATE | SOFLAG_ACTIVE);
		rc = raid5_init_region(cur, region, final_call);
	}

	EngFncs->destroy_list(child_objs);
	EngFncs->destroy_list(scratch_output);

	/* raid5_init_region() sets private_data; NULL means nothing matched. */
	if (!region->private_data) {
		LOG_MD_BUG();
		EngFncs->free_region(region);
		rc = ENODEV;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_discover_regions
 *
 *	Walk the global MD volume list and, for every RAID5 volume, create
 *	its region if it has not been discovered yet or rediscover it if it
 *	is flagged MD_NEEDS_REDISCOVER.  On the final call, volumes still
 *	flagged MD_NEEDS_VALIDATE are analyzed and degraded / stale-disk /
 *	corrupt messages are displayed.
 *
 * PARAMETERS:
 *	output_list : list that receives newly created regions
 *	count (IN/OUT) : incremented for each region successfully created
 *	final_call : TRUE on the last discovery pass
 *
 * RETURN:
 *	The result of the last create / rediscover / analyze operation.
 */
int raid5_discover_regions(list_anchor_t output_list, int *count, boolean final_call )
{
	int rc = 0;
	md_volume_t *vol;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

retry:
	for (vol = volume_list_head; vol; vol = vol->next) {
		if (vol->personality != RAID5) {
			continue;
		}
		
		/* Create RAID5 regions */
		if (!(vol->flags & MD_DISCOVERED)) {
			rc = raid5_create_region(vol, output_list, final_call);
			if (!rc && (vol->flags & MD_DISCOVERED)) {
				*count = *count + 1;
			}
		}

		/* Rediscover RAID5 regions */
		if ((vol->flags & MD_DISCOVERED) && (vol->flags & MD_NEEDS_REDISCOVER) ) {
			vol->flags &= ~MD_NEEDS_REDISCOVER;
			rc = raid5_rediscover_region(vol->region, final_call);
			/*
			 * Rediscovery may have released 'vol' (it calls
			 * md_free_volume() on the region's volume) and
			 * changed the volume list, so 'vol' must not be
			 * touched again.  The flag was cleared above, so
			 * restarting the scan cannot pick this request up
			 * a second time.
			 */
			goto retry;
		}
	}

	if (final_call) {
		for (vol=volume_list_head; vol; vol=vol->next) {
			if (vol->personality != RAID5) {
				continue;
			}
			/* Run the analysis that earlier passes postponed. */
			if (vol->flags & MD_NEEDS_VALIDATE) {
				if (vol->region->flags & SOFLAG_ACTIVE) {
					rc = md_analyze_active_region(vol);
				} else {
					md_analyze_volume(vol);
					md_fix_dev_major_minor(vol, TRUE);
				}
				vol->flags &= ~MD_NEEDS_VALIDATE;
			}
			if ((vol->flags & MD_DEGRADED) && !(vol->flags & MD_ARRAY_SYNCING)) {
				raid5_show_degraded(vol);
			}
			if (vol->stale_disks) {
				raid5_show_stale_disks(vol);
			}
			md_display_corrupt_messages(RAID5);
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/* Function: raid5_discover
 *
 *  Discovery entry point.  On the final call the work is handed to
 *  md_discover_final_call().  On every other call MD volumes are
 *  discovered from the input list and RAID4/5 regions are then created
 *  by raid5_discover_regions().
 *
 *  Returns EFAULT when either list is NULL, otherwise the count filled
 *  in by the discovery routines.
 */
static int raid5_discover( list_anchor_t input_list,
			   list_anchor_t output_list,
			   boolean final_call ) {
	int found = 0;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/* Both lists are mandatory. */
	if (input_list == NULL || output_list == NULL) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	if (!final_call) {
		/* First the volumes (PVs) ... */
		md_discover_volumes(input_list, output_list);
		LOG_DETAILS("PV discovery complete.\n");

		/* ... then the regions built on top of them. */
		raid5_discover_regions(output_list, &found, final_call);
		LOG_DETAILS("RAID4/5 volume discovery complete.\n");
	} else {
		md_discover_final_call(input_list, output_list, &found);
	}

	LOG_EXIT_INT(found);
	return found;
}


/*
 * Map a volume ("big") sector number onto the array.
 *
 * Input:  vol_sector, the number of raid and data disks, and the
 *         configuration (chunk size, level, parity algorithm).
 * Output: *dd_idx and *pd_idx receive the data and parity disk indexes;
 *         the return value is the sector number within those disks.
 *         For an unsupported algorithm a warning is logged and *pd_idx
 *         is left untouched.
 */
static lsn_t raid5_compute_sector(lsn_t vol_sector,
				  unsigned int raid_disks, unsigned int data_disks,
				  unsigned int * dd_idx, unsigned int * pd_idx,
				  raid5_conf_t * conf) {

	unsigned long long chunk_no;
	unsigned long long stripe_no;
	lsn_t              offset_in_chunk;
	lsn_t              disk_sector;

	LOG_ENTRY();

	/* Which chunk is it, and where inside that chunk? */
	chunk_no = vol_sector / conf->chunksize;
	offset_in_chunk = vol_sector % conf->chunksize;

	/* Each stripe holds data_disks chunks of data. */
	stripe_no = chunk_no / data_disks;
	*dd_idx = chunk_no % data_disks;

	if (conf->level == 4) {
		/* RAID4: parity always lives on the last disk. */
		*pd_idx = data_disks;
	} else {
		/* Step 1: place the parity disk for this stripe. */
		switch (conf->algorithm) {
		case ALGORITHM_LEFT_ASYMMETRIC:
		case ALGORITHM_LEFT_SYMMETRIC:
			*pd_idx = data_disks - stripe_no % raid_disks;
			break;
		case ALGORITHM_RIGHT_ASYMMETRIC:
		case ALGORITHM_RIGHT_SYMMETRIC:
			*pd_idx = stripe_no % raid_disks;
			break;
		default:
			LOG_WARNING("raid5: unsupported algorithm %d\n", conf->algorithm);
			break;
		}

		/* Step 2: shift the data disk index around the parity disk. */
		switch (conf->algorithm) {
		case ALGORITHM_LEFT_ASYMMETRIC:
		case ALGORITHM_RIGHT_ASYMMETRIC:
			if (*dd_idx >= *pd_idx) {
				*dd_idx += 1;
			}
			break;
		case ALGORITHM_LEFT_SYMMETRIC:
		case ALGORITHM_RIGHT_SYMMETRIC:
			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
			break;
		default:
			break;
		}
	}

	/* Sector within the member disk. */
	disk_sector = (lsn_t) (stripe_no * conf->chunksize + offset_in_chunk);
	LOG_DEBUG("new sector is %"PRIu64".\n", disk_sector);
	LOG_EXIT_U64(disk_sector);
	return disk_sector;
}


/*
 * Translate the start of a volume sector run into the member that holds
 * the data, the sector on that member, and the number of sectors of the
 * run (at most 'count') that remain inside the current chunk.
 */
static void get_child_run( md_volume_t       * volume,
			  lsn_t               lsn,
			  sector_count_t      count,
			  md_member_t      ** child,
			  lsn_t             * child_lsn,
			  sector_count_t    * child_count) {

	raid5_conf_t * conf = mdvol_to_conf(volume);
	sector_count_t chunk_sectors = conf->chunksize;
	unsigned int dd;
	unsigned int pd;

	*child_lsn = raid5_compute_sector(lsn,
					  conf->raid_disks, conf->raid_disks - 1,
					  &dd, &pd, conf);

	*child = conf->disks[dd].dev;

	/* The mask yields the offset within the chunk; the run stops at the chunk end. */
	*child_count = min(count, chunk_sectors - (*child_lsn & (chunk_sectors - 1)));
}



/****** Region Functions ******/


static int raid5_get_create_options( option_array_t * options,
				     md_sb_ver_t    * sb_ver,
				     char          ** spare_disk,
				     unsigned int   * chunk_size,
				     unsigned int   * raid_level,
				     unsigned int   * parity_algorithm ) {
	int i;
	int rc = 0;
	boolean ver1_superblock = FALSE;

	LOG_ENTRY();

	for (i = 0; i < options->count; i++) {

		if (options->option[i].is_number_based) {

			switch (options->option[i].number) {
			case RAID5_CREATE_OPT_SB1_INDEX:
				ver1_superblock = options->option[i].value.b;
				break;
			
			case RAID5_CREATE_OPT_SPARE_DISK_INDEX:
				/*
				 * Not worth validation, will catch errors when
				 * we try to find the original.
				 */
				*spare_disk = options->option[i].value.s;
				break;

			case RAID5_CREATE_OPT_CHUNK_SIZE_INDEX:
				*chunk_size = options->option[i].value.ui32 * 2;
				break;

			case RAID5_CREATE_OPT_RAID_LEVEL_INDEX:
				if (strcmp(options->option[i].value.s, RAID4_LEVEL_NAME) == 0) {
					*raid_level = 4;
				} else if (strcmp(options->option[i].value.s, RAID5_LEVEL_NAME) == 0) {
					*raid_level = 5;
				}
				break;

			case RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX:
				if (strcmp(options->option[i].value.s, ALGORITHM_LEFT_ASYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_LEFT_ASYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_RIGHT_ASYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_RIGHT_ASYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_LEFT_SYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_LEFT_SYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_RIGHT_SYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_RIGHT_SYMMETRIC;
				}
				break;

			default:
				break;

			}

		} else {
			if (strcmp(options->option[i].name, RAID5_CREATE_OPT_SB1_NAME) == 0) {
				ver1_superblock = options->option[i].value.b;
			} else if (strcmp(options->option[i].name, RAID5_CREATE_OPT_SPARE_DISK_NAME) == 0) {
				*spare_disk = options->option[i].value.s;
			} else if (strcmp(options->option[i].name, RAID5_CREATE_OPT_CHUNK_SIZE_NAME) == 0) {
				*chunk_size = options->option[i].value.ui32 * 2;

			} else if (strcmp(options->option[i].name, RAID5_CREATE_OPT_RAID_LEVEL_NAME) == 0) {
				if (strcmp(options->option[i].value.s, RAID4_LEVEL_NAME) == 0) {
					*raid_level = 4;
				} else if (strcmp(options->option[i].value.s, RAID5_LEVEL_NAME) == 0) {
					*raid_level = 5;
				}

			} else if (strcmp(options->option[i].name, RAID5_CREATE_OPT_PARITY_ALGORITHM_NAME) == 0) {
				if (strcmp(options->option[i].value.s, ALGORITHM_LEFT_ASYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_LEFT_ASYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_RIGHT_ASYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_RIGHT_ASYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_LEFT_SYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_LEFT_SYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_RIGHT_SYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_RIGHT_SYMMETRIC;
				}
			}
		}
	}

	if (ver1_superblock == TRUE) {
		sb_ver->major_version = MD_SB_VER_1;
		sb_ver->minor_version = 0;
		sb_ver->patchlevel = 0;
	} else {
		sb_ver->major_version = MD_SB_VER_0;
		sb_ver->minor_version = 90;
		sb_ver->patchlevel = 0;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_create_new_region
 *
 *	Allocate the region for a newly created volume, build the RAID5
 *	private configuration, attach all members to the region and put the
 *	(dirty) region on the output list.  On failure nothing is left
 *	behind: the private data and the region are released again.
 *
 * RETURN:
 *	ENOMEM - the configuration structure could not be allocated
 *	other - from allocate_region / create_raid5_conf()
 */
static int raid5_create_new_region(md_volume_t * vol, list_anchor_t output_list)
{
	int rc;
	md_member_t *member;
	list_element_t iter;
	storage_object_t *region = NULL;

	LOG_ENTRY();
	rc = EngFncs->allocate_region(vol->name, &region);
	if (rc) {
		LOG_ERROR("Region %s is already created (rc=%d).\n",
			  vol->name, rc);
	}

	LOG_DEBUG("Creating new region %s: nr_disks=%d, raid_disks=%d, spares=%d, actives=%d, working=%d\n",
		  vol->name, vol->nr_disks, vol->raid_disks, vol->spare_disks, vol->active_disks, vol->working_disks);

	if (rc) {
		goto out;
	}

	vol->private_data = EngFncs->engine_alloc(sizeof (raid5_conf_t));
	if (!vol->private_data) {
		rc = ENOMEM;
		goto out_free_region;
	}

	rc = create_raid5_conf(vol);
	if (rc) {
		raid5_free_private_data(vol);
		goto out_free_region;
	}

	LIST_FOR_EACH(vol->members, iter, member) {
		md_append_region_to_object(region, member->obj);
	}
	region->size = md_volume_calc_size(vol);
	region->data_type = DATA_TYPE;
	region->plugin = raid5_plugin;
	region->private_data = (void *)vol;
	region->dev_major = MD_MAJOR;
	region->dev_minor = vol->md_minor;
	vol->region = region;
	/* A new region only exists in memory until it is committed. */
	region->flags |= SOFLAG_DIRTY;
	md_add_object_to_list(region, output_list);
	goto out;

out_free_region:
	/*
	 * The region was allocated above but never linked to the volume,
	 * its members or the output list, so it must be given back here
	 * or it would be leaked.
	 */
	EngFncs->free_region(region);
out:
	LOG_EXIT_INT(rc);
	return rc;
}


/* Function: raid5_create
 *
 *  Create a new MD RAID4/5 volume from the given objects.
 *
 *  objects         : candidate member objects; each object is removed
 *                    from this list once it has been added to the volume
 *  options         : create options (superblock version, spare disk,
 *                    chunk size, RAID level, parity algorithm)
 *  new_region_list : receives the new region
 *
 *  Returns EFAULT for a NULL parameter, EINVAL when fewer than
 *  RAID5_MIN_RAID_DISKS objects are given, ENOMEM on allocation failure,
 *  otherwise the result of the MD helper that failed, or 0.
 */
static int raid5_create( list_anchor_t objects,
			 option_array_t *options,
			 list_anchor_t new_region_list )
{
	md_volume_t * volume = NULL;
	storage_object_t * object;
	u_int64_t size = -1;
	storage_object_t * spare=NULL;
	char * spare_disk = NULL;
	/*
	 * These three are filled in through unsigned int pointers by
	 * raid5_get_create_options() and are only written when the matching
	 * option is present, so each needs the right type and a default.
	 */
	unsigned int chunksize = MD_DEFAULT_CHUNK_SIZE >> EVMS_VSECTOR_SIZE_SHIFT;
	unsigned int raid_level = 5;
	unsigned int parity_algorithm = ALGORITHM_LEFT_SYMMETRIC;
	list_element_t iter1, iter2;
	md_sb_ver_t sb_ver = {MD_SB_VER_0, 90, 0};
	md_member_t *member;
	int rc = 0;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/* Parameter check */
	if (!objects || !options || !new_region_list) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}
	
	// Must have at least RAID5_MIN_RAID_DISKS
	if (EngFncs->list_count(objects) < RAID5_MIN_RAID_DISKS) {
		LOG_CRITICAL("Must have at least %d objects.\n", RAID5_MIN_RAID_DISKS);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	if (!(volume = md_allocate_volume())) {
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	rc = md_volume_get_available_name(volume, 256);
	if (rc) {
		goto error_free;
	}

	raid5_get_create_options(options, &sb_ver, &spare_disk, &chunksize, &raid_level, &parity_algorithm);

	/* Every member contributes the usable size of the smallest object. */
	LIST_FOR_EACH(objects, iter1, object) {
		size = min(size, md_object_usable_size(object, &sb_ver, chunksize));
	}
	
	if (spare_disk) {
		spare = md_find_valid_input_object(spare_disk);
		if (spare) {
			size = min(size, md_object_usable_size(spare, &sb_ver, chunksize));
		}
	}

	/*
	 * Hand the selected parity algorithm to the superblock instead of a
	 * literal 0, which discarded the user's choice.
	 * NOTE(review): assumes the fourth argument of md_init_sb() is the
	 * array layout - confirm against md.h.
	 */
	rc = md_init_sb(volume, &sb_ver, raid_level, parity_algorithm, size, chunksize);
	if (rc) {
		goto error_free;
	}

	// Add raid members
	LIST_FOR_EACH_SAFE(objects, iter1, iter2, object) {
		member = md_allocate_member(object);
		if (!member) {
			rc = ENOMEM;
			goto error_free;
		}
		// This will add the member and update the MD superblock.
		member->data_size = size;
		member->flags |= (MD_MEMBER_NEW | MD_MEMBER_DISK_ACTIVE | MD_MEMBER_DISK_SYNC);
		rc = md_volume_add_new_member(volume, member);
		if (rc) {
			md_free_member(member);
			goto error_free;
		}
		EngFncs->delete_element(iter1);
	}

	// Add spare member
	if (spare) {
		member = md_allocate_member(spare);
		if (!member) {
			rc = ENOMEM;
			goto error_free;
		}
		// This will add the member and update the MD superblock.
		member->flags |= (MD_MEMBER_NEW | MD_MEMBER_DISK_SPARE);
		member->data_size = size;
		rc = md_volume_add_new_member(volume, member);
		if (rc) {
			md_free_member(member);
			goto error_free;
		}
	}

	rc = raid5_create_new_region(volume, new_region_list);
	if (rc) {
		goto error_free;
	}
	volume->flags |= MD_DIRTY;
	
	LOG_EXIT_INT(rc);
	return rc;

error_free:
	md_free_volume(volume);
	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * Drain the global kill-sector list by forwarding every request, one
 * chunk-sized run at a time, to the member object that holds the data.
 * Each list entry is unlinked and freed after it has been handled, also
 * when forwarding it failed; processing stops at the first error.
 */
static int forward_kill_sectors() {

	kill_sectors_t * entry;
	md_member_t    * member;
	lsn_t            run_lsn;
	sector_count_t   run_count;
	int              rc = 0;

	LOG_ENTRY();

	for (entry = kill_sector_list_head;
	     (rc == 0) && (entry != NULL);
	     entry = kill_sector_list_head) {

		md_volume_t * volume = (md_volume_t *) entry->region->private_data;

		while ((rc == 0) && (entry->count > 0)) {
			get_child_run(volume, entry->lsn, entry->count,
				      &member, &run_lsn, &run_count);

			/* A run with no member behind it is skipped. */
			if (member != NULL) {
				rc = KILL_SECTORS(member->obj, member->data_offset + run_lsn, run_count);
			}

			if (rc != 0) {
				break;
			}

			entry->lsn += run_count;
			entry->count -= run_count;
		}

		/* Pop the entry off the list head. */
		kill_sector_list_head = entry->next;
		free(entry);
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/* Function: w_delete
 *
 * Worker function for raid5_delete and raid5_discard.
 *
 * After raid5_can_delete() has approved the removal, pending kill sectors
 * are forwarded to the child objects, the children are detached from the
 * region (and put on 'children'), and the RAID5 private data, the volume
 * and the region are released.  'tear_down' is passed on to
 * md_delete_volume().
 *
 * Returns 0 on success, otherwise the error from raid5_can_delete() or
 * forward_kill_sectors(); in that case nothing is released.
 */
static int w_delete(storage_object_t *region, list_anchor_t children, boolean tear_down)
{
	int     rc;
	md_volume_t * volume;
	
	LOG_ENTRY();

	/* Check that this region can be removed. */
	if ((rc = raid5_can_delete(region))) {
		LOG_EXIT_INT(rc);
		return rc;
	}
	/* The volume is only looked at once the region has been validated. */
	volume = (md_volume_t *) region->private_data;

	rc = forward_kill_sectors();

	if (rc == 0) {
		/* Remove the parent/child associations with the PVs. */
		md_clear_child_list(region, children);

		/*
		 * Release the complete private configuration the same way
		 * every other teardown path in this file does; freeing only
		 * the conf structure would leak what it points to (e.g.
		 * conf->disks).
		 */
		raid5_free_private_data(volume);
		/* Delete the volume. */
		md_delete_volume(volume, tear_down);
		region->private_data = NULL;
		EngFncs->free_region(region);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/* Function: raid5_delete
 *
 *  Delete the specified region.  All of the work is done by w_delete(),
 *  called with tear_down set to TRUE; its result is returned unchanged.
 */
static int raid5_delete(storage_object_t * region, list_anchor_t children)
{
	int result;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	result = w_delete(region, children, TRUE);

	LOG_EXIT_INT(result);
	return result;
}


/*
 * Function: raid5_discard
 *
 * Run w_delete() on every region of the list, without a child list and
 * with tear_down set to FALSE.  Failures of individual deletions are
 * ignored; the function always returns 0.
 */
static int raid5_discard(list_anchor_t regions)
{
	list_element_t iter;
	storage_object_t * doomed;

	LOG_ENTRY();

	LIST_FOR_EACH(regions, iter, doomed) {
		w_delete(doomed, NULL, FALSE);
	}

	LOG_EXIT_INT(0);
	return 0;
}


/*
 * Function: raid5_replace_child
 *
 * Replace a child object of the region with a new one.  This is a thin
 * wrapper that selects the raid5 plugin for logging and forwards to
 * md_replace_child(), returning its result.
 */
static int raid5_replace_child(storage_object_t *region,
			       storage_object_t *child,
			       storage_object_t *new_child)
{
	int result;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	result = md_replace_child(region, child, new_child);

	LOG_EXIT_INT(result);
	return result;
}


/* Function: raid5_add_sectors_to_kill_list
 *
 *  Queue a run of sectors of the region to be zeroed later.  While an
 *  expand is pending for the volume the request is parked on the delayed
 *  kill-sector list; otherwise it is pushed onto the global kill-sector
 *  list and the region is marked dirty.
 *
 *  Returns EIO for a corrupt volume, EINVAL when the run ends beyond the
 *  region, ENOMEM when no list entry can be allocated, 0 otherwise.
 */
static int raid5_add_sectors_to_kill_list( storage_object_t * region,
					   lsn_t              lsn,
					   sector_count_t     count ) {

	md_volume_t    * vol = (md_volume_t *)region->private_data;
	kill_sectors_t * entry;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	if (vol->flags & MD_CORRUPT) {
		MESSAGE(_("MD Object %s is corrupt.  Writing data is not allowed.\n"),vol->name);
		LOG_EXIT_INT(EIO);
		return EIO;
	}
	if ((lsn + count) > region->size) {
		LOG_ERROR("Attempt to write past end of region %s sector=%"PRIu64"\n",vol->name,lsn+count);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	if (vol->region_mgr_flags & MD_RAID5_EXPAND_PENDING) {
		entry = EngFncs->engine_alloc(sizeof(kill_sectors_t));
		if (entry) {
			entry->region = region;
			entry->lsn = lsn;
			entry->count = count;
			EngFncs->insert_thing(raid5_delay_kill_sector_list,
					      entry, INSERT_AFTER, NULL);
			LOG_EXIT_INT(0);
			return 0;
		}
		/* Allocation failed: fall back to the global list below. */
	}

	entry = malloc(sizeof(kill_sectors_t));
	if (entry == NULL) {
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	entry->region = region;
	entry->lsn    = lsn;
	entry->count  = count;

	/* Push onto the front of the global list. */
	entry->next = kill_sector_list_head;
	kill_sector_list_head = entry;

	/*
	 * A dirty region gets called at commit time, which is when the
	 * kill sectors are processed.
	 */
	region->flags |= SOFLAG_DIRTY;

	LOG_EXIT_INT(0);
	return 0;
}


/*
 * Process the kill sectors list.
 *
 * Every entry of the global list is zeroed by writing a zero-filled
 * buffer through raid5_write(); processed entries are freed.  Processing
 * stops at the first error.
 *
 * Returns 0, ENOMEM when the zero buffer cannot be allocated, or the
 * error from raid5_write().
 */
static int kill_sectors(void) {

	int rc = 0;
	kill_sectors_t * ks;
	unsigned char  * buffer = NULL;
	sector_count_t   buffer_size = 0;

	LOG_ENTRY();

	/*
	 * Copy the kill sector list head and NULL out the global variable.
	 * This function uses raid5_write() to write out the kill sectors,
	 * but raid5_write() has a check to write kill sectors before it does
	 * any writing.  We could end up in infinite recursion between
	 * kill_sectors() and raid5_write().  raid5_write() has a check to
	 * see if there are any kill sectors on the global list.  By having
	 * this function remove the kill sectors from the global list the
	 * recursion is stopped.
	 */
	ks = kill_sector_list_head;
	kill_sector_list_head = NULL;

	while ((rc == 0) && (ks != NULL)) {
		/* The zero buffer only ever grows; it is reused across entries. */
		if (buffer_size < ks->count) {
			free(buffer);
			buffer = calloc(1, EVMS_VSECTOR_SIZE * ks->count);

			if (buffer != NULL) {
				buffer_size = ks->count;
			} else {
				buffer_size = 0;
				rc = ENOMEM;
			}
		}

		if (rc == 0) {
			kill_sectors_t * ks_prev = ks;

			LOG_DEBUG("Killing %"PRIu64" sectors on %s at sector offset %"PRIu64".\n", ks->count, ks->region->name, ks->lsn);
			rc = raid5_write(ks->region, ks->lsn, ks->count, buffer);

			ks = ks->next;
			free(ks_prev);
		}
	}

	/* The zero buffer is private to this call; release it (free(NULL) is a no-op). */
	free(buffer);

	/*
	 * NOTE(review): when the loop stops on an error, the entries still
	 * reachable from 'ks' are neither processed nor put back on the
	 * global list - confirm whether they should be re-queued or freed.
	 */

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_copy_data
 *
 *	Copy data from source volume to target volume.  The copy is done in
 *	blocks of (chunk size * min(nr_disks of src and target)) and, after
 *	every block, the progress is recorded in the saved info of the
 *	member that carries the expand/shrink-in-progress information.
 *
 * PARAMETERS:
 *	src : source volume
 *	target : target volume
 *	sectors (IN/OUT) :
 *		On input, number of sectors to copy
 *		On output, the number of sectors successfully copied
 *	forward : copy direction
 *		TRUE, forward copy start from LSN 0
 *		FALSE, backward copy
 *	show_progress : TRUE, show progress indicator
 *	message : message to be displayed to user
 *
 * RETURN:
 *	ENOMEM - not enough memory to perform I/O
 *	EINVAL - no member with expand/shrink saved info was found
 *	other - from READ/WRITE function or md_write_saved_info()
 */
static int raid5_copy_data(
	md_volume_t *src,
	md_volume_t *target,
	sector_count_t *sectors,
	boolean forward,
	boolean show_progress,
	char *message)
{
	int rc=0;
	lsn_t lsn;
	sector_count_t transfer_sects;
	char *buf = NULL;
	progress_t progress;
	u_int32_t buf_size;
	md_member_t *saved_member = NULL;
	md_saved_info_t *info = NULL;
	raid5_conf_t * conf = mdvol_to_conf(src);

	LOG_ENTRY();

	LOG_DEFAULT("Region: %s, nr_disks (src:%d, target:%d),"
		    " sectors: %"PRIu64", copy direction: %s\n",
		    src->name, src->nr_disks, target->nr_disks,
		    *sectors, forward ? "FORWARD" : "BACKWARD");

	memset(&progress, 0, sizeof(progress_t));
	progress.total_count = *sectors;

	/* Transfer buffer size in bytes: one chunk per disk of the smaller volume. */
	buf_size = (conf->chunksize << EVMS_VSECTOR_SIZE_SHIFT) * min(src->nr_disks, target->nr_disks);

	/* Prefer a 4096-byte aligned buffer, fall back to a plain allocation. */
	buf = memalign(4096, buf_size);
	if (buf == NULL) {
		buf = malloc(buf_size);
	}
	
	if (buf == NULL) {
		*sectors = 0;
		rc = ENOMEM;
		goto out;
	}

	/* Number of sectors moved per iteration. */
	transfer_sects = buf_size >> EVMS_VSECTOR_SIZE_SHIFT;

	if (show_progress == TRUE) {
		progress.title = message;
		progress.description = "Transferring data, please wait...";
		progress.type = DISPLAY_PERCENT;
		EngFncs->progress(&progress);
	}

	/*
	 * Find out which child object's saved area has the
	 * EXPAND_IN_PROGRESS / SHRINK_IN_PROGRESS set, use that child object
	 * to keep track of copy progress.
	 */
	if (md_check_for_expand_shrink_in_progress(target, &saved_member)) {
		info = saved_member->saved_info;
	} else {
		if (md_check_for_expand_shrink_in_progress(src, &saved_member)) {
			info = saved_member->saved_info;
		} else {
			LOG_ERROR("Can't keep track of copy progress.\n");
			*sectors = 0;
			rc = EINVAL;
			goto out;
		}
	}
	
	/* A backward copy starts at the end and walks down towards LSN 0. */
	if (forward) {
		lsn = 0;
	} else {
		lsn = progress.total_count;
	}

	while (progress.count < progress.total_count) {

		/* The last block may be shorter than the buffer. */
		if ((progress.count + transfer_sects) > progress.total_count) {
			transfer_sects = progress.total_count - progress.count;
		}
		
		if (forward == TRUE) {
			lsn = progress.count;
		} else {
			lsn -= transfer_sects;
		}

		rc = raid5_volume_read(src, lsn, transfer_sects, buf);
		if (!rc) {
			rc = raid5_volume_write(target, lsn, transfer_sects, buf);
		}

		if (rc)
			break;

		progress.count += transfer_sects;

		/* update progress indicator */
		if (show_progress == TRUE) {
			EngFncs->progress(&progress);
		}

		/*
		 * Record how far the copy got: the number of sectors copied
		 * for a forward copy, the lowest LSN copied for a backward
		 * copy.
		 */
		if (forward)
			info->sector_mark = progress.count;
		else
			info->sector_mark = lsn;
		rc = md_write_saved_info(saved_member);
		if (rc) {
			LOG_ERROR("Can't keep track of copy progress, rc=%d.\n", rc);
			break;
		}
	}

	/* Report how many sectors were actually copied. */
	*sectors = progress.count;
	
	if (progress.count > progress.total_count) {
		LOG_WARNING("count=(%"PRIu64") is greater than total_count(%"PRIu64").\n",
			    progress.count, progress.total_count);
	}

	if ((show_progress == TRUE) && (progress.count < progress.total_count)) {
		/* close the progress indicator by setting count = total_count */
		progress.count = progress.total_count;
		EngFncs->progress(&progress);
	}

	LOG_DEFAULT("Last LSN=%"PRIu64", used %"PRIu64"-sector blocks.\n",
		lsn, transfer_sects);

out:
	if (buf) {
		free(buf);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_do_kill_sector
 *
 *	Process the delayed kill-sector requests that belong to the given
 *	region: each request is zeroed with raid5_volume_write() and, when
 *	the write succeeded, removed from the delayed list and freed.
 *	After the first error no further request is processed; unprocessed
 *	requests stay on the list.
 *
 * RETURN:
 *	ENOMEM - the zero buffer could not be allocated or grown
 *	other - from raid5_volume_write()
 */
static int raid5_do_kill_sector(storage_object_t *region)
{
	md_volume_t *volume;
	list_element_t iter1, iter2;
	kill_sectors_t *killsect;
	void *buffer;
	void *grown;
	int current_buffer_size = 4096;
	int buffer_size_needed = 0;
	int rc=0;

	LOG_ENTRY();

	buffer = EngFncs->engine_alloc(current_buffer_size);
	if (!buffer) {
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}
	/* The buffer is the source of the zeros; make sure it really is zeroed. */
	memset(buffer, 0, current_buffer_size);

	volume = (md_volume_t *)region->private_data;
	LIST_FOR_EACH_SAFE(raid5_delay_kill_sector_list, iter1, iter2, killsect) {
		if (killsect->region == region) {
			/* Grow our zero filled buffer if needed. */
			buffer_size_needed = killsect->count * EVMS_VSECTOR_SIZE;
			if (current_buffer_size < buffer_size_needed) {
				/*
				 * Keep the old pointer until the reallocation
				 * has succeeded so that it can still be freed
				 * on failure (assumes engine_realloc() leaves
				 * the original block valid when it fails, like
				 * realloc() - TODO confirm).
				 */
				grown = EngFncs->engine_realloc(buffer, buffer_size_needed);
				if (grown != NULL) {
					buffer = grown;
					current_buffer_size = buffer_size_needed;
					/* The added part is not guaranteed to be zeroed. */
					memset(buffer, 0, current_buffer_size);

				} else {
					LOG_CRITICAL("Error allocating memory for a zero filled"
						     " buffer for killing sectors.\n");
					rc = ENOMEM;
				}
			}

			/* Zap the sectors. */
			if (rc == 0) {
				rc = raid5_volume_write(volume, killsect->lsn,
						     killsect->count, buffer);
				if (rc == 0) {
					/*
					 * The sectors were killed.
					 * Remove the ksr from the list
					 * and free it.
					 */
					EngFncs->delete_element(iter1);
					EngFncs->engine_free(killsect);

				}
			}
		}
	}

	EngFncs->engine_free(buffer);

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_do_expand
 *
 *	Carry out a pending expansion of a region.  region->private_data is
 *	the new (expanded) volume; the original volume is looked up on
 *	raid5_expand_shrink_list.  The data of the original volume is copied
 *	forward onto the new volume while the progress is tracked in the
 *	saved info of the first member.  If the copy fails, the sectors
 *	already copied are copied back and the original volume is restored
 *	as the region's volume.
 *
 * RETURN:
 *	EINVAL - the original volume is not on the expand/shrink list
 *	EBUSY - the region is syncing
 *	ENOMEM - the saved info structure could not be allocated
 *	other - from md_write_saved_info() / raid5_copy_data()
 */
static int raid5_do_expand(storage_object_t *region)
{
	int rc = 0;
	int rc2 = 0;
	list_element_t iter;
	md_volume_t *volume;
	md_volume_t *org_vol = NULL;
	boolean found = FALSE;
	sector_count_t sectors;
	md_saved_info_t *info = NULL;
	md_member_t *saved_member = NULL;
	md_member_t *member;
	char msg[256];

	LOG_ENTRY();

	volume = (md_volume_t *)region->private_data;

	/* Look up the original (pre-expand) volume of this region. */
	LIST_FOR_EACH(raid5_expand_shrink_list, iter, org_vol) {
		if (org_vol->region == region) {
			found = TRUE;
			break;
		}
	}
	if (found == FALSE) {
		LOG_CRITICAL("Internal Error, could not find original volume to expand region %s.\n",
			     region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/* Don't allow expanding if the region is syncing */
	if (md_is_recovery_running(region) == TRUE) {
		LOG_ERROR("%s: Can't allow expanding because the region is actively syncing.\n",
				region->name);
		rc = EBUSY;
		goto error;
	}

	/* Create saved_info struct on the first disk to keep track of the expansion progress */
	info = EngFncs->engine_alloc(MD_SAVED_INFO_BYTES);
	if (!info) {
		rc = ENOMEM;
		goto error;
	}

	saved_member = EngFncs->first_thing(volume->members, NULL);
	saved_member->saved_info = info;

	/* Set EXPAND flag on the first disk */
	info->flags |= MD_SAVED_INFO_EXPAND_IN_PROGRESS;
	info->sector_mark = 0;
		
	/* Keep track of newly added objects */
	LIST_FOR_EACH(volume->members, iter, member) {
		if (!md_volume_find_object(org_vol, member->obj)) {
			info->expand_shrink_devs[info->expand_shrink_cnt] = member->dev_number;
			info->expand_shrink_cnt++;
		}
	}

	rc = md_write_saved_info(saved_member);
	if (rc) {
		LOG_CRITICAL("Failed to write info on expand progress for regions %s.\n",
			     region->name);
		goto error;
	}

	/* All of the original volume's data has to be moved. */
	sectors = md_volume_calc_size(org_vol);
	
	sprintf(msg, "Expanding RAID5 region %s...", region->name);
	LOG_DEBUG("%s (sectors=%"PRIu64")\n", msg, sectors);
	
	/* Forward copy; on return 'sectors' holds the number of sectors copied. */
	rc = raid5_copy_data(org_vol, volume, &sectors, TRUE, TRUE, msg);
	
	LOG_DEBUG("raid5_copy_data returned rc=%d, (sectors=%"PRIu64")\n", rc, sectors);
	
	/* Clear EXPAND flag  */
	info->flags &= ~MD_SAVED_INFO_EXPAND_IN_PROGRESS;
	rc2 = md_write_saved_info(saved_member);
	if (rc2) {
		LOG_CRITICAL("Failed to update info on expand progress for regions %s.\n",
			     region->name);
	}

	switch (rc) {
	case 0:
		/* Success */
		/* Kill-sector requests were delayed while the expand was pending. */
		rc2 = raid5_do_kill_sector(region);
		if (rc2) {
			LOG_CRITICAL("Failed to process kill sectors"
				     " after successful expansion of RAID5 region %s.\n",
				     region->name);
		}
		/* The original volume is no longer needed. */
		raid5_free_private_data(org_vol);
		EngFncs->remove_thing(raid5_expand_shrink_list, org_vol);
		md_free_volume(org_vol);
		volume->region_mgr_flags &= ~MD_RAID5_EXPAND_PENDING;
		volume->flags &= ~MD_ARRAY_RESIZE_PENDING;
		goto out;
		break;
	default:
		/* Error, unwinding... */
		/* Copy back (backward) whatever had been copied before the failure. */
		if (sectors) {
			sprintf(msg, "RAID5 region %s failed to expand, restoring data...",
				region->name);
			rc2 = raid5_copy_data(volume, org_vol, &sectors, FALSE, TRUE, msg);
			if (rc2) {
				LOG_CRITICAL("Could not unwind the failed expand, %s is corrupt.\n",
					     region->name);
				region->flags |= SOFLAG_CORRUPT;
				volume->flags |= MD_CORRUPT;
			}
		}
		break;
	}

error:
	if (rc && org_vol) {

		/* For each of the added members
		 *    (ie. it's not in the original volume)
		 * - remove the object from the region
		 * The expanded volume is then released as a whole
		 * (presumably together with its members - see
		 * md_free_volume()) and the original volume becomes the
		 * region's volume again.
		 */

		LIST_FOR_EACH(volume->members, iter, member) {
			if (!md_volume_find_object(org_vol, member->obj)) {
				md_remove_region_from_object(region, member->obj);
			}
		}

		raid5_free_private_data(volume);
		md_free_volume(volume);
		region->size = md_volume_calc_size(org_vol);
		region->private_data = org_vol;
		EngFncs->remove_thing(raid5_expand_shrink_list, org_vol);
		org_vol->region_mgr_flags &= ~MD_RAID5_EXPAND_PENDING;
		org_vol->flags &= ~MD_ARRAY_RESIZE_PENDING;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * FUNCTION: raid5_do_unwind_expansion
 *
 * Roll back an expansion of the region that was interrupted.
 *
 * region->private_data is the volume the region is restored to.  The
 * "expanded" volume that belongs to the same region is looked up in
 * raid5_expand_shrink_list.  The saved info located by
 * md_check_for_expand_shrink_in_progress() supplies the sector mark that is
 * handed to raid5_copy_data() to copy the data back.  After that the saved
 * info is zeroed, and every member that exists only in the expanded volume
 * gets its superblock zeroed and is detached from the region.
 *
 * On every exit path MD_RAID5_UNWIND_EXPANSION_PENDING is cleared and the
 * expanded volume (when one was found) is removed from
 * raid5_expand_shrink_list and freed.  On failure the region and the volume
 * are also flagged as corrupt.
 *
 * Returns 0 on success, EINVAL when the expanded volume or the saved expand
 * info cannot be found, otherwise the error returned by raid5_copy_data()
 * or md_zero_saved_info().
 */
static int raid5_do_unwind_expansion(storage_object_t *region)
{
	md_volume_t *volume;
	md_volume_t *candidate;
	md_volume_t *exp_vol = NULL;
	list_element_t iter;
	sector_count_t sectors = 0;
	md_saved_info_t *info = NULL;
	int rc = 0;
	md_member_t *saved_member = NULL;
	md_member_t *member;
	char msg[256];

	LOG_ENTRY();

	volume = (md_volume_t *)region->private_data;

	/*
	 * Keep the match in its own pointer: the loop variable does not
	 * reference a usable volume when the search comes up empty, so it
	 * must never reach the cleanup code below.
	 */
	LIST_FOR_EACH(raid5_expand_shrink_list, iter, candidate) {
		if (candidate->region == region) {
			exp_vol = candidate;
			break;
		}
	}
	if (!exp_vol) {
		LOG_CRITICAL("Internal Error, Could not find original volume "
			     "to unwind the interrupred expansion of region %s.\n",
			     region->name);
		rc = EINVAL;
		goto out;
	}

	/* Get EXPAND info : sector_mark */
	if (!md_check_for_expand_shrink_in_progress(volume, &saved_member)) {
		LOG_CRITICAL("%s: Internal error: No expand info.\n", region->name);
		rc = EINVAL;
		goto out;
	}
	info = saved_member->saved_info;
	sectors = info->sector_mark;

	/* A zero sector mark means there is nothing to copy back. */
	if (sectors) {
		snprintf(msg, sizeof(msg),
			 "RAID5 region %s failed to expand, restoring data...",
			 region->name);
		rc = raid5_copy_data(exp_vol, volume, &sectors, FALSE, TRUE, msg);
		if (rc) {
			LOG_CRITICAL("Error restoring data after expand failure.\n");
			goto out;
		}
	}

	rc = md_zero_saved_info(saved_member, TRUE);
	if (rc) {
		goto out;
	}

	/* Release every object that is a member of the expanded volume only. */
	LIST_FOR_EACH(exp_vol->members, iter, member) {
		if (!md_volume_find_object(volume, member->obj)) {
			LOG_DEFAULT("Delete MD superblock on %s.\n", member->obj->name);
			md_zero_superblock(member, TRUE);
			md_remove_region_from_object(region, member->obj);
		}
	}

out:
	/*
	 * Common teardown for success and failure:
	 * - free the raid5 private data
	 * - free the "expanded" volume which was built during discovery
	 * Skipped entirely when no expanded volume was found.
	 */
	if (exp_vol) {
		EngFncs->remove_thing(raid5_expand_shrink_list, exp_vol);
		if (exp_vol->private_data)
			raid5_free_private_data(exp_vol);
		md_free_volume(exp_vol);
	}

	volume->region_mgr_flags &= ~MD_RAID5_UNWIND_EXPANSION_PENDING;

	if (rc) {
		region->flags |= SOFLAG_CORRUPT;
		volume->flags |= MD_CORRUPT;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * FUNCTION: raid5_do_shrink
 *
 * Move the data of the region from the original volume layout to the new
 * (shrunk) layout.
 *
 * region->private_data is the new (shrunk) volume; the original volume for
 * the same region is looked up in raid5_expand_shrink_list.  Progress is
 * tracked in a saved-info structure: when
 * MD_RAID5_RESUME_SHRINKING_PENDING is set the existing one is reused,
 * otherwise a new one is allocated and attached to the first member of the
 * new volume.
 *
 * If raid5_copy_data() succeeds, the members that are no longer part of the
 * volume are released and the original volume is freed.  If it fails, the
 * data is copied back (when the sector mark is non-zero), the new volume is
 * freed and the original volume is re-attached to the region, i.e.
 * region->private_data changes in that case.
 *
 * Returns 0 on success, EINVAL on internal inconsistencies, ENOMEM when the
 * saved info cannot be allocated, EBUSY while the region is syncing,
 * otherwise the error of md_write_saved_info() or raid5_copy_data().
 */
static int raid5_do_shrink(storage_object_t *region)
{
	int rc = 0;
	int rc2;
	list_element_t iter;
	md_volume_t *volume;
	md_volume_t *org_vol;
	boolean found = FALSE;
	sector_count_t sectors;
	md_saved_info_t *info = NULL;
	md_member_t *saved_member = NULL;
	md_member_t *member;
	char msg[256];

	LOG_ENTRY();

	volume = (md_volume_t *)region->private_data;

	LIST_FOR_EACH(raid5_expand_shrink_list, iter, org_vol) {
		if (org_vol->region == region) {
			found = TRUE;
			break;
		}
	}
	if (found == FALSE) {
		LOG_CRITICAL("Internal Error, could not find original volume to shrink region %s.\n",
			     region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/*
	 * Don't allow shrinking if the region is syncing.  This is checked
	 * before any saved info is created or written, so that a refused
	 * shrink does not leave a "shrink in progress" marker on disk.
	 */
	if (md_is_recovery_running(region) == TRUE) {
		LOG_ERROR("%s: Can't allow shrinking because the region is actively syncing.\n",
			  region->name);
		LOG_EXIT_INT(EBUSY);
		return(EBUSY);
	}

	if (volume->region_mgr_flags & MD_RAID5_RESUME_SHRINKING_PENDING) {
		/* Find the disk that has the saved shrink info */
		if (md_check_for_expand_shrink_in_progress(volume, &saved_member)) {
			info = saved_member->saved_info;
		} else {
			LOG_MD_BUG();
			LOG_EXIT_INT(EINVAL);
			return EINVAL;
		}
	} else {
		/* Create saved_info struct on the first disk to keep track of the shrink progress */
		info = EngFncs->engine_alloc(MD_SAVED_INFO_BYTES);
		if (!info) {
			LOG_EXIT_INT(ENOMEM);
			return ENOMEM;
		}
		saved_member = EngFncs->first_thing(volume->members, NULL);
		if (!saved_member) {
			/* A volume without members has nowhere to keep the info. */
			EngFncs->engine_free(info);
			LOG_MD_BUG();
			LOG_EXIT_INT(EINVAL);
			return EINVAL;
		}
		saved_member->saved_info = info;
	}

	if (info->flags & MD_SAVED_INFO_SHRINK_IN_PROGRESS) {
		/* resume shrinking */
		sectors = info->sector_mark;
	} else {
		sectors = md_volume_calc_size(volume);
	}

	info->flags |= MD_SAVED_INFO_SHRINK_IN_PROGRESS;
	info->sector_mark = sectors;

	/*
	 * The original volume structure contains the original set of child objects.
	 * The new (shrunk) volume structure contains the new set of child objects.
	 * Record the device numbers of the removed objects in the saved info.
	 */
	info->expand_shrink_cnt = 0;
	LIST_FOR_EACH(org_vol->members, iter, member) {
		if (!md_volume_find_object(volume, member->obj)) {
			info->expand_shrink_devs[info->expand_shrink_cnt] = member->dev_number;
			info->expand_shrink_cnt++;
		}
	}

	rc = md_write_saved_info(saved_member);
	if (rc) {
		LOG_EXIT_INT(rc);
		return rc;
	}

	snprintf(msg, sizeof(msg), "Shrinking RAID5 region %s...", region->name);
	LOG_DEBUG("%s (sectors=%"PRIu64")\n", msg, sectors);

	rc = raid5_copy_data(org_vol, volume, &sectors, FALSE, TRUE, msg);

	LOG_DEBUG("raid5_copy_data returned rc=%d, (sectors=%"PRIu64")\n", rc, sectors);

	/* Clear SHRINK flag  */
	info->flags &= ~MD_SAVED_INFO_SHRINK_IN_PROGRESS;
	rc2 = md_write_saved_info(saved_member);
	if (rc2) {
		LOG_EXIT_INT(rc2);
		return rc2;
	}

	switch (rc) {
	case 0:
		/* Release the removed members and delete their superblocks */
		LIST_FOR_EACH(org_vol->members, iter, member) {
			if (!md_volume_find_object(volume, member->obj)) {
				md_remove_region_from_object(org_vol->region, member->obj);
				md_zero_superblock(member, TRUE);
				md_zero_saved_info(member, TRUE);
			}
		}
		raid5_free_private_data(org_vol);
		EngFncs->remove_thing(raid5_expand_shrink_list, org_vol);
		md_free_volume(org_vol);
		volume->region_mgr_flags &= ~MD_RAID5_SHRINK_PENDING;
		volume->flags &= ~MD_ARRAY_RESIZE_PENDING;
		break;
	default:
		/* Error, unwinding... */
		if (sectors) {
			snprintf(msg, sizeof(msg),
				 "RAID5 region %s failed to shrink, restoring data...",
				 region->name);
			rc2 = raid5_copy_data(volume, org_vol, &sectors, FALSE, TRUE, msg);
			if (rc2) {
				LOG_CRITICAL("Error restoring data after shrink failure.\n");
			}
		}

		/*
		 * Drop the shrunk volume and hand the region back to the
		 * original volume.  Callers must re-read region->private_data
		 * after a failure.
		 */
		raid5_free_private_data(volume);
		md_free_volume(volume);
		region->size = md_volume_calc_size(org_vol);
		region->private_data = org_vol;
		EngFncs->remove_thing(raid5_expand_shrink_list, org_vol);
		org_vol->region_mgr_flags &= ~MD_RAID5_SHRINK_PENDING;
		org_vol->flags &= ~MD_ARRAY_RESIZE_PENDING;
		break;
	}


	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * FUNCTION: raid5_do_resume_shrinking
 *
 * Continue a shrink by running raid5_do_shrink().  If that fails, the
 * region and its volume are flagged corrupt.  The
 * MD_RAID5_RESUME_SHRINKING_PENDING flag is cleared in either case.
 *
 * Returns the result of raid5_do_shrink().
 */
static int raid5_do_resume_shrinking(storage_object_t *region)
{
	md_volume_t *vol;
	int status;

	LOG_ENTRY();

	status = raid5_do_shrink(region);

	/*
	 * Fetch the volume only now: raid5_do_shrink() re-attaches the
	 * original volume to the region when the shrink fails.
	 */
	vol = (md_volume_t *)region->private_data;

	if (status != 0) {
		region->flags |= SOFLAG_CORRUPT;
		vol->flags |= MD_CORRUPT;
	}

	vol->region_mgr_flags &= ~MD_RAID5_RESUME_SHRINKING_PENDING;

	LOG_EXIT_INT(status);
	return status;
}

/*
 * FUNCTION: raid5_commit_expand
 *
 * Steps to expand raid5 region:
 *	- update superblocks
 *	- expand the region (raid5_do_expand)
 *	- if expansion failed, rewrite superblocks
 *
 * The expansion is refused with EINVAL when the volume is mounted, when the
 * region is still active, or when no original volume for the region is
 * found in raid5_expand_shrink_list.
 *
 * SOFLAG_DIRTY is cleared on the region on every exit path.
 *
 * Returns 0 on success, otherwise the first error that occurred.  A failure
 * to write the superblocks back after a failed expansion is logged, but the
 * error of the expansion itself is what gets returned.
 */
static int raid5_commit_expand(storage_object_t *region)
{
	md_volume_t * volume = (md_volume_t *)region->private_data;
	logical_volume_t *evms_volume;
	md_volume_t * org_vol;
	boolean found = FALSE;
	list_element_t iter;
	md_member_t *member;
	int rc = 0;
	int rc2;

	LOG_ENTRY();

	/* Don't allow expansion if volume is mounted */
	if (EngFncs->is_offline(region, &evms_volume) == FALSE) {
		LOG_WARNING("Hmm... %s is mounted.\n", evms_volume->name);
		rc = EINVAL;
		goto out;
	}

	/* Don't allow expansion if region is active */
	if (region->flags & SOFLAG_ACTIVE) {
		LOG_WARNING("Hmm... %s is still active.\n", region->name);
		rc = EINVAL;
		goto out;
	}

	LIST_FOR_EACH(raid5_expand_shrink_list, iter, org_vol) {
		if (org_vol->region == region) {
			found = TRUE;
			break;
		}
	}
	if (found == FALSE) {
		LOG_CRITICAL("Internal Error,"
			     " could not find original volume to expand region %s.\n",
			     region->name);
		rc = EINVAL;
		goto out;
	}

	/* Assume success, new disks are now active and in sync */
	LIST_FOR_EACH(volume->members, iter, member) {
		if (!md_volume_find_object(org_vol, member->obj)) {
			member->flags = (MD_MEMBER_DISK_ACTIVE | MD_MEMBER_DISK_SYNC);
		}
	}

	volume->flags |= MD_DIRTY;
	volume->commit_flag |= MD_COMMIT_DONT_CHECK_ACTIVE;
	rc = md_write_sbs_to_disk(volume);
	if (!rc) {
		rc = raid5_do_expand(region);
		if (rc) {
			/*
			 * If expansion failed, write back old superblocks.
			 * The volume is re-read from the region because the
			 * pointer fetched on entry may no longer be the one
			 * attached to the region.
			 */
			volume = (md_volume_t *)region->private_data;
			volume->flags |= MD_DIRTY;
			volume->commit_flag |= MD_COMMIT_DONT_CHECK_ACTIVE;
			rc2 = md_write_sbs_to_disk(volume);
			if (rc2) {
				/*
				 * Do not OR the two codes together: errno
				 * values are not bit flags, the result would
				 * be an unrelated error number.
				 */
				LOG_ERROR("Failed to write back the superblocks of region %s"
					  " after the failed expansion (rc=%d).\n",
					  region->name, rc2);
			}
		}
	}
out:
	region->flags &= ~SOFLAG_DIRTY;
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * FUNCTION: raid5_commit_shrink
 *
 * Commit a pending shrink of the raid5 region:
 *	- refuse (EINVAL) if the volume is mounted; SOFLAG_DIRTY is cleared
 *	  on the region in this case
 *	- refuse (EINVAL) if the region is still active
 *	- shrink the region (raid5_do_shrink)
 *	- on success, mark the volume dirty and update the superblocks
 *
 * Returns 0 on success, otherwise the error of the failing step.
 */
static int raid5_commit_shrink(storage_object_t *region)
{
	md_volume_t *vol = (md_volume_t *)region->private_data;
	logical_volume_t *mounted_vol;
	int status;

	LOG_ENTRY();

	if (EngFncs->is_offline(region, &mounted_vol) == FALSE) {
		/* Don't allow shrinking if volume is mounted */
		region->flags &= ~SOFLAG_DIRTY;
		LOG_WARNING("Hmm... %s is mounted.\n", mounted_vol->name);
		status = EINVAL;
	} else if (region->flags & SOFLAG_ACTIVE) {
		/* Don't allow shrinking if region is active */
		LOG_WARNING("Hmm... %s is still active.\n", region->name);
		status = EINVAL;
	} else {
		status = raid5_do_shrink(region);
		if (status == 0) {
			/* The data has been moved, now update the superblocks */
			vol->flags |= MD_DIRTY;
			status = md_write_sbs_to_disk(vol);
		}
	}

	LOG_EXIT_INT(status);
	return status;
}

/*
 * FUNCTION: raid5_commit_unwind_expansion
 *
 * Unwind an interrupted expansion (raid5_do_unwind_expansion) and, when
 * that worked, write the superblocks of the region's volume to disk.
 * SOFLAG_DIRTY is cleared on the region regardless of the outcome.
 *
 * Returns 0 on success, otherwise the error of the failing step.
 */
static int raid5_commit_unwind_expansion(storage_object_t *region)
{
	md_volume_t *vol = (md_volume_t *)region->private_data;
	int status;

	LOG_ENTRY();

	status = raid5_do_unwind_expansion(region);
	if (status == 0) {
		vol->commit_flag |= MD_COMMIT_DONT_CHECK_ACTIVE;
		vol->flags |= MD_DIRTY;
		status = md_write_sbs_to_disk(vol);
	}

	region->flags &= ~SOFLAG_DIRTY;

	LOG_EXIT_INT(status);
	return status;
}

/*
 * FUNCTION: raid5_commit_resume_shrinking
 *
 * Resume a shrink (raid5_do_resume_shrinking) and, when that worked, write
 * the superblocks of the region's volume to disk.  SOFLAG_DIRTY is cleared
 * on the region regardless of the outcome.
 *
 * Returns 0 on success, otherwise the error of the failing step.
 */
static int raid5_commit_resume_shrinking(storage_object_t *region)
{
	md_volume_t *vol = (md_volume_t *)region->private_data;
	int status;

	LOG_ENTRY();

	status = raid5_do_resume_shrinking(region);
	if (status == 0) {
		/* The volume fetched on entry is only used on this success path. */
		vol->commit_flag |= MD_COMMIT_DONT_CHECK_ACTIVE;
		vol->flags |= MD_DIRTY;
		status = md_write_sbs_to_disk(vol);
	}

	region->flags &= ~SOFLAG_DIRTY;

	LOG_EXIT_INT(status);
	return status;
}


/* Function: raid5_commit_changes
 *
 * Commit the pending changes of a region for one phase of the commit
 * process.
 *
 * The phases:
 *	SETUP
 *	  - for actions required prior to superblocks update
 *	FIRST_METADATA_WRITE
 *	  - expand, unwind of an expansion, resume of a shrink, or a plain
 *	    write of the MD superblocks (requires the MD_DIRTY flag)
 *	SECOND_METADATA_WRITE
 *	  - shrink
 *	POST_ACTIVATE
 *	  -  for actions required after superblocks update, _or_
 *	     for queued IOCTLs
 *	  -  reload superblocks via raid5_rediscover_region
 *
 *	NOTE:  In order to get invoked for all phases of commit process,
 *		leave SOFLAG_DIRTY until the last phase (POST_ACTIVATE)
 *
 * Returns EFAULT for a NULL region or a region without volume, EINVAL for
 * a region of another plugin, 0 if the region is not dirty, otherwise the
 * result of the work done for the phase.
 */
static int raid5_commit_changes( storage_object_t * region,
				 uint               phase )
{
	md_volume_t * vol;
	int rc = 0;
	int ioctl_rc;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	if (!region) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	if (region->plugin != raid5_plugin) {
		LOG_ERROR("Region %s does not belong to MD.\n", region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	if (!(region->flags & SOFLAG_DIRTY)) {
		LOG_WARNING("Region %s is not dirty - not committing.\n", region->name);
		LOG_EXIT_INT(0);
		return 0;
	}

	vol = (md_volume_t *)region->private_data;
	if (!vol) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	switch (phase) {
	case SETUP:
		rc = process_setup_funcs(region);
		break;

	case FIRST_METADATA_WRITE:
		kill_sectors();

		/* At most one of the following is carried out, first match wins. */
		if (vol->region_mgr_flags & MD_RAID5_EXPAND_PENDING) {
			rc = raid5_commit_expand(region);
		} else if (vol->region_mgr_flags & MD_RAID5_SHRINK_PENDING) {
			/* Will do re-stripping process in SECOND_METADATA_WRITE phase */
		} else if (vol->region_mgr_flags & MD_RAID5_UNWIND_EXPANSION_PENDING) {
			rc = raid5_commit_unwind_expansion(region);
		} else if (vol->region_mgr_flags & MD_RAID5_RESUME_SHRINKING_PENDING) {
			rc = raid5_commit_resume_shrinking(region);
		} else if (vol->flags & MD_DIRTY) {
			rc = md_write_sbs_to_disk(vol);
			vol->flags &= ~MD_NEW_REGION;
		}
		break;

	case SECOND_METADATA_WRITE:
		if (vol->region_mgr_flags & MD_RAID5_SHRINK_PENDING) {
			rc = raid5_commit_shrink(region);
		}
		break;

	case POST_ACTIVATE:
		ioctl_rc = process_md_ioctl_pkgs(region);
		free_ioctl_pkgs(vol);

		/* The MD array is rediscovered no matter how the ioctls went. */
		rc = raid5_rediscover_region(region, TRUE);
		if (rc == 0) {
			/*
			 * The region was rediscovered, volume pointer is now invalid.
			 * The correct volume pointer is region->private_data.
			 */
			vol = (md_volume_t *)region->private_data;
			region->flags &= ~SOFLAG_DIRTY;
		} else {
			md_display_corrupt_messages(RAID5);
		}

		/* An ioctl failure takes precedence over the rediscover result. */
		if (ioctl_rc != 0) {
			rc = ioctl_rc;
		}
		break;

	default:
		break;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * FUNCTION: raid5_can_activate_region
 *
 * Tell whether the region may be activated.
 *
 * Returns 0 if it may, EINVAL if the volume of the region is flagged
 * MD_CORRUPT, EFAULT if the region or its volume is NULL.
 */
static int raid5_can_activate_region(storage_object_t * region)
{
	md_volume_t *volume;

	/* This is the RAID5 plugin; the RAID0 record was used here by mistake. */
	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/* Same parameter checks as raid5_activate_region(). */
	if (!region) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	volume = (md_volume_t *)region->private_data;
	if (!volume) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	if (volume->flags & MD_CORRUPT) {
		LOG_WARNING("MD region %s is corrupt.\n", volume->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	LOG_EXIT_INT(0);
	return 0;
}

/*
 * FUNCTION: raid5_activate_region
 *
 * Activate the region through md_activate_region().
 *
 * Returns EFAULT if the region or its private data is NULL, otherwise the
 * result of md_activate_region().
 */
static int raid5_activate_region(storage_object_t * region)
{
	int status = EFAULT;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/* Only a region that carries a volume is handed to the MD code. */
	if (region && region->private_data) {
		status = md_activate_region(region);
	}

	LOG_EXIT_INT(status);
	return status;
}

/*
 * FUNCTION: raid5_can_deactivate_region
 *
 * Tell whether the region may be deactivated.  No condition is checked;
 * the answer is always 0 (yes).
 */
static int raid5_can_deactivate_region(storage_object_t * region)
{
	/* Select the plugin record like the other plugin entry points do. */
	my_plugin = raid5_plugin;
	LOG_ENTRY();

	LOG_EXIT_INT(0);
	return 0;
}

/*
 * FUNCTION: raid5_deactivate_region
 *
 * Deactivate the region through md_deactivate_region() and return its
 * result.
 */
static int raid5_deactivate_region(storage_object_t * region)
{
	int status;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	status = md_deactivate_region(region);

	LOG_EXIT_INT(status);
	return status;
}



/* Function: raid5_get_option_count
 *
 *  Return the number of options that are available for the action of the
 *  given task.  Only create, expand and shrink have a dedicated option
 *  count; the count is 0 for the plugin functions (add/remove spare,
 *  remove/mark faulty, remove stale) and for any other action.
 */
static int raid5_get_option_count( task_context_t * task ) {
	int nr_options;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	if (task->action == EVMS_Task_Create) {
		nr_options = MD_CREATE_OPTIONS_COUNT;
	} else if (task->action == EVMS_Task_Expand) {
		nr_options = RAID5_EXPAND_OPTION_COUNT;
	} else if (task->action == EVMS_Task_Shrink) {
		nr_options = RAID5_SHRINK_OPTION_COUNT;
	} else {
		/* Plugin functions and unknown actions take no options. */
		nr_options = 0;
	}

	LOG_EXIT_INT(nr_options);
	return nr_options;
}


/*
 * Get the list of objects on the system that we can use.
 *
 * Builds a list of object names in *value_list: the first entry is the
 * RAID5_CREATE_NO_SELECTION string, followed by the name of every valid
 * input object that is not in selected_objects and whose size is at least
 * min_size.
 *
 * A list that is already present in *value_list is freed (including its
 * strings) before the new one is stored.
 *
 * Returns 0 on success, the error of the engine if the objects cannot be
 * retrieved, or ENOMEM.  If the list itself or its first entry cannot be
 * allocated, *value_list is NULL on return; if a later name cannot be
 * duplicated, *value_list holds the entries built up to that point.
 */
static int get_object_list( value_list_t ** value_list,
			    list_anchor_t   selected_objects,
			    sector_count_t  min_size) {

	int rc = 0;
	storage_object_t * object;
	list_anchor_t tmp_list;
	list_element_t li;
	int count, i;

	LOG_ENTRY();

	rc = EngFncs->get_object_list(DISK | SEGMENT | REGION,
				      DATA_TYPE,
				      NULL,
				      NULL,
				      VALID_INPUT_OBJECT,
				      &tmp_list);

	if (rc) {
		LOG_ERROR("Could not get available objects.\n");
		LOG_EXIT_INT(rc);
		return rc;
	}

	/*
	 * Loop through the selected objects, removing those objects from
	 * tmp_list.
	 */

	LIST_FOR_EACH(selected_objects, li, object) {
		LOG_DETAILS("Object %s selected, removing from spare list\n",object->name);
		EngFncs->remove_thing(tmp_list, object);
	}

	/* Throw away the previous list, if there is one. */
	if (*value_list) {
		for (i = 0; i < (*value_list)->count; i++) {
			if ((*value_list)->value[i].s) {
				EngFncs->engine_free((*value_list)->value[i].s);
			}
		}
		EngFncs->engine_free(*value_list);
	}

	count = EngFncs->list_count(tmp_list);
	/* Increment count to hold the 'None' selection. */
	count++;
	*value_list = EngFncs->engine_alloc(count * sizeof(value_t) + sizeof(value_list_t));

	if (*value_list == NULL) {
		LOG_ERROR("No memory\n");
		/* The temporary object list must not be leaked on this path. */
		EngFncs->destroy_list(tmp_list);
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	i = 0;
	(*value_list)->value[i].s = EngFncs->engine_strdup(RAID5_CREATE_NO_SELECTION);
	if ((*value_list)->value[i].s == NULL) {
		/* Without the 'None' entry the list is of no use. */
		LOG_ERROR("No memory\n");
		EngFncs->engine_free(*value_list);
		*value_list = NULL;
		EngFncs->destroy_list(tmp_list);
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}
	i++;

	LIST_FOR_EACH(tmp_list, li, object) {
		if (object->size >= min_size) {
			(*value_list)->value[i].s = EngFncs->engine_strdup(object->name);
			if ((*value_list)->value[i].s == NULL) {
				/* Keep what has been built so far and report the failure. */
				LOG_ERROR("No memory\n");
				rc = ENOMEM;
				break;
			}
			i++;
		}
	}
	/* Only the entries that were really filled in are counted. */
	(*value_list)->count = i;

	EngFncs->destroy_list(tmp_list);

	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * Allocate a value list with the two supported RAID level names
 * (RAID4_LEVEL_NAME, RAID5_LEVEL_NAME, in this order) and store it in
 * *raid_level_list.
 *
 * Returns 0 on success.  If the list cannot be allocated, *raid_level_list
 * is set to NULL and ENOMEM is returned.
 */
static int get_raid_level_list(value_list_t * * raid_level_list) {

	value_list_t *levels;

	LOG_ENTRY();

	levels = EngFncs->engine_alloc(sizeof(value_list_t) + 2 * sizeof(value_t));
	*raid_level_list = levels;

	if (levels == NULL) {
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	levels->value[0].s = EngFncs->engine_strdup(RAID4_LEVEL_NAME);
	levels->value[1].s = EngFncs->engine_strdup(RAID5_LEVEL_NAME);
	levels->count = 2;

	LOG_EXIT_INT(0);
	return 0;
}


/*
 * Allocate a value list with the names of the four parity algorithms and
 * store it in *algorithm_list.  Each name is stored at the index given by
 * its ALGORITHM_* constant.
 *
 * Returns 0 on success.  If the list cannot be allocated, *algorithm_list
 * is set to NULL and ENOMEM is returned.
 */
static int get_algorithm_list(value_list_t * * algorithm_list) {

	value_list_t *algorithms;

	LOG_ENTRY();

	algorithms = EngFncs->engine_alloc(sizeof(value_list_t) + 4 * sizeof(value_t));
	*algorithm_list = algorithms;

	if (algorithms == NULL) {
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	algorithms->count = 4;
	algorithms->value[ALGORITHM_LEFT_ASYMMETRIC].s =
		EngFncs->engine_strdup(ALGORITHM_LEFT_ASYMMETRIC_NAME);
	algorithms->value[ALGORITHM_RIGHT_ASYMMETRIC].s =
		EngFncs->engine_strdup(ALGORITHM_RIGHT_ASYMMETRIC_NAME);
	algorithms->value[ALGORITHM_LEFT_SYMMETRIC].s =
		EngFncs->engine_strdup(ALGORITHM_LEFT_SYMMETRIC_NAME);
	algorithms->value[ALGORITHM_RIGHT_SYMMETRIC].s =
		EngFncs->engine_strdup(ALGORITHM_RIGHT_SYMMETRIC_NAME);

	LOG_EXIT_INT(0);
	return 0;
}



/*
 * Fill spare_disks with the objects of all members of the volume that are
 * flagged MD_MEMBER_DISK_SPARE and not MD_MEMBER_NEW.  The list is emptied
 * first.
 *
 * The list stays empty if the array is degraded and has at most one spare,
 * because that spare cannot be removed.
 *
 * Returns 0 on success, ENOMEM if an object cannot be inserted (the scan
 * stops at that point).
 */
static int get_spare_disks(md_volume_t *vol, list_anchor_t spare_disks)
{
	int status = 0;
	list_element_t pos;
	md_member_t *member;

	LOG_ENTRY();

	/* Clear out the spare_disks list. */
	EngFncs->delete_all_elements(spare_disks);

	/* The last spare of a degraded array is not offered for removal. */
	if ((vol->spare_disks <= 1) && (vol->flags & MD_DEGRADED)) {
		LOG_EXIT_INT(0);
		return 0;
	}

	LIST_FOR_EACH(vol->members, pos, member) {
		if (!member->obj ||
		    !(member->flags & MD_MEMBER_DISK_SPARE) ||
		    (member->flags & MD_MEMBER_NEW)) {
			continue;
		}

		if (!EngFncs->insert_thing(spare_disks, member->obj,
					   INSERT_AFTER, NULL)) {
			LOG_ERROR("Could not insert object into spare list.\n");
			status = ENOMEM;
			break;
		}
	}

	LOG_EXIT_INT(status);
	return status;
}


/*
 * Fill faulty_disks with the objects of all members of the volume that are
 * flagged MD_MEMBER_DISK_FAULTY and not MD_MEMBER_NEW.  The list is emptied
 * first.
 *
 * Returns 0 on success, ENOMEM if an object cannot be inserted (the scan
 * stops at that point).
 */
static int get_faulty_disks(md_volume_t *vol, list_anchor_t faulty_disks)
{
	int status = 0;
	list_element_t pos;
	md_member_t *member;

	LOG_ENTRY();

	EngFncs->delete_all_elements(faulty_disks);

	LIST_FOR_EACH(vol->members, pos, member) {
		if (!member->obj ||
		    !(member->flags & MD_MEMBER_DISK_FAULTY) ||
		    (member->flags & MD_MEMBER_NEW)) {
			continue;
		}

		if (!EngFncs->insert_thing(faulty_disks, member->obj,
					   INSERT_AFTER, NULL)) {
			LOG_ERROR("Could not insert object into faulty list.\n");
			status = ENOMEM;
			break;
		}
	}

	LOG_EXIT_INT(status);
	return status;
}

/*
 * Fill stale_disks with the objects of all members of the volume that are
 * flagged MD_MEMBER_STALE.  The list is emptied first.
 *
 * A failed insertion is logged and remembered, but the scan goes on with
 * the remaining members.
 *
 * Returns 0 on success, ENOMEM if at least one insertion failed.
 */
static int get_stale_disks(md_volume_t * vol, list_anchor_t stale_disks)
{
	int status = 0;
	list_element_t pos;
	md_member_t *member;

	LOG_ENTRY();

	EngFncs->delete_all_elements(stale_disks);

	LIST_FOR_EACH(vol->members, pos, member) {
		if (!member->obj || !(member->flags & MD_MEMBER_STALE)) {
			continue;
		}

		if (!EngFncs->insert_thing(stale_disks, member->obj,
					   INSERT_AFTER, NULL)) {
			/* Log error, but continue */
			LOG_ERROR("Could not insert object to list.\n");
			status = ENOMEM;
		}
	}

	LOG_EXIT_INT(status);
	return status;
}

/*
 * Fill active_disks with the objects of all members of the volume for
 * which md_member_is_raid_disk() is true.  The list is emptied first.
 *
 * A failed insertion is logged and remembered, but the scan goes on with
 * the remaining members.
 *
 * Returns 0 on success, ENOMEM if at least one insertion failed.
 */
static int get_active_disks(md_volume_t *vol, list_anchor_t active_disks)
{
	int status = 0;
	list_element_t pos;
	md_member_t *member;

	LOG_ENTRY();

	EngFncs->delete_all_elements(active_disks);

	LIST_FOR_EACH(vol->members, pos, member) {
		if (!(member->obj && md_member_is_raid_disk(member))) {
			continue;
		}

		if (!EngFncs->insert_thing(active_disks, member->obj,
					   INSERT_AFTER, NULL)) {
			/* Log error, but continue */
			LOG_ERROR("Could not insert object to list.\n");
			status = ENOMEM;
		}
	}

	LOG_EXIT_INT(status);
	return status;
}


/*
 * Delete from the list every object whose usable size (for the superblock
 * version of the volume and the chunk size of the RAID5 configuration) is
 * smaller than the size recorded in the RAID5 configuration.
 *
 * A volume without RAID5 configuration is reported as an MD bug and the
 * list is left untouched.
 */
static void prune_small_objects(list_anchor_t objects, md_volume_t *vol)
{
	raid5_conf_t *conf = mdvol_to_conf(vol);
	list_element_t cur, next;
	storage_object_t *candidate;
	sector_count_t usable;

	LOG_ENTRY();

	if (conf) {
		/* The _SAFE variant is used because elements are deleted while iterating. */
		LIST_FOR_EACH_SAFE(objects, cur, next, candidate) {
			usable = md_object_usable_size(candidate, &vol->sb_ver, conf->chunksize);
			if (usable < conf->size) {
				EngFncs->delete_element(cur);
			}
		}
	} else {
		LOG_MD_BUG();
	}

	LOG_EXIT_VOID();
}


/* Function: raid5_init_task
 *
 *  Determine the type of Task that is being performed, and set up the
 *  context structure with the appropriate initial values: the option
 *  descriptors, the list of acceptable objects and the minimum/maximum
 *  number of objects that may be selected.
 *
 *  Returns 0 on success, EFAULT for a NULL context, EINVAL for an unknown
 *  action or for a volume without RAID5 configuration (expand/shrink),
 *  otherwise the error of the helper that failed (e.g. ENOMEM).  The
 *  context may be partially set up when an error is returned.
 */
static int raid5_init_task( task_context_t * context )
{
	int rc = 0;
	list_anchor_t tmp_list;
	list_element_t iter1;
	md_volume_t * volume;
	raid5_conf_t * conf;
	md_member_t *member;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/* Parameter check */
	if (!context) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	switch (context->action) {
	
	case EVMS_Task_Create:

		context->option_descriptors->count = MD_CREATE_OPTIONS_COUNT;
		
		// Version 1 Superblock Option
		if (md_can_create_sb_1() == TRUE) {
			context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].flags = 0;
			context->min_selected_objects = RAID5_MIN_RAID_DISKS;
			context->max_selected_objects = MD_SB_1_DISKS;
		} else {
			context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].flags = EVMS_OPTION_FLAGS_INACTIVE;
			context->min_selected_objects = RAID5_MIN_RAID_DISKS;
			context->max_selected_objects = MD_SB_DISKS;
		}
		context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].constraint.list = NULL;
		context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].constraint_type = EVMS_Collection_None;
		context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].help = NULL;
		context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].name =
			EngFncs->engine_strdup( RAID5_CREATE_OPT_SB1_NAME );
		context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].tip =
			EngFncs->engine_strdup( _("Choose Yes if you want to create MD version 1 super block.") );
		context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].title = EngFncs->engine_strdup( _("Version 1 Super Block") );
		context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].type = EVMS_Type_Boolean;
		context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].value.b = FALSE;

		/* Spare disk option */
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].flags = EVMS_OPTION_FLAGS_NOT_REQUIRED;
		/* Get the list of disks that can be spares. */
		rc = get_object_list((value_list_t **)&context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].constraint.list,
				     context->selected_objects,
				     0);
		if (rc) {
			break;
		}
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].help = NULL;
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].name = EngFncs->engine_strdup(RAID5_CREATE_OPT_SPARE_DISK_NAME );
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].min_len = 1;
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].max_len = EVMS_VOLUME_NAME_SIZE + 1;
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].tip = EngFncs->engine_strdup(_("Object to use as a spare disk in the array"));
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].title = EngFncs->engine_strdup(_("Spare Disk"));
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].value.s = EngFncs->engine_alloc(EVMS_VOLUME_NAME_SIZE + 1);

		/* Chunk size option */
		context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].flags = 0;
		SET_POWER2_LIST(context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].constraint.list, MD_MIN_CHUNK_SIZE, MD_MAX_CHUNK_SIZE);
		context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].help = NULL;
		context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].name = EngFncs->engine_strdup(RAID5_CREATE_OPT_CHUNK_SIZE_NAME );
		context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].tip = EngFncs->engine_strdup(_("Size of the chunks in the RAID array"));
		context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].title = EngFncs->engine_strdup(_("Chunk size"));
		context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].type = EVMS_Type_Unsigned_Int32;
		context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].unit = EVMS_Unit_Kilobytes;
		context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].value.ui32 = MD_DEFAULT_CHUNK_SIZE;

		/* RAID level option */
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].flags = 0;
		rc = get_raid_level_list(&context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].constraint.list);
		if (rc) {
			break;
		}
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].help = NULL;
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].name = EngFncs->engine_strdup(RAID5_CREATE_OPT_RAID_LEVEL_NAME);
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].min_len = 5;
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].max_len = 19;
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].tip = EngFncs->engine_strdup(_("RAID Level -- RAID4 or RAID5"));
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].title = EngFncs->engine_strdup(_("RAID level"));
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[RAID5_CREATE_OPT_RAID_LEVEL_INDEX].value.s = EngFncs->engine_strdup(RAID5_LEVEL_NAME);

		/* Parity algorithm option */
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].flags = 0;
		rc = get_algorithm_list(&context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].constraint.list);
		if (rc) {
			break;
		}
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].help = NULL;
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].name = EngFncs->engine_strdup(RAID5_CREATE_OPT_PARITY_ALGORITHM_NAME);
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].min_len = 5;
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].max_len = 19;
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].tip = EngFncs->engine_strdup(_("RAID5 Parity algorithm"));
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].title = EngFncs->engine_strdup(_("RAID5 Algorithm"));
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].value.s = EngFncs->engine_alloc(20);
		if (context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].value.s == NULL) {
			rc = ENOMEM;
			break;
		}
		strcpy(context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].value.s, ALGORITHM_LEFT_SYMMETRIC_NAME);

		/* Get a list of all valid input disks, segments, and regions. */
		rc = EngFncs->get_object_list(DISK | SEGMENT | REGION,
					      DATA_TYPE,
					      NULL,
					      NULL,
					      VALID_INPUT_OBJECT,
					      &tmp_list);
		if (rc) {
			/* tmp_list is not valid, it must not be used or destroyed. */
			break;
		}

		/* Move these items to the acceptable objects list. */
		md_transfer_list(tmp_list, context->acceptable_objects);
		EngFncs->destroy_list(tmp_list);

		break;
		
	case EVMS_Task_Expand:
		/* No options on expand */
		volume = (md_volume_t *) context->object->private_data;
		conf = mdvol_to_conf(volume);
		context->option_descriptors->count = RAID5_EXPAND_OPTION_COUNT;

		/* The configuration is needed for the limits computed below. */
		if (!conf) {
			LOG_MD_BUG();
			rc = EINVAL;
			break;
		}

		// get a list of all valid input disks, segments, and regions.
		rc = EngFncs->get_object_list(DISK | SEGMENT | REGION,
					      DATA_TYPE,
					      NULL,
					      context->object->disk_group,
					      VALID_INPUT_OBJECT | NO_DISK_GROUP,
					      &tmp_list);
		if (rc) {
			break;
		}

		// remove this region from the list
		EngFncs->remove_thing(tmp_list, context->object);
		
		// remove small objects from the list
		prune_small_objects(tmp_list, volume);
		
		/* Remove all parents of this MD region from acceptable list */
		remove_parent_regions_from_list(tmp_list, context->object);

		// move these items to the acceptable objects list.
		md_transfer_list(tmp_list, context->acceptable_objects);

		EngFncs->destroy_list(tmp_list);

		context->min_selected_objects = 1;
		context->max_selected_objects = MAX_DISKS(volume) - conf->raid_disks;
		break;

	case EVMS_Task_Shrink:
		/* No options on shrink */
		context->option_descriptors->count = RAID5_SHRINK_OPTION_COUNT;
		volume = (md_volume_t *) context->object->private_data;
		conf = mdvol_to_conf(volume);

		/* The configuration is needed for the limit computed below. */
		if (!conf) {
			LOG_MD_BUG();
			rc = EINVAL;
			break;
		}

		/* Every raid disk of the array is a candidate for removal. */
		LIST_FOR_EACH(volume->members, iter1, member) {
			if (md_member_is_raid_disk(member)) {
				EngFncs->insert_thing(context->acceptable_objects, member->obj, INSERT_AFTER, NULL);
			}
		}

		context->min_selected_objects = 1;
		context->max_selected_objects = (conf->raid_disks - RAID5_MIN_RAID_DISKS);

		break;

	case MD_RAID5_FUNCTION_ADD_SPARE:
		volume = (md_volume_t *) context->object->private_data;

		context->min_selected_objects = 1;
		context->max_selected_objects = MAX_DISKS(volume) - volume->nr_disks;
		context->option_descriptors->count = 0;

		rc = EngFncs->get_object_list(DISK | SEGMENT | REGION,
					      DATA_TYPE,
					      NULL,
					      context->object->disk_group,
					      VALID_INPUT_OBJECT | NO_DISK_GROUP,
					      &tmp_list);

		if (rc == 0) {
			/*
			 * If this MD region is available, it will
			 * appear in the list.  Bad things happen if
			 * this region is made a child of itself.
			 * Remove this MD region if it is in the list.
			 */
			EngFncs->remove_thing(tmp_list, context->object);

			prune_small_objects(tmp_list, volume);
			
			/* Remove all parents of this MD region from the list */
			remove_parent_regions_from_list(tmp_list, context->object);

			// move these items to the acceptable objects list.
			md_transfer_list(tmp_list, context->acceptable_objects);
			EngFncs->destroy_list(tmp_list);
		}
		break;

	case MD_RAID5_FUNCTION_REMOVE_SPARE:
		volume = (md_volume_t *) context->object->private_data;

		context->min_selected_objects = 1;
		context->max_selected_objects = -1;
		context->option_descriptors->count = 0;

		rc = get_spare_disks(volume, context->acceptable_objects);
		break;

	case MD_RAID5_FUNCTION_REMOVE_FAULTY:
		volume = (md_volume_t *) context->object->private_data;

		context->min_selected_objects = 1;
		context->max_selected_objects = -1;
		context->option_descriptors->count = 0;

		rc = get_faulty_disks(volume, context->acceptable_objects);
		break;

	case MD_RAID5_FUNCTION_REMOVE_STALE:
		volume = (md_volume_t *) context->object->private_data;

		context->min_selected_objects = 1;
		context->max_selected_objects = -1;
		context->option_descriptors->count = 0;

		rc = get_stale_disks(volume, context->acceptable_objects);
		break;

	case MD_RAID5_FUNCTION_MARK_FAULTY:
		volume = (md_volume_t *) context->object->private_data;

		context->min_selected_objects = 1;
		context->max_selected_objects = 1;
		context->option_descriptors->count = 0;

		rc = get_active_disks(volume, context->acceptable_objects);
		break;

	default:
		rc = EINVAL;
		break;
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * Percentage used by the "big object" warnings below: an object is reported
 * when its usable size exceeds the reference size by more than this
 * percentage of the reference size.
 */
#define PERCENT_WARN_THRESHOLD  5

/*
 * Tell the user that an object is larger than the smallest object in the
 * array and that the surplus space will go unused.
 *
 * obj      - the oversized object; its name appears in the message.
 * diffsize - the surplus, in sectors; it is shown to the user in MB.
 */
static void issue_warning_big_obj(storage_object_t *obj, u_int64_t diffsize)
{
	char *buttons[2] = { _("OK"), NULL };
	char surplus_text[64];
	int reply = 0;
	u_int64_t surplus_mb = diffsize * EVMS_VSECTOR_SIZE / (1024*1024);

	/* The message takes the number as a string, so format it first. */
	sprintf(surplus_text, "%"PRIu64, surplus_mb);

	QUESTION(&reply, buttons,
		_("The %s object is %s MB larger than the smallest object in the RAID array.  "
		  "The extra space will not be used.\n"),
		obj->name, surplus_text);
}

/*
 * For a create task, warn about every selected object (and the spare named
 * in the create options, if it can be found) whose usable size exceeds the
 * smallest usable size by more than PERCENT_WARN_THRESHOLD percent.
 *
 * Usable sizes are computed for the superblock version and chunk size
 * currently set in the task's option descriptors.
 */
static void warn_if_big_objects( task_context_t * context )
{
	storage_object_t * candidate = NULL;
	storage_object_t * spare = NULL;
	u_int64_t smallest_size = -1;	/* all ones: "nothing seen yet" */
	u_int64_t surplus;
	list_element_t iter;
	md_sb_ver_t sb_ver = {MD_SB_VER_0, 90, 0};
	int chunksize;

	LOG_ENTRY();

	/* Switch to the version 1 superblock when that option is selected. */
	if (context->option_descriptors->option[RAID5_CREATE_OPT_SB1_INDEX].value.b == TRUE) {
		sb_ver.major_version = MD_SB_VER_1;
		sb_ver.minor_version = 0;
	}

	/* The option value is doubled (presumably KB to 512-byte sectors). */
	chunksize = context->option_descriptors->option[RAID5_CREATE_OPT_CHUNK_SIZE_INDEX].value.ui32 * 2;

	/* Pass 1: find the smallest usable size among the selected objects. */
	LIST_FOR_EACH(context->selected_objects, iter, candidate) {
		smallest_size = min(smallest_size, md_object_usable_size(candidate, &sb_ver, chunksize));
	}

	/* No size was found, so there is nothing to compare against. */
	if (smallest_size == -1) {
		LOG_EXIT_VOID();
		return;
	}

	/* A spare named in the options also takes part in the minimum. */
	if (context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].value.s != NULL) {
		spare = md_find_valid_input_object(
			context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].value.s);
	}
	if (spare != NULL) {
		smallest_size = min(smallest_size, md_object_usable_size(spare, &sb_ver, chunksize));
	}

	/* Pass 2: warn about each object that is too far above the minimum. */
	LIST_FOR_EACH(context->selected_objects, iter, candidate) {
		surplus = md_object_usable_size(candidate, &sb_ver, chunksize) - smallest_size;

		if (surplus > (smallest_size * PERCENT_WARN_THRESHOLD) / 100) {
			issue_warning_big_obj(candidate, surplus);
		}
	}

	/* The spare is held to the same threshold. */
	if (spare != NULL) {
		surplus = md_object_usable_size(spare, &sb_ver, chunksize) - smallest_size;

		if (surplus > (smallest_size * PERCENT_WARN_THRESHOLD) / 100) {
			issue_warning_big_obj(spare, surplus);
		}
	}

	LOG_EXIT_VOID();
}

/*
 * FUNCTION: warn_if_big_new_objects
 *
 * Counterpart of warn_if_big_objects() for objects that are being added to
 * an existing RAID5 region.  Each object's usable size is compared with the
 * size recorded in the region's RAID5 configuration (conf->size); a warning
 * is issued when the object is larger by more than PERCENT_WARN_THRESHOLD
 * percent of that size.
 */
static void warn_if_big_new_objects( md_volume_t *volume, list_anchor_t objects)
{
	raid5_conf_t *conf = mdvol_to_conf(volume);
	storage_object_t *candidate;
	list_element_t iter;
	u_int64_t usable;

	LOG_ENTRY();

	/* A volume without a RAID5 configuration is reported as an MD bug. */
	if (!conf) {
		LOG_MD_BUG();
		LOG_EXIT_VOID();
		return;
	}

	LIST_FOR_EACH(objects, iter, candidate) {
		usable = md_object_usable_size(candidate, &volume->sb_ver, conf->chunksize);

		/* Only objects larger than the array's member size can be "too big". */
		if ((usable > conf->size) &&
		    ((usable - conf->size) > (conf->size * PERCENT_WARN_THRESHOLD) / 100)) {
			issue_warning_big_obj(candidate, usable - conf->size);
		}
	}

	LOG_EXIT_VOID();
}

/* Function: raid5_set_option
 *
 *  Validate and store a new value for one option of a task.  Only the
 *  create task has options handled here; for any other action, and for an
 *  unrecognised option index, nothing is changed and 0 is returned.
 *
 *  Returns 0 on success, EFAULT if a required pointer is NULL, or EINVAL
 *  if the value is not acceptable for the option.
 */
static int raid5_set_option( task_context_t * context,
			     u_int32_t        index,
			     value_t        * value,
			     task_effect_t  * effect ) {
	int rc = 0;
	int wants_raid4;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/* Parameter check */
	if (!context || !value || !effect) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	/* Nothing to do for tasks other than create. */
	if (context->action != EVMS_Task_Create) {
		LOG_EXIT_INT(rc);
		return rc;
	}

	switch (index) {
	case RAID5_CREATE_OPT_SB1_INDEX:
		/* The superblock version determines the selection limit. */
		context->option_descriptors->option[index].value.b = value->b;
		context->max_selected_objects =
			(value->b == TRUE) ? MD_SB_1_DISKS : MD_SB_DISKS;
		break;

	case RAID5_CREATE_OPT_SPARE_DISK_INDEX:
		/*
		 * The name is stored unvalidated; a bad name is caught
		 * later when the object is looked up.
		 */
		strcpy(context->option_descriptors->option[index].value.s, value->s);
		warn_if_big_objects(context);
		break;

	case RAID5_CREATE_OPT_CHUNK_SIZE_INDEX:
		/*
		 * Accept only sizes within bounds that are a power of 2
		 * (calc_log2 returns -1 when the number is not one).
		 */
		if ((value->ui32 >= MD_MIN_CHUNK_SIZE) &&
		    (value->ui32 <= MD_MAX_CHUNK_SIZE) &&
		    (calc_log2((long) value->ui32) != -1)) {
			context->option_descriptors->option[index].value.ui32 = value->ui32;
		} else {
			rc = EINVAL;
		}
		break;

	case RAID5_CREATE_OPT_RAID_LEVEL_INDEX:
		wants_raid4 = (strcmp(value->s, RAID4_LEVEL_NAME) == 0);
		if (!wants_raid4 && (strcmp(value->s, RAID5_LEVEL_NAME) != 0)) {
			/* Neither of the two supported level names. */
			rc = EINVAL;
			break;
		}

		strcpy(context->option_descriptors->option[index].value.s, value->s);

		if (wants_raid4) {
			/*
			 * RAID4 does not have a parity algorithm, so
			 * the algorithm option is switched off.
			 */
			context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].flags |= (EVMS_OPTION_FLAGS_NOT_REQUIRED | EVMS_OPTION_FLAGS_INACTIVE);
		} else {
			/*
			 * RAID5 needs a parity algorithm, so the
			 * algorithm option is active and required.
			 */
			context->option_descriptors->option[RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX].flags &= ~(EVMS_OPTION_FLAGS_NOT_REQUIRED | EVMS_OPTION_FLAGS_INACTIVE);
		}

		*effect |= EVMS_Effect_Reload_Options;
		break;

	case RAID5_CREATE_OPT_PARITY_ALGORITHM_INDEX:
		/* The value must be one of the four known algorithm names. */
		if ((strcmp(value->s,ALGORITHM_LEFT_ASYMMETRIC_NAME) != 0)  &&
		    (strcmp(value->s,ALGORITHM_RIGHT_ASYMMETRIC_NAME) != 0) &&
		    (strcmp(value->s,ALGORITHM_LEFT_SYMMETRIC_NAME) != 0)   &&
		    (strcmp(value->s,ALGORITHM_RIGHT_SYMMETRIC_NAME) != 0) ) {
			rc = EINVAL;
		} else {
			strcpy(context->option_descriptors->option[index].value.s, value->s);
		}
		break;

	default:
		break;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * FUNCTION: raid5_set_expand_object
 *
 *	Selection handler for the expand task.  It only warns about selected
 *	objects that are noticeably larger than the region's member size; no
 *	object is declined and the effect flags are not modified.
 *
 *	Always returns 0.
 */
static int raid5_set_expand_object(
	task_context_t * context,
	list_anchor_t declined_objects,
	task_effect_t * effect )
{
	md_volume_t *region_volume = (md_volume_t *)context->object->private_data;

	LOG_ENTRY();

	warn_if_big_new_objects(region_volume, context->selected_objects);

	LOG_EXIT_INT(0);
	return 0;
}

/*
 * FUNCTION: raid5_set_shrink_object
 *
 * 	Selection handler for the shrink task.  Maintains a minimum
 * 	RAID5_MIN_RAID_DISKS-disk RAID5 region.
 *
 * 	- If removing every selected object would leave fewer than
 * 	  RAID5_MIN_RAID_DISKS raid disks, all selected objects are declined.
 * 	- Otherwise the engine is asked (can_shrink_by) whether the region may
 * 	  shrink by (number of objects * conf->size).  While the engine answers
 * 	  EAGAIN, the last object is moved to the decline list and the
 * 	  question is repeated with the smaller set.  Any other error declines
 * 	  everything that is left.
 *
 * 	Accepted objects end up back in context->selected_objects; each
 * 	declined object is appended to declined_objects with reason EINVAL.
 *
 * 	Returns 0, or ENOMEM if a list or a declined-object record could not
 * 	be allocated.
 */
static int raid5_set_shrink_object(
	task_context_t * context,
	list_anchor_t declined_objects,
	task_effect_t * effect )
{
	int rc = 0;
	int rc2;
	md_volume_t *vol = (md_volume_t *)context->object->private_data;
	raid5_conf_t *conf;
	storage_object_t *obj;
	list_anchor_t my_list = NULL;
	list_anchor_t decline_list = NULL;
	list_element_t li, iter;
	declined_object_t *declined_obj;
	sector_count_t shrink_size, smallest;

	LOG_ENTRY();

	decline_list = EngFncs->allocate_list();
	my_list = EngFncs->allocate_list();
	if (!decline_list || !my_list) {
		/*
		 * One of the two allocations may have succeeded; release it
		 * so the failure path does not leak a list.
		 */
		if (decline_list) {
			EngFncs->destroy_list(decline_list);
		}
		if (my_list) {
			EngFncs->destroy_list(my_list);
		}
		rc = ENOMEM;
		LOG_EXIT_INT(rc);
		return rc;
	}

	conf = mdvol_to_conf(vol);
	if ((conf->raid_disks - EngFncs->list_count(context->selected_objects))
			< RAID5_MIN_RAID_DISKS)
	{
		/* Decline all objects */
		md_transfer_list(context->selected_objects, decline_list);
		goto decline_objects;
	}

	/*
	 * Transfer all selected objects to local list (my_list).
	 * If we can shrink, we will transfer the objects back to selected list.
	 * If we cannot shrink, the selected object list will be empty.
	 */
	md_transfer_list(context->selected_objects, my_list);

	/* Each removed object shrinks the region by conf->size sectors. */
	smallest = conf->size;

try_again:
	shrink_size = EngFncs->list_count(my_list) * smallest;

	/* Ask the engine if it's ok to remove these objects. */
	rc2 = EngFncs->can_shrink_by(context->object, &shrink_size);
	switch (rc2) {
		case 0:
			/* Transfer all remaining objects back to selected objects list. */
			md_transfer_list(my_list, context->selected_objects);
			goto decline_objects;

		case EAGAIN:
			/*
			 * Remove the last object from the list.
			 * Append this object to the decline list.
			 * If the list is not empty afterwards, try again.
			 */
			obj = EngFncs->last_thing(my_list, &li);
			EngFncs->delete_element(li);
			EngFncs->insert_thing(decline_list, obj, INSERT_AFTER, NULL);
			if (EngFncs->list_count(my_list) == 0) {
				/* Every object has been declined. */
				goto decline_objects;
			}
			goto try_again;

		default:
			/* Some other error has occurred, decline all objects */
			md_transfer_list(my_list, decline_list);
			break;
	}

decline_objects:

	/* Report each object on the decline list back to the caller. */
	LIST_FOR_EACH(decline_list, iter, obj) {
		declined_obj = EngFncs->engine_alloc(sizeof(declined_object_t));
		if (!declined_obj) {
			rc = ENOMEM; /* Set error code, but continue looping */
		} else {
			declined_obj->object = obj;
			declined_obj->reason = EINVAL;
			EngFncs->insert_thing(declined_objects, declined_obj, INSERT_AFTER, NULL);
		}
	}

	EngFncs->destroy_list(decline_list);
	EngFncs->destroy_list(my_list);

	*effect |= EVMS_Effect_Reload_Objects;

	LOG_EXIT_INT(rc);
	return rc;
}


/* Function: raid5_set_objects
 *
 *  Determine the type of task, and then validate that the objects on the
 *  "selected" list are valid for that task. If so, adjust the option
 *  descriptor as appropriate.
 *
 *  context          - the task being set up; context->selected_objects
 *                     holds the current selection.
 *  declined_objects - list that receives a declined_object_t for each
 *                     object this function (or a task-specific handler)
 *                     rejects.
 *  effect           - flags updated to tell the caller what to reload.
 *
 *  Returns 0 on success, EFAULT if a required pointer is NULL, EINVAL for
 *  an invalid selection or unknown action, or ENOMEM on allocation failure
 *  (including a failure reported by the expand/shrink handlers).
 */
static int raid5_set_objects( task_context_t * context,
			      list_anchor_t          declined_objects,
			      task_effect_t  * effect ) {
	int rc = 0;
	uint count = 0;
	storage_object_t * obj;
	list_element_t li;
	declined_object_t * dec_obj;
	md_volume_t * volume = NULL;
	raid5_conf_t *conf;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/* Parameter check */
	if (!context || !declined_objects || !effect) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	count = EngFncs->list_count(context->selected_objects);

	switch (context->action) {

	case EVMS_Task_Create:
		/* Rebuild the spare option's choice list from the selection. */
		get_object_list((value_list_t **)&context->option_descriptors->option[RAID5_CREATE_OPT_SPARE_DISK_INDEX].constraint.list,
				context->selected_objects,
				0);
		warn_if_big_objects(context);
		*effect |= EVMS_Effect_Reload_Options;
		break;

	case EVMS_Task_Expand:
		/* Propagate the handler's result instead of dropping it. */
		rc = raid5_set_expand_object( context, declined_objects, effect);
		break;

	case EVMS_Task_Shrink:
		/*
		 * The shrink handler can fail with ENOMEM; report that to
		 * the caller rather than claiming success.
		 */
		rc = raid5_set_shrink_object( context, declined_objects, effect);
		break;

		/*
		 * The Engine makes sure that only available objects appear in the
		 * selected_objects list.
		 */
	case MD_RAID5_FUNCTION_ADD_SPARE:
		/* Make sure too many objects are not selected. */
		volume = (md_volume_t *) context->object->private_data;
		conf = mdvol_to_conf(volume);

		if (count > (MAX_DISKS(volume) - volume->nr_disks)) {
			LOG_ERROR("Can only specify up to %d object(s) to add as spare(s).\n",
				  MAX_DISKS(volume) - volume->nr_disks);
			rc = EINVAL;
		}
		warn_if_big_new_objects(volume, context->selected_objects);
		break;

	case MD_RAID5_FUNCTION_REMOVE_SPARE:
		volume = (md_volume_t *) context->object->private_data;
		conf = mdvol_to_conf(volume);

		/*
		 * If the array is running in degrade mode, make sure at
		 * least one spare is available.
		 */
		if (count &&
		    (conf->active_disks < conf->raid_disks)) {

			MESSAGE(_("At least one spare object must be left for recovering degraded array %s.\n"),
				volume->region->name);

			/*
			 * One spare must be left.
			 * Reject the last object in the list.
			 */
			obj = EngFncs->last_thing(context->selected_objects, &li);
			EngFncs->delete_element(li);

			dec_obj = EngFncs->engine_alloc(sizeof(declined_object_t));

			if (dec_obj != NULL) {

				dec_obj->object = obj;
				dec_obj->reason = EPERM;

				li = EngFncs->insert_thing(declined_objects,
							   dec_obj,
							   INSERT_AFTER,
							   NULL);

				if (!li) {
					LOG_CRITICAL("Could not insert into declined object list.\n");
					rc = ENOMEM;
				}
			} else {
				LOG_CRITICAL("Error allocating memory for a declined object.\n");
				rc = ENOMEM;
			}
		}
		break;

	case MD_RAID5_FUNCTION_REMOVE_FAULTY:
	case MD_RAID5_FUNCTION_REMOVE_STALE:
		/* No additional checks needed. */
		break;

	case MD_RAID5_FUNCTION_MARK_FAULTY:
		/* Verify that only one object is selected. */
		if (count > 1) {
			LOG_ERROR("Must select only one object to be marked faulty.\n");
			rc = EINVAL;
		}
		break;

	default:
		LOG_ERROR("%d is not a valid task action.\n", context->action);
		rc = EINVAL;
		break;
	}
	LOG_EXIT_INT(rc);
	return rc;
}


/* Function: raid5_get_info
 *
 *  Return extended information about an MD RAID5 region by delegating to
 *  md_get_info() with the region's private volume data.  "name" is passed
 *  through unchanged.
 *
 *  Returns EFAULT if info_array is NULL, EINVAL if the region is not owned
 *  by this plug-in, otherwise whatever md_get_info() returns.
 */
static int raid5_get_info( storage_object_t       * region,
			   char                   * name,
			   extended_info_array_t ** info_array ) {

	int result;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	if (!info_array) {
		/* Nowhere to store the result. */
		result = EFAULT;
	} else if (region->plugin != raid5_plugin) {
		/* Only regions owned by this plug-in can be described. */
		LOG_ERROR("Region %s is not owned by MD RAID5\n", region->name);
		result = EINVAL;
	} else {
		result = md_get_info(region->private_data, name, info_array);
	}

	LOG_EXIT_INT(result);
	return result;
}



/* Function: raid5_get_plugin_info
 *
 *  Return information about the MD plugin. There is no "extra"
 *  information about MD, so "name" should always be NULL.
 */
static int raid5_get_plugin_info( char                     * name,
				  extended_info_array_t   ** info_array ) {

	extended_info_array_t   * info = NULL;
	char buffer[50] = {0};
	int i = 0;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/* Parameter check */
	if (!info_array) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	if (!name) {
		/* Get memory for the info array. */
		if (!(info = EngFncs->engine_alloc(sizeof(extended_info_array_t) + sizeof(extended_info_t)*6))) {
			LOG_ERROR("Error allocating memory for info array\n");
			LOG_EXIT_INT(ENOMEM);
			return ENOMEM;
		}

		info->info[i].name = EngFncs->engine_strdup("ShortName");
		info->info[i].title = EngFncs->engine_strdup(_("Short Name"));
		info->info[i].desc = EngFncs->engine_strdup(_("A short name given to this plug-in"));
		info->info[i].type = EVMS_Type_String;
		info->info[i].value.s = EngFncs->engine_strdup(raid5_plugin->short_name);
		i++;

		info->info[i].name = EngFncs->engine_strdup("LongName");
		info->info[i].title = EngFncs->engine_strdup(_("Long Name"));
		info->info[i].desc = EngFncs->engine_strdup(_("A longer, more descriptive name for this plug-in"));
		info->info[i].type = EVMS_Type_String;
		info->info[i].value.s = EngFncs->engine_strdup(raid5_plugin->long_name);
		i++;

		info->info[i].name = EngFncs->engine_strdup("Type");
		info->info[i].title = EngFncs->engine_strdup(_("Plug-in Type"));
		info->info[i].desc = EngFncs->engine_strdup(_("There are various types of plug-ins, each responsible for some kind of storage object or logical volume."));
		info->info[i].type = EVMS_Type_String;
		info->info[i].value.s = EngFncs->engine_strdup("Region Manager");
		i++;

		info->info[i].name = EngFncs->engine_strdup("Version");
		info->info[i].title = EngFncs->engine_strdup(_("Plug-in Version"));
		info->info[i].desc = EngFncs->engine_strdup(_("This is the version number of the plug-in."));
		info->info[i].type = EVMS_Type_String;
		snprintf(buffer, 50, "%d.%d.%d", MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL);
		info->info[i].value.s = EngFncs->engine_strdup(buffer);
		i++;

		// Required Engine Services Version
		info->info[i].name = EngFncs->engine_strdup("Required_Engine_Version");
		info->info[i].title = EngFncs->engine_strdup(_("Required Engine Services Version"));
		info->info[i].desc = EngFncs->engine_strdup(_("This is the version of the Engine services that this plug-in requires.  "
							      "It will not run on older versions of the Engine services."));
		info->info[i].type = EVMS_Type_String;
		snprintf(buffer, 50, "%d.%d.%d", raid5_plugin->required_engine_api_version.major, raid5_plugin->required_engine_api_version.minor, raid5_plugin->required_engine_api_version.patchlevel);
		info->info[i].value.s = EngFncs->engine_strdup(buffer);
		i++;

		// Required Plug-in API Version
		info->info[i].name = EngFncs->engine_strdup("Required_Plugin_Version");
		info->info[i].title = EngFncs->engine_strdup(_("Required Plug-in API Version"));
		info->info[i].desc = EngFncs->engine_strdup(_("This is the version of the Engine plug-in API that this plug-in requires.  "
							      "It will not run on older versions of the Engine plug-in API."));
		info->info[i].type = EVMS_Type_String;
		snprintf(buffer, 50, "%d.%d.%d", raid5_plugin->required_plugin_api_version.plugin.major, raid5_plugin->required_plugin_api_version.plugin.minor, raid5_plugin->required_plugin_api_version.plugin.patchlevel);
		info->info[i].value.s = EngFncs->engine_strdup(buffer);
		i++;

	} else {
		LOG_ERROR("No support for extra plugin information about \"%s\"\n", name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	info->count = i;
	*info_array = info;
	LOG_EXIT_INT(0);
	return 0;
}


/*
 * XOR the chunk buffers of every disk except dev_index into the buffer of
 * chunk dev_index.  The destination buffer is used as-is as the starting
 * value, so the caller decides what it holds beforehand.
 *
 * The XOR is performed in batches of at most MAX_XOR_BLOCKS buffers, with
 * the destination always occupying slot 0 of the batch.
 *
 * Always returns 0.
 */
static int reconstruct_chunk(raid5_conf_t * conf, stripe_t * stripe, unsigned int dev_index) {

	xorblock_t batch;
	int disk;
	int used = 1;	/* slot 0 is reserved for the destination */

	LOG_ENTRY();

	batch.buf[0] = stripe->chunks[dev_index].data;

	for (disk = 0; disk < conf->raid_disks; disk++) {
		if (disk != dev_index) {
			batch.buf[used++] = stripe->chunks[disk].data;

			if (used == MAX_XOR_BLOCKS) {
				/* Batch is full: fold it into the destination. */
				xor_block(used, &batch, conf->chunksize << EVMS_VSECTOR_SIZE_SHIFT);
				used = 1;
			}
		}
	}

	/* Fold in whatever is left in a partially filled batch. */
	if (used > 1) {
		xor_block(used, &batch, conf->chunksize << EVMS_VSECTOR_SIZE_SHIFT);
	}

	LOG_EXIT_INT(0);
	return 0;
}


/*
 * Release the chunk buffers of a loaded stripe and mark the stripe empty.
 *
 * A stripe whose data_size is 0 is treated as not loaded and is left
 * untouched.
 *
 * Every chunk buffer is freed, whether or not the chunk has a device:
 * get_stripe() allocates a buffer for each chunk and sets dev to NULL for
 * a non-operational disk, so skipping chunks without a device would leak
 * that disk's buffer each time a stripe is released.
 */
static void free_stripe_data(stripe_t * stripe)
{
	int i;

	if (stripe->data_size == 0) {
		return;
	}

	/* Free up all the buffers in the stripe. */
	for (i = 0; i < stripe->nr_disks; i++) {
		free(stripe->chunks[i].data);	/* free(NULL) is a no-op */
		stripe->chunks[i].data = NULL;
		stripe->chunks[i].dev = NULL;
	}

	stripe->number = 0;
	stripe->start_lsn = 0;
	stripe->data_size = 0;
}


/*
 * Load the stripe that contains lsn into the given stripe structure.
 *
 * A zeroed buffer is allocated for every chunk, the chunks of operational
 * disks are read from their devices, and, if the configuration reports a
 * failed raid disk, the chunk at conf->failed_disk_index is rebuilt with
 * reconstruct_chunk().
 *
 * Returns 0 on success, ENOMEM if a buffer cannot be allocated, or the
 * error of a failed READ.  On any failure the stripe is released with
 * free_stripe_data().
 */
static int get_stripe(md_volume_t * volume, lsn_t lsn, stripe_t * stripe) {

	raid5_conf_t * conf = mdvol_to_conf(volume);
	sector_count_t data_stripe_size = conf->chunksize * (conf->raid_disks - 1);
	int rc = 0;
	int disk;

	LOG_ENTRY();

	/* Calculate stripe number and starting sector. */
	stripe->number = lsn / data_stripe_size;
	stripe->start_lsn = stripe->number * data_stripe_size;
	stripe->data_size = data_stripe_size;

	/* Set up one zeroed buffer per chunk; stop at the first failure. */
	for (disk = 0; disk < conf->raid_disks; disk++) {
		stripe->chunks[disk].data = calloc(1, conf->chunksize << EVMS_VSECTOR_SIZE_SHIFT);
		if (stripe->chunks[disk].data == NULL) {
			/* Memory allocation failure */
			rc = ENOMEM;
			break;
		}

		/* Only an operational disk gets a child device to read from. */
		stripe->chunks[disk].dev =
			conf->disks[disk].operational ? conf->disks[disk].dev : NULL;
		stripe->chunks[disk].lsn_on_dev = stripe->number * conf->chunksize;
	}

	/* Read in the chunks that have a device behind them. */
	for (disk = 0; (rc == 0) && (disk < conf->raid_disks); disk++) {
		if (stripe->chunks[disk].dev == NULL) {
			continue;
		}

		LOG_DEBUG("Reading %"PRIu64" sectors from %s at sector offset %"PRIu64".\n",
			  conf->chunksize, stripe->chunks[disk].dev->obj->name,
			  stripe->chunks[disk].lsn_on_dev + stripe->chunks[disk].dev->data_offset);

		rc = READ(stripe->chunks[disk].dev->obj,
			  stripe->chunks[disk].lsn_on_dev + stripe->chunks[disk].dev->data_offset,
			  conf->chunksize, stripe->chunks[disk].data);
	}

	if (rc == 0) {
		/* Rebuild the missing chunk from the ones that were read. */
		if (conf->failed_raid_disks != 0) {
			LOG_DEBUG("Reconstructing data for failed disk %d\n", conf->failed_disk_index);
			reconstruct_chunk(conf, stripe, conf->failed_disk_index);
		}
	} else {
		/* Something went wrong.  Clean up the stripe. */
		free_stripe_data(stripe);
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/* Direction of a stripe_io() transfer between a caller buffer and a stripe. */
typedef enum {
	STRIPE_IO_READ,		/* copy from the stripe's chunk buffers into the caller's buffer */
	STRIPE_IO_WRITE		/* copy from the caller's buffer into the stripe's chunk buffers */
} stripe_io_cmd_t;


/*
 * Function: stripe_io
 *
 *  Copy data between a caller buffer and the in-memory chunk buffers of an
 *  already loaded stripe.  No device I/O is done here.
 *
 *  cmd          - STRIPE_IO_READ copies stripe -> buffer,
 *                 STRIPE_IO_WRITE copies buffer -> stripe.
 *  volume       - MD volume the stripe belongs to.
 *  stripe       - the loaded stripe.
 *  lsn          - first sector of the request; must lie within the stripe.
 *  sectors      - number of sectors requested.
 *  buffer       - caller's data buffer.
 *  sectors_read - receives the number of sectors actually copied, which is
 *                 less than "sectors" when the request runs past the end
 *                 of the stripe.  It is set to 0 on every early return so
 *                 that callers never read an indeterminate count.
 *
 *  Returns 0 on success (including a zero-length request) or EINVAL for an
 *  unknown command or an lsn outside the stripe.
 */
static int stripe_io(stripe_io_cmd_t  cmd,
		     md_volume_t    * volume,
		     stripe_t       * stripe,
		     lsn_t            lsn,
		     sector_count_t   sectors,
		     unsigned char  * buffer,
		     sector_count_t * sectors_read) {

	lsn_t dev_offset;
	lsn_t sector_offset_in_chunk;
	unsigned int byte_offset_in_chunk;
	unsigned int chunk_index;
	unsigned int parity_index;
	sector_count_t stripe_end_lsn = stripe->start_lsn + stripe->data_size - 1;
	raid5_conf_t * conf = mdvol_to_conf(volume);
	sector_count_t chunk_size_in_sectors = conf->chunksize;
	sector_count_t sectors_to_copy;
	unsigned int bytes_to_copy;

	LOG_ENTRY();

	/*
	 * Give the output a defined value before any return.  Callers
	 * subtract it from their remaining count right after the call,
	 * even when an error is returned.
	 */
	if (sectors_read != NULL) {
		*sectors_read = 0;
	}

	if (cmd > STRIPE_IO_WRITE) {
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	if (sectors == 0) {
		/* Nothing to read.  We're finished. */
		LOG_EXIT_INT(0);
		return 0;
	}

	if ((lsn < stripe->start_lsn) ||
	    (lsn > stripe_end_lsn)) {
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	do {
		/* Locate the chunk holding lsn and the offset within it. */
		dev_offset = raid5_compute_sector(lsn,
						  conf->raid_disks, conf->raid_disks - 1,
						  &chunk_index, &parity_index,
						  conf);
		/* The masks below rely on the chunk size being a power of 2. */
		sector_offset_in_chunk = dev_offset & (chunk_size_in_sectors - 1);
		byte_offset_in_chunk = (unsigned int) (sector_offset_in_chunk << EVMS_VSECTOR_SIZE_SHIFT);

		/* Copy no further than the end of the current chunk. */
		sectors_to_copy = min(sectors, chunk_size_in_sectors - (sector_offset_in_chunk & (chunk_size_in_sectors - 1)));
		bytes_to_copy = (unsigned int) (sectors_to_copy << EVMS_VSECTOR_SIZE_SHIFT);

		if (cmd == STRIPE_IO_READ) {
			LOG_DEBUG("Reading %d bytes from stripe %ld, chunk %d, offset %d.\n",
				  bytes_to_copy, stripe->number, chunk_index, byte_offset_in_chunk);
			LOG_DEBUG("AKA: Reading %"PRIu64" sectors from (%s) at sector offset %"PRIu64".\n",
				  sectors_to_copy, (conf->disks[chunk_index].dev != NULL) ? conf->disks[chunk_index].dev->obj->name : "nul", dev_offset);
			memcpy(buffer,
			       stripe->chunks[chunk_index].data + byte_offset_in_chunk,
			       bytes_to_copy);

		} else if (cmd == STRIPE_IO_WRITE) {
			LOG_DEBUG("Writing %d bytes to stripe %ld, chunk %d, offset %d.\n",
				  bytes_to_copy, stripe->number, chunk_index, byte_offset_in_chunk);
			LOG_DEBUG("AKA: Writing %"PRIu64" sectors from (%s) at sector offset %"PRIu64".\n",
				  sectors_to_copy,
				  (conf->disks[chunk_index].dev != NULL) ? conf->disks[chunk_index].dev->obj->name : "nul",
				  dev_offset);
			memcpy(stripe->chunks[chunk_index].data + byte_offset_in_chunk,
			       buffer,
			       bytes_to_copy);
		}

		sectors -= sectors_to_copy;
		*sectors_read += sectors_to_copy;
		lsn += sectors_to_copy;
		buffer += bytes_to_copy;

		/* Continue with the next chunk while still inside this stripe. */
	} while ((sectors != 0) && (lsn <= stripe_end_lsn));

	LOG_EXIT_INT(0);
	return 0;
}

/* Function: raid5_volume_read
 *
 *  Read "count" sectors starting at "lsn" from an MD RAID5 volume into
 *  "buffer".
 *
 *  When the volume is not flagged MD_DEGRADED, the request is cut into
 *  runs with get_child_run() and each run is read directly from its
 *  member object.  When it is degraded, whole stripes are loaded with
 *  get_stripe() and the requested sectors are copied out of the stripe
 *  with stripe_io().
 *
 *  Returns 0 on success, EIO if no member is found for a run, or the
 *  error returned by the underlying read / stripe routines.
 */
static int raid5_volume_read(
	md_volume_t *volume,
	lsn_t lsn,
	sector_count_t count,
	void *buffer)
{
	raid5_conf_t   *conf = mdvol_to_conf(volume);
	stripe_t       *stripe;
	unsigned char  *dest = buffer;	/* where the next run is stored */
	md_member_t    *member;
	lsn_t           member_lsn;
	sector_count_t  done;
	int             rc = 0;

	LOG_ENTRY();

	/* Start with an empty stripe cache. */
	stripe = &conf->stripe;
	stripe->data_size = 0;

	if (volume->flags & MD_DEGRADED) {
		stripe->start_lsn = 0;
		stripe->number = 0;

		while ((rc == 0) && (count != 0)) {
			if ((lsn < stripe->start_lsn) ||
			    (lsn >= stripe->start_lsn + stripe->data_size)) {
				/* lsn is outside the loaded stripe: load the right one. */
				free_stripe_data(stripe);
				rc = get_stripe(volume, lsn, stripe);
				continue;
			}

			rc = stripe_io(STRIPE_IO_READ,
				       volume, stripe,
				       lsn, count,
				       dest, &done);
			count -= done;
			lsn += done;
			dest += done << EVMS_VSECTOR_SIZE_SHIFT;
		}
	} else {
		while ((rc == 0) && (count != 0)) {
			get_child_run(volume, lsn, count, &member, &member_lsn, &done);
			if (member == NULL) {
				rc = EIO;
				break;
			}

			LOG_DEBUG("Reading %"PRIu64" sectors from %s at sector offset %"PRIu64".\n",
				  done, member->obj->name, member->data_offset+ member_lsn);
			rc = READ(member->obj, member->data_offset + member_lsn, done, dest);
			count -= done;
			lsn += done;
			dest += done << EVMS_VSECTOR_SIZE_SHIFT;
		}
	}

	/* Release any stripe that is still loaded. */
	if (stripe->data_size != 0) {
		free_stripe_data(stripe);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

static int raid5_volume_rw_special(
	storage_object_t * region,
	lsn_t lsn,
	sector_count_t count,
	void * buffer,
	int rw )
{
	int rc = 0;
	md_volume_t * volume = (md_volume_t *)region->private_data;
	list_element_t iter;
	md_volume_t *temp;
	md_volume_t *exp_vol;
	md_volume_t *org_vol;
	boolean found;
	md_member_t *saved_member;
	md_saved_info_t *info;

	LOG_ENTRY();
	
	found = FALSE;
	LIST_FOR_EACH(raid5_expand_shrink_list, iter, temp) {
		if (temp->region == region) {
			found = TRUE;
			break;
		}
	}

	if (found == FALSE) {
		if (rw == 0) {
			memset(buffer, 0x0, count * EVMS_VSECTOR_SIZE);
		}
		LOG_ERROR("Region %s, we could not find orginal volume to redirect I/0,"
			  " returning zero filled buffer.\n",
			  volume->name);
		rc = EIO;
		goto out;
	}

	if (volume->region_mgr_flags & (MD_RAID5_EXPAND_PENDING | MD_RAID5_SHRINK_PENDING)) {
		org_vol = temp;
		if (rw == 0) {
			rc = raid5_volume_read(org_vol, lsn, count, buffer);
		} else {
			rc = raid5_volume_write(org_vol, lsn, count, buffer);
		}
		goto out;
	}
	
	if (md_check_for_expand_shrink_in_progress(volume, &saved_member)) {
		info = saved_member->saved_info;
	} else {
		if (rw == 0) {
			memset(buffer, 0x0, count * EVMS_VSECTOR_SIZE);
		}
		LOG_ERROR("No saved superblock info for region %s,"
			  " returning zero filled buffer.\n",
			  volume->name);
		rc = EIO;
		goto out;
	}

	LOG_DEFAULT("Sector mark=%"PRIu64".\n", info->sector_mark);
	
	if (volume->region_mgr_flags & MD_RAID5_UNWIND_EXPANSION_PENDING) {
		/*
		 * The expansion was interrupted.  We now have to handle I/O requests.
		 * Retrieve the sector_mark in the saved superblock.
		 * - All requests for LSN >= sector_mark will go to the original region.
		 * - All requests for LSN + count <= sector_mark will go to the "expanded" region.
		 * - The worse case, LSN < sector_mark and LSN + count > sector_mark,
		 * break up the request.
		 */
		exp_vol = temp;
		if (lsn >= info->sector_mark) {
			/* Request goes to the original region */
			LOG_DEFAULT("Unwind expansion is pending, request to original volume,"
				    " lsn=%"PRIu64", count=%"PRIu64".\n",
				    lsn, count);
			if (rw == 0) {
				rc = raid5_volume_read(volume, lsn, count, buffer);
			} else {
				rc = raid5_volume_write(volume, lsn, count, buffer);
			}
		} else {
			if (lsn + count <= info->sector_mark) {
				/* Request goes to the "expanded" region */
				LOG_DEFAULT("Unwind expansion is pending,"
					    " request to expanded region,"
					    " lsn=%"PRIu64", count=%"PRIu64".\n",
					    lsn, count);
				if (rw == 0) {
					rc = raid5_volume_read(exp_vol, lsn, count, buffer);
				} else {
					rc = raid5_volume_write(exp_vol, lsn, count, buffer);
				}
			} else { /* Bad news: must break up this request */
				u_int64_t count1, count2;
				
				count1 = info->sector_mark - lsn;
				count2 = count - count1;
				if (rw == 0) {
					rc = raid5_volume_read(exp_vol, lsn, count1, buffer);
				} else {
					rc = raid5_volume_write(exp_vol, lsn, count1, buffer);
				}
				if (rw == 0) {
					rc |= raid5_volume_read(volume, lsn+count1, count2, buffer);
				} else {
					rc |= raid5_volume_read(volume, lsn+count1, count2, buffer);
				}
			}
		}
		goto out;
	}

	if (volume->region_mgr_flags & MD_RAID5_RESUME_SHRINKING_PENDING) {
		/*
		 * The shrinking was interrupted.  We now have to handle I/O requests.
		 * Retrieve the sector_mark in the saved superblock.
		 * - All requests for LSN >= sector_mark will go to the "shrunk" region.
		 * - All requests for LSN + count <= sector_mark will go to the original region.
		 * - The worse case, LSN < sector_mark and LSN + count > sector_mark,
		 * break up the request.
		 */
		org_vol = temp;
		if (lsn >= info->sector_mark) {
			/* Request goes to the "expanded" region */
			LOG_DEFAULT("Resume shrinking is pending,"
				    " request to shrunk region,"
				    " lsn=%"PRIu64", count=%"PRIu64".\n",
				    lsn, count);		
			/* Request goes to the "shrunk" region */
			if (rw == 0) {
				rc = raid5_volume_read(volume, lsn, count, buffer);
			} else {
				rc = raid5_volume_write(volume, lsn, count, buffer);
			}
		} else {
			if (lsn + count <= info->sector_mark) {
				/* Request goes to the original region */
				LOG_DEFAULT("Resume shrinking is pending, request to original volume,"
					    " lsn=%"PRIu64", count=%"PRIu64".\n",
					    lsn, count);
				if (rw == 0) {
					rc = raid5_volume_read(org_vol, lsn, count, buffer);
				} else {
					rc = raid5_volume_write(org_vol, lsn, count, buffer);
				}
			} else { /* Bad news: must break up this request */
				u_int64_t count1, count2;
				
				count1 = info->sector_mark - lsn;
				count2 = count - count1;
				if (rw == 0) {
					rc = raid5_volume_read(org_vol, lsn, count1, buffer);
				} else {
					rc = raid5_volume_write(org_vol, lsn, count1, buffer);
				}
				if (rw == 0) {
					rc |= raid5_volume_read(volume, lsn+count1, count2, buffer);
				} else {
					rc |= raid5_volume_write(volume, lsn+count1, count2, buffer);
				}
			}
		}
		goto out;
	}
out:
	LOG_EXIT_INT(rc);
	return rc;

}

/* Function: raid5_read
 *
 *  Read "count" sectors starting at "lsn" from an MD RAID5 region.
 *
 *  The request is first handed to md_region_rw().  If that reports ENODEV,
 *  the read is done by this plug-in instead: through
 *  raid5_volume_rw_special() when an expand/shrink is pending or was
 *  interrupted, otherwise through raid5_volume_read().
 *
 *  Returns 0 on success, EFAULT if buffer is NULL, EINVAL if the request
 *  extends past the end of the region, or the error of the lower-level
 *  read.  For a volume flagged MD_CORRUPT the buffer is zero filled and 0
 *  is returned.
 */
static int raid5_read(
	storage_object_t * region,
	lsn_t              lsn,
	sector_count_t     count,
	void             * buffer )
{
	md_volume_t *volume = (md_volume_t *)region->private_data;
	int status;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	LOG_DEBUG("Request to read %"PRIu64" sectors from %s"
		" at sector offset %"PRIu64".\n", count, region->name, lsn);

	if (!buffer) {
		/* Parameter check */
		status = EFAULT;

	} else if (volume->flags & MD_CORRUPT) {
		/* Nothing trustworthy to read: hand back zeroes. */
		memset(buffer, 0x0, count * EVMS_VSECTOR_SIZE);
		LOG_ERROR("MD Object %s is corrupt, data is suspect \n",
			volume->name);
		status = 0;

	} else if ((lsn + count) > region->size) {
		LOG_ERROR("Attempt to read past end of region %s sector=%"PRIu64"\n",
			volume->name,lsn+count);
		status = EINVAL;

	} else {
		status = md_region_rw(region, lsn, count, buffer, 0);

		if (status == ENODEV) {
			/* Fall back to reading through this plug-in. */
			if (!(volume->region_mgr_flags &
				(MD_RAID5_EXPAND_PENDING | MD_RAID5_SHRINK_PENDING |
				MD_RAID5_UNWIND_EXPANSION_PENDING | MD_RAID5_RESUME_SHRINKING_PENDING)))
			{
				status = raid5_volume_read(volume, lsn, count, buffer);
			} else {
				status = raid5_volume_rw_special(region, lsn, count, buffer, 0);
			}
		}
	}

	LOG_EXIT_INT(status);
	return status;
}


/*
 * write_stripe
 *
 * Recompute the parity chunk of the given in-memory stripe and write every
 * chunk of the stripe that has a backing device.  Writing stops at the first
 * device that reports an error.
 *
 * Returns 0 on success, otherwise the error code of the failing WRITE().
 */
static int write_stripe(md_volume_t * volume, stripe_t * stripe) {

	raid5_conf_t * conf = mdvol_to_conf(volume);
	unsigned int data_index;
	unsigned int parity_index;
	int rc = 0;
	int disk;

	LOG_ENTRY();

	/* Locate the parity disk of this stripe. */
	raid5_compute_sector(stripe->start_lsn,
			     conf->raid_disks, conf->raid_disks - 1,
			     &data_index, &parity_index,
			     conf);

	/* Clear the parity chunk, then rebuild it. */
	LOG_DEBUG("Reconstructing parity on disk %d.\n", parity_index);
	memset(stripe->chunks[parity_index].data, 0, conf->chunksize << EVMS_VSECTOR_SIZE_SHIFT);
	reconstruct_chunk(conf, stripe, parity_index);

	/* Push each chunk out to its device. */
	for (disk = 0; disk < conf->raid_disks; disk++) {
		chunk_t * chunk = &stripe->chunks[disk];

		/*
		 * A missing or faulty device has no dev pointer in its
		 * chunk structure; there is nothing to write to, so the
		 * chunk is skipped.
		 */
		if (chunk->dev == NULL) {
			continue;
		}

		LOG_DEBUG("Writing %"PRIu64" sectors to %s at sector offset %"PRIu64".\n",
			  conf->chunksize, chunk->dev->obj->name, chunk->lsn_on_dev + chunk->dev->data_offset);
		rc = WRITE(chunk->dev->obj, chunk->lsn_on_dev + chunk->dev->data_offset, conf->chunksize, chunk->data);
		if (rc != 0) {
			break;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}


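/*   NOTE: seek_and_io() is disabled; the whole function below is commented out.
 *   Function reads or writes sectors from a disk,
 *   depending on flag: 0=READ, 1=WRITE.
 *   Returns: (positive) number of bytes transferred if successful,
 *            (zero or negative) the result of the failing lseek/read/write otherwise.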
static int seek_and_io( int fd, lsn_t offset, sector_count_t sectors,
		 void *buf, int flag )
{
    ssize_t n = 0;
    u_int32_t count = sectors << EVMS_VSECTOR_SIZE_SHIFT;
    u_int32_t tot = 0;

    if ( lseek64(fd, offset<<EVMS_VSECTOR_SIZE_SHIFT, SEEK_SET) == -1 ) {
	return -1;
    }

    while (tot < count) {
	do
	    if (flag)
		n = write(fd, buf, count - tot);
	    else
		n = read(fd, buf, count - tot);
	while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN)));

	if (n <= 0)
	    return tot ? tot : n;

	tot += n;
	buf += n;
    }

    return tot;
} */


/*
 * raid5_volume_write
 *
 * Write `count` sectors from `buffer` to the array starting at `lsn`, going
 * through the volume's single in-memory stripe (conf->stripe): the stripe
 * covering the current lsn is loaded with get_stripe(), updated with
 * stripe_io(), and written back (parity recalculated) with write_stripe()
 * before the next stripe is loaded and once more at the end.
 *
 * Returns 0 on success, otherwise the first error reported by stripe_io(),
 * get_stripe() or write_stripe().  Errors from write_stripe() used to be
 * dropped, which let a failed disk write be reported as success.
 */
static int raid5_volume_write (
	md_volume_t *volume,
	lsn_t lsn,
	sector_count_t count,
	void * buffer)
{
	int rc = 0;
	int flush_rc;
	raid5_conf_t * conf = mdvol_to_conf(volume);
	sector_count_t sectors_written;
	stripe_t *stripe;

	LOG_ENTRY();

	stripe = &conf->stripe;

	/* Start with an empty stripe so the first pass loads one. */
	stripe->number = 0;
	stripe->data_size = 0;
	stripe->start_lsn = 0;
	while ((count != 0) && (rc == 0)) {
		if ((lsn >= stripe->start_lsn) &&
		    (lsn < stripe->start_lsn + stripe->data_size)) {
			/*
			 * Do not consume an indeterminate value if
			 * stripe_io() fails before storing the count.
			 */
			sectors_written = 0;
			rc = stripe_io(STRIPE_IO_WRITE,
				       volume, stripe,
				       lsn, count,
				       buffer, &sectors_written);
			count -= sectors_written;
			lsn += sectors_written;
			buffer = (char *)buffer + (sectors_written << EVMS_VSECTOR_SIZE_SHIFT);

		} else {
			if (stripe->data_size != 0) {
				flush_rc = write_stripe(volume, stripe);
				free_stripe_data(stripe);
				if (flush_rc != 0) {
					/*
					 * The stripe data has just been freed,
					 * so the final flush below must not
					 * touch it again.
					 */
					rc = flush_rc;
					goto out;
				}
			}

			/* Read a new stripe and try again. */
			rc = get_stripe(volume, lsn, stripe);
		}
	}

	/* Flush whatever is still held in the stripe. */
	if (stripe->data_size != 0) {
		flush_rc = write_stripe(volume, stripe);
		free_stripe_data(stripe);
		if (rc == 0) {
			/* Keep an earlier error; otherwise report the flush result. */
			rc = flush_rc;
		}
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/* Function: raid5_write
 *
 *  Write entry point of the RAID5 region.  The request is validated, any
 *  queued kill-sector requests are processed, and the write is handed to
 *  md_region_rw().  When that call reports ENODEV the write is carried out
 *  by this plug-in instead: through raid5_volume_rw_special() while an
 *  expand/shrink (or unwind/resume) operation is pending, and through
 *  raid5_volume_write() otherwise.
 *
 *  Returns 0 on success, EFAULT for a NULL buffer, EIO for an array flagged
 *  MD_CORRUPT, EINVAL when the request runs past the end of the region, or
 *  the error of the I/O path.
 */
static int raid5_write (
	storage_object_t *region,
	lsn_t lsn,
	sector_count_t count,
	void * buffer)
{
	md_volume_t *volume = (md_volume_t *)region->private_data;
	int rc;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	LOG_DEBUG("Request to write %"PRIu64" sectors to %s"
		" at sector offset %"PRIu64".\n", count, region->name, lsn);

	/* A source buffer is mandatory. */
	if (buffer == NULL) {
		rc = EFAULT;
		goto out;
	}

	/* Writing to a corrupt array is refused. */
	if (volume->flags & MD_CORRUPT) {
		LOG_ERROR("MD Object %s is corrupt, writing data is not allowed\n",
			volume->name);
		rc = EIO;
		goto out;
	}

	/* The request must lie entirely inside the region. */
	if ((lsn + count) > region->size) {
		LOG_ERROR("Attempt to write past end of region %s sector=%"PRIu64"\n",
			volume->name,lsn+count);
		rc = EINVAL;
		goto out;
	}

	/* Process pending kill-sector requests before writing. */
	if (kill_sector_list_head != NULL) {
		kill_sectors();
	}

	rc = md_region_rw(region, lsn, count, buffer, 1);
	if (rc != ENODEV) {
		goto out;
	}

	/* md_region_rw() reported ENODEV: try to do the write by hand. */
	if (volume->region_mgr_flags &
		(MD_RAID5_EXPAND_PENDING | MD_RAID5_SHRINK_PENDING |
		MD_RAID5_UNWIND_EXPANSION_PENDING | MD_RAID5_RESUME_SHRINKING_PENDING))
	{
		rc = raid5_volume_rw_special(region, lsn, count, buffer, 1);
	} else {
		rc = raid5_volume_write(volume, lsn, count, buffer);
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * raid5_enable_remove_spare_function
 *
 * Append the "Remove spare" entry to fia at index *function_count and bump
 * the count, provided the array has at least one spare member that has an
 * object and is not flagged MD_MEMBER_NEW.  The entry is marked inactive
 * while the array is syncing or a configuration change is pending.
 */
static void raid5_enable_remove_spare_function(
	md_volume_t * volume,
	function_info_array_t * fia,
	int *function_count)
{
	list_element_t iter;
	md_member_t *member;
	function_info_t *entry;

	LOG_ENTRY();

	/* One qualifying spare is enough; the entry is added at most once. */
	LIST_FOR_EACH(volume->members, iter, member) {
		if (member->obj &&
		    (member->flags & MD_MEMBER_DISK_SPARE) &&
		    !(member->flags & MD_MEMBER_NEW) ) {
			entry = &fia->info[*function_count];

			entry->function = MD_RAID5_FUNCTION_REMOVE_SPARE;
			entry->name = EngFncs->engine_strdup("remspare");
			entry->title = EngFncs->engine_strdup(_("Remove spare object"));
			entry->verb = EngFncs->engine_strdup(_("Remove"));
			entry->help = EngFncs->engine_strdup(_("Use this function to remove a spare object from this RAID array."));

			if ((volume->flags & MD_ARRAY_SYNCING) ||
			    (volume->region_mgr_flags & MD_RAID5_CONFIG_CHANGE_PENDING) ) {
				entry->flags |= EVMS_FUNCTION_FLAGS_INACTIVE;
			}

			*function_count += 1;
			break;
		}
	}

	LOG_EXIT_VOID();
}

/*
 * raid5_enable_add_spare_function
 *
 * Append the "Add spare" entry to fia at index *function_count and bump the
 * count when the engine can offer at least one candidate object.  The
 * candidate list is taken from get_object_list() and then stripped of this
 * region itself, of objects rejected by prune_small_objects(), and of the
 * region's parents.  Title and help text differ for a degraded array that
 * has fewer disks than raid_disks.  The entry is marked inactive while the
 * array is syncing or a configuration change is pending.
 */
static void raid5_enable_add_spare_function(
	md_volume_t * volume,
	function_info_array_t * fia,
	int *function_count)
{
	raid5_conf_t *conf = mdvol_to_conf(volume);
	list_anchor_t available_objects = NULL;
	function_info_t *entry;
	uint count;
	int rc = 0;

	LOG_ENTRY();

	/* Ask the engine for the top level objects that could serve as a spare. */
	rc = EngFncs->get_object_list(DISK | SEGMENT| REGION,
				      DATA_TYPE,
				      NULL,
				      volume->region->disk_group,
				      VALID_INPUT_OBJECT | NO_DISK_GROUP,
				      &available_objects);
	if (rc != 0) {
		LOG_EXIT_VOID();
		return;
	}

	/*
	 * This MD region shows up in the list when it is available itself.
	 * It must never become its own child, so take it out.
	 */
	EngFncs->remove_thing(available_objects, volume->region);

	prune_small_objects(available_objects, volume);

	/* Parents of this MD region are not candidates either. */
	remove_parent_regions_from_list(available_objects, volume->region);

	count = EngFncs->list_count(available_objects);
	if (count > 0) {
		entry = &fia->info[*function_count];

		entry->function = MD_RAID5_FUNCTION_ADD_SPARE;
		entry->name = EngFncs->engine_strdup("addspare");
		entry->verb = EngFncs->engine_strdup(_("Add spare"));

		if ((volume->flags & MD_DEGRADED) &&
		    (volume->nr_disks < conf->raid_disks)) {
			entry->title =
				EngFncs->engine_strdup(_("Add spare to fix degraded array"));
			entry->help =
				EngFncs->engine_strdup(_("Use this function to add a spare object to replace a missing or faulty entry of this degraded region."));
		} else {
			entry->title =
				EngFncs->engine_strdup(_("Add spare object"));
			entry->help =
				EngFncs->engine_strdup(_("Use this function to add an object as a spare object for this RAID array."));
		}

		if ((volume->flags & MD_ARRAY_SYNCING) ||
		    (volume->region_mgr_flags & MD_RAID5_CONFIG_CHANGE_PENDING) ){
			entry->flags |= EVMS_FUNCTION_FLAGS_INACTIVE;
		}

		*function_count += 1;
	}

	EngFncs->destroy_list(available_objects);

	LOG_EXIT_VOID();
}

/*
 * raid5_enable_remove_faulty_function
 *
 * Append the "Remove faulty" entry to fia at index *function_count and bump
 * the count when md_volume_count_faulty_disks() reports at least one faulty
 * disk.  The entry is marked inactive while the array is syncing.
 */
static void raid5_enable_remove_faulty_function(
	md_volume_t * volume,
	function_info_array_t * fia,
	int *function_count)
{
	function_info_t *entry;

	LOG_ENTRY();

	/* No faulty disk, nothing to offer. */
	if (!(md_volume_count_faulty_disks(volume) > 0)) {
		LOG_EXIT_VOID();
		return;
	}

	entry = &fia->info[*function_count];

	entry->function = MD_RAID5_FUNCTION_REMOVE_FAULTY;
	entry->name = EngFncs->engine_strdup("remfaulty");
	entry->title = EngFncs->engine_strdup(_("Remove a faulty object"));
	entry->verb = EngFncs->engine_strdup(_("Remove"));
	entry->help =
		EngFncs->engine_strdup(_("Use this function to permanently remove a faulty object from this RAID array."));

	if (volume->flags & MD_ARRAY_SYNCING) {
		entry->flags |= EVMS_FUNCTION_FLAGS_INACTIVE;
	}

	*function_count += 1;

	LOG_EXIT_VOID();
}

/*
 * raid5_enable_remove_stale_disk_function
 *
 * Append the "Remove stale" entry to fia at index *function_count and bump
 * the count when md_volume_count_stale_disks() reports at least one stale
 * disk.  The entry is marked inactive while the array is syncing.
 */
static void raid5_enable_remove_stale_disk_function(
	md_volume_t * volume,
	function_info_array_t * fia,
	int *function_count)
{
	function_info_t *entry;

	LOG_ENTRY();

	/* No stale disk, nothing to offer. */
	if (!(md_volume_count_stale_disks(volume) > 0)) {
		LOG_EXIT_VOID();
		return;
	}

	entry = &fia->info[*function_count];

	entry->function = MD_RAID5_FUNCTION_REMOVE_STALE;
	entry->name = EngFncs->engine_strdup("remstale");
	entry->title = EngFncs->engine_strdup(_("Remove a stale object"));
	entry->verb = EngFncs->engine_strdup(_("Remove"));
	entry->help =
		EngFncs->engine_strdup(_("Use this function to permanently remove a stale (possibly faulty) object from this RAID array."));

	if (volume->flags & MD_ARRAY_SYNCING) {
		entry->flags |= EVMS_FUNCTION_FLAGS_INACTIVE;
	}

	*function_count += 1;

	LOG_EXIT_VOID();
}

/*
 * raid5_enable_mark_disk_faulty_function
 *
 * Append the "Mark faulty" entry to fia at index *function_count and bump
 * the count.  Nothing is added for a new, degraded or corrupt array, for an
 * inactive region, or when md_volume_count_spare_disks() reports no spare.
 * The entry is marked inactive while the array is syncing or a
 * configuration change is pending.
 */
static void raid5_enable_mark_disk_faulty_function(
	md_volume_t * volume,
	function_info_array_t * fia,
	int *function_count)
{
	function_info_t *entry;

	LOG_ENTRY();

	if ((volume->flags & (MD_NEW_REGION | MD_DEGRADED | MD_CORRUPT)) ||
	    !md_is_region_active(volume->region) ) {
		LOG_EXIT_VOID();
		return;
	}

	/*
	 * Marking a disk faulty is only offered when the array is neither
	 * corrupt nor degraded and a spare is there to take over.
	 */
	if ((volume->flags & (MD_CORRUPT | MD_DEGRADED)) ||
	    !(md_volume_count_spare_disks(volume) > 0)) {
		LOG_EXIT_VOID();
		return;
	}

	entry = &fia->info[*function_count];

	entry->function = MD_RAID5_FUNCTION_MARK_FAULTY;
	entry->name = EngFncs->engine_strdup("markfaulty");
	entry->title = EngFncs->engine_strdup(_("Mark object faulty"));
	entry->verb = EngFncs->engine_strdup(_("Mark faulty"));
	entry->help = EngFncs->engine_strdup(_("Use this function to mark an object faulty in this RAID array.  If the RAID array has a spare object, the spare object will be brought on-line to replace the faulty object."));

	if ((volume->flags & MD_ARRAY_SYNCING) ||
	    (volume->region_mgr_flags & MD_RAID5_CONFIG_CHANGE_PENDING)) {
		entry->flags |= EVMS_FUNCTION_FLAGS_INACTIVE;
	}

	*function_count += 1;

	LOG_EXIT_VOID();
}


/* Function:  raid5_get_plugin_functions
 *
 *  Build the array of plug-in functions currently offered for a RAID5
 *  region and store it through *functions.
 *
 *  region    - the RAID5 region; NULL means "functions of the plug-in
 *              itself", of which there are none.
 *  functions - receives the allocated function_info_array_t on success.
 *
 *  Returns 0 on success (the array is empty when the engine is not open
 *  for writing), ENOSYS for a NULL region or a corrupt array, ENOMEM when
 *  the array cannot be allocated, or the error of
 *  raid5_rediscover_region().  On every error path *functions is left
 *  untouched and nothing stays allocated.
 */
static int raid5_get_plugin_functions(storage_object_t        * region,
				      function_info_array_t * * functions)
{
	function_info_array_t * fia;
	int rc, function_count = 0;
	md_volume_t * volume;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	/*
	 * If region is NULL, that means the user is asking for plug-in
	 * functions on the plug-in.  We don't have any plug-in functions that
	 * are global for the plug-in.
	 */
	if (region == NULL) {
		LOG_EXIT_INT(ENOSYS);
		return ENOSYS;
	}

	volume = (md_volume_t *) region->private_data;

	if (volume->flags & MD_CORRUPT) {
		LOG_WARNING("MD region %s is corrupt.\n", volume->name);
		LOG_EXIT_INT(ENOSYS);
		return ENOSYS;
	}

	/* Room for the array header plus one entry per possible function. */
	fia = EngFncs->engine_alloc(sizeof(function_info_array_t) + sizeof(function_info_t) * MD_RAID5_FUNCTION_COUNT);
	if (!fia) {
		LOG_CRITICAL("Error allocating memory for an action info array.\n");
		LOG_EXIT_INT (ENOMEM);
		return ENOMEM;
	}

	/*
	 * Our functions are only allowed if the Engine is opened for
	 * writing.
	 */
	if (!(EngFncs->get_engine_mode() & ENGINE_WRITE)) {
		fia->count = function_count;
		*functions = fia;
		LOG_EXIT_INT(0);
		return 0;
	}

	if (md_is_recovery_running(region)) {
		volume->flags |= MD_ARRAY_SYNCING;
		LOG_DEBUG("%s : Resync/recovery is running\n", region->name);
	} else {
		if (volume->flags & MD_ARRAY_SYNCING) {
			/* The sync finished since the last look: rediscover. */
			rc = raid5_rediscover_region(region, TRUE);
			if (!rc) {
				/*
				 * The region was rediscovered,
				 * volume is invalid, must reset!!!
				 */
				volume = (md_volume_t *)region->private_data;
			} else {
				/*
				 * This is a bug, return immediately.  The
				 * array is not handed to the caller, so
				 * release it here instead of leaking it.
				 */
				EngFncs->engine_free(fia);
				LOG_EXIT_INT(rc);
				return rc;
			}
		}
		volume->flags &= ~MD_ARRAY_SYNCING;
	}

	/* Each helper appends its entry (if applicable) and bumps the count. */
	raid5_enable_add_spare_function(volume, fia, &function_count);
	raid5_enable_remove_spare_function(volume, fia, &function_count);
	raid5_enable_remove_faulty_function(volume, fia, &function_count);
	raid5_enable_remove_stale_disk_function(volume, fia, &function_count);
	raid5_enable_mark_disk_faulty_function(volume, fia, &function_count);

	fia->count = function_count;
	*functions = fia;

	LOG_EXIT_INT(0);
	return 0;
}


/*
 * can_be_added
 *
 * Check whether spare_candidate may be added to the array as a spare: it
 * must be a disk, segment or region, its usable size must be at least
 * conf->size, and it must not be the array's own region.
 *
 * Returns 0 when the object qualifies, EFAULT when the volume has no RAID5
 * configuration, EINVAL otherwise.
 */
static int can_be_added(md_volume_t * volume, storage_object_t * spare_candidate)
{
	raid5_conf_t *conf = mdvol_to_conf(volume);
	sector_count_t size;
	int type_ok;
	int rc = 0;

	LOG_ENTRY();

	if (conf == NULL) {
		LOG_MD_BUG();
		rc = EFAULT;
		goto out;
	}

	/* Only disks, segments and regions are accepted. */
	type_ok = (spare_candidate->object_type == DISK) ||
		  (spare_candidate->object_type == SEGMENT) ||
		  (spare_candidate->object_type == REGION);
	if (!type_ok) {
		LOG_ERROR("The type of object %s is not data.\n", spare_candidate->name);
		rc = EINVAL;
		goto out;
	}

	/* The usable part of the object must cover conf->size. */
	size = md_object_usable_size(spare_candidate, &volume->sb_ver, conf->chunksize);
	if (size < conf->size) {
		LOG_ERROR("Object %s is too small to be a spare object for array %s.\n",
			  spare_candidate->name, volume->region->name);
		rc = EINVAL;
		goto out;
	}

	/* A region can never be its own spare. */
	if (spare_candidate == volume->region) {
		LOG_ERROR("Region %s cannot be a spare object for itself.\n", spare_candidate->name);
		rc = EINVAL;
		goto out;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * is_spare
 *
 * Returns 0 when `spare` is a member of `vol` flagged MD_MEMBER_DISK_SPARE,
 * EINVAL otherwise.
 */
static int is_spare(md_volume_t *vol, storage_object_t *spare)
{
	md_member_t *member;
	int rc = EINVAL;

	LOG_ENTRY();

	member = md_volume_find_object(vol, spare);
	if ((member != NULL) && (member->flags & MD_MEMBER_DISK_SPARE)) {
		rc = 0;
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * is_faulty
 *
 * Returns 0 when `faulty` is a member of `vol` flagged
 * MD_MEMBER_DISK_FAULTY, EINVAL otherwise.
 */
static int is_faulty(md_volume_t *vol, storage_object_t * faulty)
{
	md_member_t *member;
	int rc = EINVAL;

	LOG_ENTRY();

	member = md_volume_find_object(vol, faulty);
	if ((member != NULL) && (member->flags & MD_MEMBER_DISK_FAULTY)) {
		rc = 0;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * is_stale
 *
 * Returns 0 when `stale_disk` is a member of `vol` flagged MD_MEMBER_STALE,
 * EFAULT when either argument is NULL, EINVAL otherwise.
 */
static int is_stale(md_volume_t *vol, storage_object_t *stale_disk)
{
	md_member_t *member;
	int rc = EINVAL;

	LOG_ENTRY();

	if ((vol == NULL) || (stale_disk == NULL)) {
		rc = EFAULT;
	} else {
		member = md_volume_find_object(vol, stale_disk);
		if ((member != NULL) && (member->flags & MD_MEMBER_STALE)) {
			rc = 0;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * is_active
 *
 * Returns 0 when `active_disk` is a member of `vol` that is not flagged
 * MD_MEMBER_NEW and for which md_member_is_raid_disk() holds, EINVAL
 * otherwise.
 */
static int is_active(md_volume_t * vol, storage_object_t * active_disk)
{
	md_member_t *member;
	int rc = EINVAL;

	LOG_ENTRY();

	member = md_volume_find_object(vol, active_disk);
	if ((member != NULL) &&
	    !(member->flags & MD_MEMBER_NEW) &&
	    md_member_is_raid_disk(member) ) {
		rc = 0;
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/* Function:  raid5_plugin_function
 *
 *  Execute one of the RAID5 plug-in functions (add spare, remove spare,
 *  remove faulty, remove stale, mark faulty) on the given objects.
 *
 *  region  - the RAID5 region the function applies to.
 *  action  - one of the MD_RAID5_FUNCTION_* codes.
 *  objects - the objects to act on; every object is validated before any
 *            of them is processed.
 *  options - not used by this function.
 *
 *  Returns 0 on success (the region is then flagged SOFLAG_DIRTY), EINVAL
 *  for a bad action code or object count, the error of the first object
 *  that fails validation, or the error of the first object whose
 *  processing fails.
 *
 *  Validation results are no longer OR-ed together: the checks return
 *  EFAULT or EINVAL, and OR-ing different errno values produced an
 *  unrelated error code.
 */
static int raid5_plugin_function(storage_object_t * region,
				 task_action_t      action,
				 list_anchor_t            objects,
				 option_array_t   * options) {

	int rc = 0;
	int check_rc;
	int (*check_object)(md_volume_t *, storage_object_t *) = NULL;
	md_volume_t * vol = (md_volume_t *) region->private_data;
	storage_object_t * object;
	list_element_t li;
	uint count;
	md_super_info_t info;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	if ((action < EVMS_Task_Plugin_Function) ||
	    (action >= EVMS_Task_Plugin_Function + MD_RAID5_FUNCTION_COUNT)) {
		LOG_ERROR("Action code 0x%x is out of range.\n", action);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	md_volume_get_super_info(vol, &info);

	count = EngFncs->list_count(objects);

	/*
	 * Step 1: check the number of objects and pick the per-object
	 * validation routine that belongs to the action.
	 */
	switch (action) {
	case MD_RAID5_FUNCTION_ADD_SPARE:
		if (count == 0) {
			LOG_ERROR("Must specify at least one spare object to be added.\n");
			LOG_EXIT_INT(EINVAL);
			return EINVAL;
		}
		if (count > (MAX_DISKS(vol) - info.nr_disks)) {
			LOG_ERROR("Can only specify up to %d object(s) to add as spare(s).\n",
				  MAX_DISKS(vol) - info.nr_disks);
			LOG_EXIT_INT(EINVAL);
			return EINVAL;
		}
		check_object = can_be_added;
		break;

	case MD_RAID5_FUNCTION_REMOVE_SPARE:
		if (count == 0) {
			LOG_ERROR("Must specify at least one spare object to be removed.\n");
			LOG_EXIT_INT(EINVAL);
			return EINVAL;
		}
		check_object = is_spare;
		break;

	case MD_RAID5_FUNCTION_REMOVE_FAULTY:
		if (count == 0) {
			LOG_ERROR("Must specify at least one faulty object to be removed.\n");
			LOG_EXIT_INT(EINVAL);
			return EINVAL;
		}
		check_object = is_faulty;
		break;

	case MD_RAID5_FUNCTION_REMOVE_STALE:
		if (count == 0) {
			LOG_ERROR("Must specify at least one stale object to be removed.\n");
			LOG_EXIT_INT(EINVAL);
			return EINVAL;
		}
		check_object = is_stale;
		break;

	case MD_RAID5_FUNCTION_MARK_FAULTY:
		if (count != 1) {
			LOG_ERROR("Must specify only one object to be marked faulty.\n");
			LOG_EXIT_INT(EINVAL);
			return EINVAL;
		}
		check_object = is_active;
		break;

	default:
		LOG_ERROR("0x%x is not a valid action code.\n", action);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/*
	 * Step 2: validate every object.  All objects are visited so that
	 * each check still runs for every object, but only the first
	 * failure is reported to the caller.
	 */
	LIST_FOR_EACH(objects, li, object) {
		check_rc = check_object(vol, object);
		if ((check_rc != 0) && (rc == 0)) {
			rc = check_rc;
		}
	}
	if (rc != 0) {
		LOG_EXIT_INT(rc);
		return rc;
	}

	/*
	 * NOTE(review): for a degraded array this only logs; the removal is
	 * not refused.  Confirm whether that is intended.
	 */
	if ((action == MD_RAID5_FUNCTION_REMOVE_SPARE) &&
	    (vol->flags & MD_DEGRADED)) {
		LOG_ERROR("Array %s is running in degrade mode.  "
			  "At least one spare must be left for the array to recover.\n",
			  vol->region->name);
	}

	/* Step 3: carry out the action, stopping at the first failure. */
	LIST_FOR_EACH(objects, li, object) {
		switch (action) {
		case MD_RAID5_FUNCTION_ADD_SPARE:
			rc = raid5_add_spare_disk(vol, object);
			break;

		case MD_RAID5_FUNCTION_REMOVE_SPARE:
			rc = raid5_remove_spare_disk(vol, object);
			break;

		case MD_RAID5_FUNCTION_REMOVE_FAULTY:
			rc = raid5_remove_faulty_disk(vol, object);
			break;

		case MD_RAID5_FUNCTION_MARK_FAULTY:
			rc = raid5_mark_faulty_disk(vol, object);
			if (!rc) {
				/* Optionally remove the disk right after marking it. */
				if (follow_up_mark_faulty(vol, object) == TRUE) {
					rc = raid5_remove_faulty_disk(vol, object);
				}
			}

			break;

		case MD_RAID5_FUNCTION_REMOVE_STALE:
			rc = raid5_remove_stale_disk(vol, object);
			break;

		default:
			/*
			 * Shouldn't get here if the validation
			 * code above did its job.
			 */
			LOG_WARNING("Action code 0x%x slipped past validation.\n", action);
			rc = EINVAL;
			break;
		}

		if (rc) {
			break;
		}

	}

	if (rc == 0) {
		vol->region->flags |= SOFLAG_DIRTY;
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * free_region
 *
 * Release the RAID5 configuration attached to the region's volume and then
 * the volume itself.
 *
 * NOTE(review): only the conf structure is freed here, whereas
 * raid5_free_private_data() also releases conf->disks -- confirm that
 * nothing is leaked on this path.
 */
static void free_region (storage_object_t *region)
{
	md_volume_t *volume;
	raid5_conf_t *conf;

	LOG_ENTRY();

	volume = (md_volume_t *)region->private_data;
	conf = mdvol_to_conf(volume);

	EngFncs->engine_free(conf);
	md_free_volume(volume);

	LOG_EXIT_VOID();
}

/*
 * raid5_backup_metadata
 *
 * Called to write metadata backup.  The superblocks are written through
 * md_write_sbs_to_disk() with MD_COMMIT_BACKUP_METADATA and
 * MD_COMMIT_DONT_CHECK_ACTIVE set for the duration of the call; both
 * commit flags are cleared again afterwards.
 *
 * Returns ENOSYS for a corrupt array, otherwise the result of
 * md_write_sbs_to_disk().
 */
int raid5_backup_metadata(storage_object_t *region)
{
	md_volume_t *volume;
	int rc=0;

	/* This is the RAID5 plug-in, not the linear one. */
	my_plugin = raid5_plugin;
	LOG_ENTRY();


	volume = region->private_data;
	if (volume->flags & MD_CORRUPT) {
		rc = ENOSYS;
		goto out;
	}
	volume->commit_flag |= MD_COMMIT_BACKUP_METADATA;
	volume->commit_flag |= MD_COMMIT_DONT_CHECK_ACTIVE;
	volume->flags |= MD_DIRTY;
	rc = md_write_sbs_to_disk(volume);
	volume->commit_flag &= ~MD_COMMIT_BACKUP_METADATA;
	volume->commit_flag &= ~MD_COMMIT_DONT_CHECK_ACTIVE;

out:
	LOG_EXIT_INT(rc);
	return rc;
}


static void raid5_plugin_cleanup(void) {

	int rc;
	list_anchor_t raid5_regions = NULL;
	list_element_t li;
	storage_object_t *region;
	md_volume_t *vol;
	kill_sectors_t *killsect;

	my_plugin = raid5_plugin;
	LOG_ENTRY();

	rc = EngFncs->get_object_list(REGION, DATA_TYPE, raid5_plugin, NULL, 0, &raid5_regions);

	if (rc == 0) {
		
		LIST_FOR_EACH(raid5_regions, li, region) {
			free_region(region);
		}

		if (raid5_expand_shrink_list) {
			LIST_FOR_EACH(raid5_expand_shrink_list, li, vol) {
				LOG_WARNING("Hmm... Cleaning up %s.\n", vol->name);
				raid5_free_private_data(vol);
				md_free_volume(vol);
			}
	
			EngFncs->destroy_list(raid5_expand_shrink_list);
		}

		if (raid5_delay_kill_sector_list) {
			LIST_FOR_EACH(raid5_delay_kill_sector_list, li, killsect) {
				LOG_CRITICAL("Hmm... Found delayed kill sector (LSN:%"PRIu64", count:%"PRIu64").\n",
				     killsect->lsn, killsect->count);
				EngFncs->engine_free(killsect);
			}
			EngFncs->destroy_list(raid5_delay_kill_sector_list);
		}

		EngFncs->destroy_list(raid5_regions);
	}

	LOG_EXIT_VOID();
}



/* Function tables for the MD Region Manager */
static plugin_functions_t raid5_functions =
{
	setup_evms_plugin        : raid5_setup_evms_plugin,
	cleanup_evms_plugin      : raid5_plugin_cleanup,
	can_delete               : raid5_can_delete,
	can_expand               : raid5_can_expand,
	can_shrink               : raid5_can_shrink,
	can_replace_child        : raid5_can_replace_child,
	discover                 : raid5_discover,
	create                   : raid5_create,
	delete                   : raid5_delete,
	discard                  : raid5_discard,
	expand                   : raid5_expand,
	shrink                   : raid5_shrink,
	replace_child            : raid5_replace_child,
	add_sectors_to_kill_list : raid5_add_sectors_to_kill_list,
	commit_changes           : raid5_commit_changes,
	can_activate             : raid5_can_activate_region,
	activate                 : raid5_activate_region,
	can_deactivate           : raid5_can_deactivate_region,
	deactivate               : raid5_deactivate_region,
	get_option_count         : raid5_get_option_count,
	init_task                : raid5_init_task,
	set_option               : raid5_set_option,
	set_objects              : raid5_set_objects,
	get_info                 : raid5_get_info,
	get_plugin_info          : raid5_get_plugin_info,
	read                     : raid5_read,
	write                    : raid5_write,
	get_plugin_functions     : raid5_get_plugin_functions,
	plugin_function          : raid5_plugin_function,
	backup_metadata          : raid5_backup_metadata
};



/* Function: PluginInit
 *
 *  Initializes the local plugin record
 */

plugin_record_t raid5_plugin_record = {
	.id         = EVMS_MD_RAID5_PLUGIN_ID,
	.version    = {.major = MAJOR_VERSION, .minor = MINOR_VERSION,.patchlevel = PATCH_LEVEL},
	.required_engine_api_version = {.major = 15, .minor = 0,. patchlevel = 0},
	.required_plugin_api_version = {.plugin = {.major = 13,.minor = 0,.patchlevel = 0}},
	.short_name = EVMS_MD_RAID5_PLUGIN_SHORT_NAME,
	.long_name  = EVMS_MD_RAID5_PLUGIN_LONG_NAME,
	.oem_name   = EVMS_IBM_OEM_NAME,
	.functions  = {.plugin = &raid5_functions},
	.container_functions = NULL
};
