/*
 *   (C) Copyright IBM Corp. 2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: LVM2 Plugin
 * File: evms2/engine/plugins/lvm2/metadata.c
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <plugin.h>
#include "lvm2.h"

/**
 * Status-flag name tables for the LVM2 text metadata.
 *
 * Each table maps a flag name that may appear in a "status" array of the
 * VGDA to the bit that represents it in the in-memory flags word. The names
 * are taken from lib/format_text/flags.c in the LVM2 source package. Every
 * table is terminated by an entry whose name is NULL.
 **/

typedef struct flags {
	char *name;		/* Flag name as written in the VGDA text. */
	unsigned long value;	/* Bit OR'd into the in-memory flags. */
} flags_t;

/* Flags that apply to a volume group (container). */
static flags_t vg_flags[] = {
	{ .name = "EXPORTED",	.value = LVM2_CONTAINER_FLAG_EXPORTED },
	{ .name = "RESIZEABLE",	.value = LVM2_CONTAINER_FLAG_RESIZEABLE },
	{ .name = "PARTIAL",	.value = LVM2_CONTAINER_FLAG_PARTIAL },
	{ .name = "PVMOVE",	.value = LVM2_CONTAINER_FLAG_PVMOVE },
	{ .name = "READ",	.value = LVM2_CONTAINER_FLAG_READ },
	{ .name = "WRITE",	.value = LVM2_CONTAINER_FLAG_WRITE },
	{ .name = "CLUSTERED",	.value = LVM2_CONTAINER_FLAG_CLUSTERED },
	{ .name = "SHARED",	.value = LVM2_CONTAINER_FLAG_SHARED },
	{ .name = NULL,		.value = 0 }
};

/* Flags that apply to a physical volume. */
static flags_t pv_flags[] = {
	{ .name = "ALLOCATABLE", .value = LVM2_PV_FLAG_ALLOCATABLE },
	{ .name = "EXPORTED",	 .value = LVM2_PV_FLAG_EXPORTED },
	{ .name = NULL,		 .value = 0 }
};

/* Flags that apply to a logical volume (region). */
static flags_t lv_flags[] = {
	{ .name = "READ",	 .value = LVM2_REGION_FLAG_READ },
	{ .name = "WRITE",	 .value = LVM2_REGION_FLAG_WRITE },
	{ .name = "FIXED_MINOR", .value = LVM2_REGION_FLAG_FIXED_MINOR },
	{ .name = "VISIBLE",	 .value = LVM2_REGION_FLAG_VISIBLE },
	{ .name = "PVMOVE",	 .value = LVM2_REGION_FLAG_PVMOVE },
	{ .name = "LOCKED",	 .value = LVM2_REGION_FLAG_LOCKED },
	{ .name = NULL,		 .value = 0 }
};

/* Table of tables, indexed by a flag_type_t value in read_flags() and
 * write_flags(). The order here presumably mirrors the flag_type_t
 * enumeration declared elsewhere -- keep the two in sync.
 */
static flags_t *metadata_flags[] = {
	vg_flags,
	pv_flags,
	lv_flags,
	NULL
};

/**
 * read_flags
 *
 * Translate the strings of a "status" array node from the VGDA into flag
 * bits.
 *
 * @node:  Key/value node whose value is an array of flag-name strings.
 * @type:  Selects which name table (VG, PV or LV) to match against.
 * @flags: Flags word to update. Matching bits are OR'd in; existing bits are
 *         left alone, so the caller is responsible for initializing it.
 *
 * Strings that do not match any name in the selected table are ignored.
 **/
void read_flags(key_value_t *node, flag_type_t type, unsigned long *flags)
{
	flags_t *table;
	flags_t *entry;
	int idx;

	LOG_ENTRY();

	table = metadata_flags[type];

	for (idx = 0; idx < node->value.array->count; idx++) {
		/* Linear search of the sentinel-terminated name table. */
		for (entry = table; entry->name; entry++) {
			if (strcmp(node->value.array->strings[idx],
				   entry->name) == 0) {
				*flags |= entry->value;
				break;
			}
		}
	}

	LOG_EXIT_VOID();
}

/**
 * write_flags
 *
 * Append the names of all flags that are set to the text VGDA buffer, as a
 * comma-separated list of double-quoted strings (e.g. "READ", "WRITE").
 *
 * @flags:  Flags word to render.
 * @type:   Selects which name table (VG, PV or LV) to use.
 * @buffer, @offset, @size: Output buffer state, handed to WRITE_STRING.
 *
 * Returns the rc produced by WRITE_STRING (0 if nothing failed).
 **/
int write_flags(unsigned long flags, flag_type_t type, char **buffer,
		unsigned long *offset, unsigned long *size)
{
	const char *separator = "";
	flags_t *entry;
	char buf[128];
	int rc = 0;

	LOG_ENTRY();

	for (entry = metadata_flags[type]; entry->name; entry++) {
		if (!(flags & entry->value)) {
			continue;
		}

		/* Every name after the first is preceded by ", ". */
		snprintf(buf, sizeof(buf), "%s\"%s\"", separator, entry->name);
		/* WRITE_STRING presumably jumps to "out" on failure --
		 * see its definition.
		 */
		WRITE_STRING(buf, buffer, offset, size, rc);
		separator = ", ";
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * lvm2_calc_crc
 *
 * Not sure if the LVM crc routines are compatible with the EVMS routines,
 * so the LVM2 version is duplicated here.
 *
 * @initial: Starting CRC value; also the result when @size is 0.
 * @buffer:  Bytes to checksum.
 * @size:    Number of bytes in @buffer.
 *
 * The CRC is computed with a 16-entry table, consuming each byte as two
 * 4-bit steps (low nibble first). No final inversion is applied.
 **/
u_int32_t lvm2_calc_crc(u_int32_t initial, void *buffer, u_int32_t size)
{
	static const u_int32_t lvm2_crc_table[] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};
	const u_int8_t *cursor = buffer;
	u_int32_t remaining;
	u_int32_t crc = initial;

	for (remaining = size; remaining; remaining--) {
		crc ^= *cursor++;
		/* Two table lookups per byte: one for each nibble. */
		crc = lvm2_crc_table[crc & 0xf] ^ (crc >> 4);
		crc = lvm2_crc_table[crc & 0xf] ^ (crc >> 4);
	}

	return crc;
}

/**
 * format_uuid and unformat_uuid
 *
 * LVM2 UUIDs are 32-character strings. In the text-based VGDA they are
 * written as groups of characters separated by dashes: one group of 6
 * characters, then 5 groups of 4 characters, then a final group of 6.
 *
 * format_uuid() inserts the dashes; unformat_uuid() removes them.
 *
 * For format_uuid(), @formatted must have room for the input characters,
 * the (up to six) inserted dashes and the terminating NUL.
 **/

void format_uuid(char *unformatted, char *formatted)
{
	int src, dst = 0;

	LOG_ENTRY();

	for (src = 0; unformatted[src]; src++) {
		/* Output positions at which a group separator belongs. */
		switch (dst) {
		case 6:
		case 11:
		case 16:
		case 21:
		case 26:
		case 31:
			formatted[dst++] = '-';
			break;
		default:
			break;
		}
		formatted[dst++] = unformatted[src];
	}
	formatted[dst] = '\0';

	LOG_EXIT_VOID();
}

void unformat_uuid(char *formatted, char *unformatted)
{
	int i = 0, j = 0;

	LOG_ENTRY();

	while (formatted[i]) {
		if (formatted[i] == '-')
			i++;
		unformatted[j++] = formatted[i++];
	}
	unformatted[j] = '\0';

	LOG_EXIT_VOID();
}

/**
 * create_new_uuid
 *
 * Create a new UUID by reading random bytes from /dev/urandom and mapping
 * each byte onto the set of characters allowed in a UUID.
 *
 * @new_uuid: Output buffer of at least LVM2_UUID_LEN + 1 bytes. On success
 *            it holds LVM2_UUID_LEN characters plus a terminating NUL.
 *
 * Returns 0 on success, the errno from open() if /dev/urandom cannot be
 * opened, or EINVAL if fewer than LVM2_UUID_LEN bytes could be read.
 **/

static char uuid_chars[] =
	"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

int create_new_uuid(char *new_uuid)
{
	int fd, i, rc;

	LOG_ENTRY();

	fd = open("/dev/urandom", O_RDONLY);
	if (fd < 0) {
		/* Capture errno before logging; the logging call may
		 * overwrite it.
		 */
		rc = errno;
		LOG_ERROR("Error opening /dev/urandom.\n");
		goto out;
	}

	/* The raw random bytes are read straight into the output buffer
	 * and translated in place below.
	 */
	rc = read(fd, new_uuid, LVM2_UUID_LEN);
	if (rc != LVM2_UUID_LEN) {
		LOG_ERROR("Error reading from /dev/urandom.\n");
		close(fd);
		rc = EINVAL;
		goto out;
	}
	close(fd);

	/* sizeof() yields an unsigned size_t, so the byte is converted to
	 * unsigned before the modulo and the index always falls inside
	 * uuid_chars (the -1 excludes its terminating NUL).
	 */
	for (i = 0; i < LVM2_UUID_LEN; i++)
		new_uuid[i] = uuid_chars[new_uuid[i] % (sizeof(uuid_chars)-1)];
	new_uuid[i] = '\0';

	LOG_DEBUG("Created new UUID %s.\n", new_uuid);
	rc = 0;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * Endian conversion routines for the PV metadata structures.
 **/

/* Convert the integer fields of a PV label in place. The same routine is
 * used in this file for both directions (disk order <-> CPU order).
 */
static void endian_convert_pv_label(label_header_t *pv_label)
{
	LOG_ENTRY();

	/* 32-bit fields. */
	pv_label->offset = DISK_TO_CPU32(pv_label->offset);
	pv_label->crc = DISK_TO_CPU32(pv_label->crc);

	/* 64-bit field. */
	pv_label->sector = DISK_TO_CPU64(pv_label->sector);

	LOG_EXIT_VOID();
}

/* Convert the integer fields of a PV header in place, including both
 * disk_area lists that trail it. Each list ends at the first entry whose
 * offset is zero; the second list starts right after the first list's
 * terminating entry.
 */
static void endian_convert_pv_header(pv_header_t *pv_header)
{
	disk_location_t *area = pv_header->disk_areas;
	int list;

	LOG_ENTRY();

	pv_header->device_size = DISK_TO_CPU64(pv_header->device_size);

	for (list = 0; list < 2; list++) {
		if (list > 0) {
			/* Step over the terminator of the first list. */
			area++;
		}
		for (; area->offset; area++) {
			area->offset = DISK_TO_CPU64(area->offset);
			area->size = DISK_TO_CPU64(area->size);
		}
	}

	LOG_EXIT_VOID();
}

/* Convert the integer fields of an MDA header in place, followed by every
 * raw_location entry up to (not including) the first one whose offset is
 * zero.
 */
static void endian_convert_mda_header(mda_header_t *mda_header)
{
	raw_location_t *raw;

	LOG_ENTRY();

	mda_header->checksum = DISK_TO_CPU32(mda_header->checksum);
	mda_header->version = DISK_TO_CPU32(mda_header->version);
	mda_header->start = DISK_TO_CPU64(mda_header->start);
	mda_header->size = DISK_TO_CPU64(mda_header->size);

	for (raw = &mda_header->raw_locations[0]; raw->offset; raw++) {
		raw->offset = DISK_TO_CPU64(raw->offset);
		raw->size = DISK_TO_CPU64(raw->size);
		raw->checksum = DISK_TO_CPU32(raw->checksum);
	}

	LOG_EXIT_VOID();
}

/**
 * alloc_metadata_location
 *
 * Allocate a metadata_location structure and record the start and size of
 * the area it describes. The other fields are left as returned by the
 * engine allocator.
 *
 * Returns the new structure, or NULL if the allocation failed.
 **/
static metadata_location_t *alloc_metadata_location(u_int64_t start,
						    u_int64_t size)
{
	metadata_location_t *location;

	LOG_ENTRY();

	location = EngFncs->engine_alloc(sizeof(metadata_location_t));
	if (location != NULL) {
		location->mda_size = size;
		location->mda_start = start;
	}

	LOG_EXIT_PTR(location);
	return location;
}

/**
 * get_data_and_metadata_areas
 *
 * Walk the two disk_area lists at the end of the pv_header (data areas
 * first, then metadata areas, each ended by a zero-offset entry) and build
 * a list of metadata_location_t structures for each. Offsets and sizes are
 * converted from bytes to sectors.
 *
 * On success the new lists are returned through @data_locations and
 * @metadata_locations. On failure (ENOMEM) both lists are destroyed and the
 * output parameters are not written.
 **/
static int get_data_and_metadata_areas(pv_header_t *pv_header,
				       list_anchor_t *data_locations,
				       list_anchor_t *metadata_locations)
{
	list_anchor_t lists[2];		/* [0] = data, [1] = metadata */
	metadata_location_t *location;
	disk_location_t *area;
	int which, rc = 0;

	LOG_ENTRY();

	lists[0] = EngFncs->allocate_list();
	lists[1] = EngFncs->allocate_list();
	if (!lists[0] || !lists[1]) {
		rc = ENOMEM;
		goto out;
	}

	area = &pv_header->disk_areas[0];
	for (which = 0; which < 2; which++) {
		for (; area->offset; area++) {
			location = alloc_metadata_location(
					area->offset >> EVMS_VSECTOR_SIZE_SHIFT,
					area->size >> EVMS_VSECTOR_SIZE_SHIFT);
			if (!location) {
				rc = ENOMEM;
				goto out;
			}
			EngFncs->insert_thing(lists[which], location,
					      INSERT_AFTER, NULL);
		}
		/* Step over the zero-offset entry that ends this list. */
		area++;
	}

	*data_locations = lists[0];
	*metadata_locations = lists[1];

out:
	if (rc) {
		EngFncs->destroy_list(lists[0]);
		EngFncs->destroy_list(lists[1]);
	}
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * check_pv_size_write_pv_label
 *
 * Recompute the PV-header's device_size from the actual object size and
 * write the label sector (which also contains the header) back to disk.
 *
 * device_size (in bytes) is the object size minus the offset of the first
 * metadata area and minus the size of every metadata area.
 *
 * @pv_label and @pv_header are in CPU format on entry and on return.
 * Returns the result of the WRITE.
 **/
static int check_pv_size_write_pv_label(storage_object_t *object,
					label_header_t *pv_label,
					pv_header_t *pv_header)
{
	disk_location_t *area;
	u_int32_t crc;
	int rc = 0;

	LOG_ENTRY();

	/* Skip the data-area list and its terminator to reach the first
	 * metadata area.
	 */
	for (area = pv_header->disk_areas; area->offset; area++) {
		;
	}
	area++;

	/* Update the device_size. */
	pv_header->device_size = object->size << EVMS_VSECTOR_SIZE_SHIFT;
	pv_header->device_size -= area->offset;
	for (; area->offset; area++) {
		pv_header->device_size -= area->size;
	}

	/* Switch both structures to on-disk byte order. */
	endian_convert_pv_label(pv_label);
	endian_convert_pv_header(pv_header);

	/* The CRC covers the sector from the label's offset field to the
	 * end of the sector.
	 */
	crc = lvm2_calc_crc(LVM2_INITIAL_CRC, &pv_label->offset,
			    EVMS_VSECTOR_SIZE -
			    ((char *)&pv_label->offset - (char *)pv_label));
	pv_label->crc = CPU_TO_DISK32(crc);

	/* Write the sector holding the label and header. */
	rc = WRITE(object, pv_label->sector, 1, pv_label);

	/* Restore CPU byte order for the caller. */
	endian_convert_pv_label(pv_label);
	endian_convert_pv_header(pv_header);

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * check_pv_size_prompt
 *
 * If the device sizes don't match, ask the user whether this object is really
 * an LVM2 PV. If it is, ask them if they want to rewrite the metadata to
 * reflect the correct device size.
 *
 * @object:    Object carrying the PV label.
 * @pv_label:  PV label (CPU format), passed on to the rewrite routine.
 * @pv_header: PV header (CPU format), passed on to the rewrite routine.
 * @total:     Size in sectors derived from the on-disk metadata.
 *
 * Returns EINVAL if the user says the object is not a PV, 0 otherwise.
 **/
static int check_pv_size_prompt(storage_object_t *object,
				label_header_t *pv_label,
				pv_header_t *pv_header,
				sector_count_t total)
{
	/* In both choice lists, index 0 is the "no" answer, which is also
	 * the value "answer" is preset to before each question.
	 */
	char *choices1[] = { _("No, it is not a PV."),
			     _("Yes, it is a PV."), NULL };
	char *choices2[] = { _("No, do not update the metadata."),
			     _("Yes, update the metadata."), NULL };
	int answer = 0, rc = 0;
	char size1[20], size2[20];

	LOG_ENTRY();

	/* The sizes are pre-formatted as strings and inserted into the
	 * message with %s.
	 */
	snprintf(size1, 20, "%"PRIu64, total);
	snprintf(size2, 20, "%"PRIu64, object->size);

	QUESTION(&answer, choices1,
		 _("Object %s has an LVM2 PV label and header, but the recorded "
		   "size of the object (%s sectors) does not match the actual "
		   "size (%s sectors). Please indicate whether or not %s is an "
		   "LVM2 PV.\n\n"

		   "If your container includes an MD RAID region, it's "
		   "possible that LVM2 has found the PV label on one of that "
		   "region's child objects instead of on the MD region itself. "
		   "If this is the case, then object %s is most likely NOT one "
		   "of the LVM2 PVs.\n\n"

		   "Choosing \"no\" here is the default, and is always safe, "
		   "since no changes will be made to your configuration. "
		   "Choosing \"yes\" will modify your configuration, and will "
		   "cause problems if it's not the correct choice. The only "
		   "time you would really need to choose \"yes\" here is if "
		   "you are converting an existing container from using the "
		   "LVM2 tools to using EVMS, and the container is NOT created "
		   "from an MD RAID region. If you created and manage your "
		   "containers only with EVMS, you should always be able to "
		   "answer \"no\".\n\n"

		   "If you answer \"no\" and your volumes are correctly "
		   "discovered and activated, you may disable this message in "
		   "the future by editing the EVMS config file and setting the "
		   "device_size_prompt option to \"no\" in the lvm2 section."),
		 object->name, size1, size2, object->name, object->name);
	if (!answer) {
		/* Not a PV: reject the object. */
		rc = EINVAL;
		goto out;
	}

	answer = 0;
	QUESTION(&answer, choices2,
		 _("Would you like to update the on-disk metadata to record "
		   "the correct size of the PV %s?"), object->name);
	if (answer) {
		/* NOTE(review): the result of the write is ignored, so a
		 * failed update still lets this function return 0 --
		 * confirm that this best-effort behavior is intended.
		 */
		check_pv_size_write_pv_label(object, pv_label, pv_header);
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * check_pv_size
 *
 * Compare the actual size of the object with the size implied by the PV
 * metadata: the device_size from the PV-header, plus the offset of the
 * first metadata area, plus the sizes of all metadata areas.
 *
 * The LVM2 tools don't seem to treat the device_size field as a constant.
 * Each time a vgremove is performed, the PVs from that group have their
 * device_size decremented by some amount. So a mismatch is not treated as
 * proof that the object is not a PV: the user is asked instead, unless the
 * device_size_prompt config option has been turned off, in which case the
 * mismatch is logged and the object is rejected.
 *
 * @metadata_locations must contain at least one entry.
 * Returns 0 if the object is accepted as a PV, EINVAL otherwise.
 **/
static int check_pv_size(storage_object_t *object,
			 label_header_t *pv_label,
			 pv_header_t *pv_header,
			 list_anchor_t metadata_locations)
{
	metadata_location_t *location;
	list_element_t iter;
	sector_count_t total;
	int rc = 0;

	LOG_ENTRY();

	location = EngFncs->first_thing(metadata_locations, NULL);

	/* Everything is summed in sectors. */
	total = pv_header->device_size >> EVMS_VSECTOR_SIZE_SHIFT;
	total += location->mda_start;
	LIST_FOR_EACH(metadata_locations, iter, location) {
		total += location->mda_size;
	}

	if (total == object->size) {
		goto out;
	}

	if (!device_size_prompt) {
		LOG_WARNING("Object %s recorded size (%"PRIu64") does "
			    "not match actual size (%"PRIu64")\n",
			    object->name, total, object->size);
		rc = EINVAL;
	} else {
		rc = check_pv_size_prompt(object, pv_label,
					  pv_header, total);
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * read_pv_label
 *
 * Search for an LVM2 label on the specified object. The label can be located
 * in one of the first four sectors on the object. Use the provided buffer for
 * the I/O. If a label is found, verify the fields. The returned pv_label will
 * be a pointer to a location inside the provided buffer.
 *
 * @object:   Object to scan.
 * @buffer:   I/O buffer; must hold LVM2_LABEL_SCAN_SECTORS sectors.
 * @pv_label: Set to the label found (in CPU format). Only written when a
 *            valid label is found.
 *
 * Returns 0 if a valid label was found, the READ error code on I/O failure,
 * or EINVAL if none of the scanned sectors holds a valid label.
 **/
static int read_pv_label(storage_object_t *object,
			 void *buffer,
			 label_header_t **pv_label)
{
	label_header_t *label;
	u_int32_t crc;
	int sector, rc;

	LOG_ENTRY();

	/* The label can be anywhere in the first four sectors, so read
	 * that entire area and then scan for the label.
	 */
	rc = READ(object, 0, LVM2_LABEL_SCAN_SECTORS, buffer);
	if (rc) {
		LOG_ERROR("I/O error reading label area on "
			  "object %s.\n", object->name);
		goto out;
	}

	/* Each failed check sets rc to EINVAL and moves on to the next
	 * sector, so rc is EINVAL when the loop ends without a match.
	 */
	for (sector = 0; sector < LVM2_LABEL_SCAN_SECTORS; sector++) {
		label = buffer + (sector << EVMS_VSECTOR_SIZE_SHIFT);

		/* Check for the LABELONE identifier. */
		rc = memcmp(label->id, LVM2_LABEL_ID, sizeof(label->id));
		if (rc) {
			LOG_DEBUG("No LVM2 label found on object %s, "
				  "sector %u\n", object->name, sector);
			rc = EINVAL;
			continue;
		}

		/* Verify the CRC from the label. Must do this before
		 * endian-converting the label. The CRC covers the sector
		 * from the label's offset field to the end of the sector.
		 */
		crc = lvm2_calc_crc(LVM2_INITIAL_CRC, &label->offset,
				    EVMS_VSECTOR_SIZE -
				    ((void*)&label->offset - (void*)label));
		if (DISK_TO_CPU32(label->crc) != crc) {
			LOG_WARNING("Found LVM2 label on object %s, sector %u "
				    "with wrong CRC.\n", object->name, sector);
			LOG_WARNING("Found CRC %u, expecting %u\n",
				    crc, DISK_TO_CPU32(label->crc));
			rc = EINVAL;
			continue;
		}

		/* From here on the label in the buffer is in CPU format. */
		endian_convert_pv_label(label);

		/* Make sure this label is in the correct sector. */
		if (label->sector != sector) {
			LOG_WARNING("Found LVM2 label on object %s at wrong "
				    "sector.\n", object->name);
			LOG_WARNING("Found at sector %u, expected at "
				    "%"PRIu64"\n", sector, label->sector);
			rc = EINVAL;
			continue;
		}

		/* Verify the label type. */
		rc = memcmp(label->type, LVM2_LABEL_TYPE, sizeof(label->type));
		if (rc) {
			LOG_WARNING("Found LVM2 label on object %s, sector %u "
				    "with wrong type.\n", object->name, sector);
			rc = EINVAL;
			continue;
		}

		/* Found a valid label. rc is 0 here (from the memcmp). */
		LOG_DETAILS("Found LVM2 label on object %s, sector %u.\n",
			    object->name, sector);
		*pv_label = label;
		break;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * read_pv_header
 *
 * Locate and validate the PV-header that follows the PV-label.
 *
 * @object:             Object being examined.
 * @pv_label:           Label (CPU format) found by read_pv_label().
 * @pv_header:          Set to the header, which lives in the same buffer
 *                      as the label and is converted to CPU format in place.
 * @data_locations:     Receives the list of data areas.
 * @metadata_locations: Receives the list of metadata areas.
 *
 * On failure both list pointers are NULL on return. Returns 0 on success,
 * otherwise the error from building the lists, EINVAL if either list is
 * empty, or the result of check_pv_size().
 **/
static int read_pv_header(storage_object_t *object,
			  label_header_t *pv_label,
			  pv_header_t **pv_header,
			  list_anchor_t *data_locations,
			  list_anchor_t *metadata_locations)
{
	pv_header_t *header;
	int rc;

	LOG_ENTRY();

	*data_locations = NULL;
	*metadata_locations = NULL;

	/* The label's offset field is the distance, in bytes, from the
	 * start of the label to the PV header.
	 */
	header = (pv_header_t *)((char *)pv_label + pv_label->offset);
	*pv_header = header;
	endian_convert_pv_header(header);

	/* Build the lists of data and metadata locations. */
	rc = get_data_and_metadata_areas(header, data_locations,
					 metadata_locations);
	if (rc) {
		LOG_ERROR("Error getting lists of data and metadata locations "
			  "on object %s.\n", object->name);
		goto out;
	}

	/* Must have at least one metadata area and one data area.
	 * (FIXME: Can we handle zero metadata areas?)
	 */
	if (EngFncs->list_count(*metadata_locations) < 1 ||
	    EngFncs->list_count(*data_locations) < 1) {
		LOG_WARNING("No VG metadata areas found on object %s.\n",
			    object->name);
		rc = EINVAL;
		goto out;
	}

	/* Finally, compare the recorded size with the object's size. */
	rc = check_pv_size(object, pv_label, header, *metadata_locations);

out:
	if (rc && *data_locations) {
		/* Both lists exist whenever the data list does. */
		EngFncs->destroy_list(*data_locations);
		EngFncs->destroy_list(*metadata_locations);
		*data_locations = NULL;
		*metadata_locations = NULL;
	}
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * read_mda_header
 *
 * Read the MDA header from disk and validate the fields.
 *
 * @object:             Object to read from.
 * @buffer:             I/O buffer, reused for every metadata area.
 * @metadata_locations: Metadata areas to examine. For each area with a valid
 *                      header, vgda_offset and vgda_size are filled in from
 *                      the header's first raw_location.
 * @mda_header:         Receives a newly allocated copy (CPU format) of the
 *                      first valid header found, or NULL if none was found.
 *                      The caller frees it.
 *
 * NOTE(review): rc is overwritten on every pass through the loop, so the
 * return value reflects only the last metadata area examined (or ENOMEM).
 * With several areas, *mda_header can therefore be set while a non-zero rc
 * is returned, and a bad area followed by a good one returns 0. Confirm
 * whether this is the intended policy.
 **/
static int read_mda_header(storage_object_t *object,
			   void *buffer,
			   list_anchor_t metadata_locations,
			   mda_header_t **mda_header)
{
	mda_header_t *header = buffer;
	metadata_location_t *location;
	list_element_t iter;
	lba_t sector;
	u_int32_t crc;
	int rc = 0;

	LOG_ENTRY();

	*mda_header = NULL;

	/* Loop through all metadata areas looking for valid MDA headers. */
	LIST_FOR_EACH(metadata_locations, iter, location) {
		/* The header occupies the first sector of the area. */
		sector = location->mda_start;
		rc = READ(object, sector, 1, buffer);
		if (rc) {
			LOG_ERROR("I/O error reading MDA header on object "
				  "%s, sector %"PRIu64".\n", object->name, sector);
			continue;
		}

		/* Verify the CRC from the header. Must do this before
		 * endian-converting the header. The CRC starts at the magic
		 * field and excludes the checksum field itself.
		 */
		crc = lvm2_calc_crc(LVM2_INITIAL_CRC, header->magic,
				    LVM2_MDA_HEADER_SIZE - sizeof(header->checksum));
		if (DISK_TO_CPU32(header->checksum) != crc) {
			LOG_WARNING("Found MDA header on object %s, sector %"PRIu64
				    " with wrong CRC.\n", object->name, sector);
			LOG_WARNING("Found CRC %u, expecting %u\n",
				    crc, DISK_TO_CPU32(header->checksum));
			rc = EINVAL;
			continue;
		}

		endian_convert_mda_header(header);

		/* Validate the remaining fields in the MDA header. */
		rc = memcmp(header->magic, LVM2_FMTT_MAGIC, sizeof(header->magic));
		if (rc) {
			LOG_WARNING("Found MDA header on object %s, sector %"PRIu64
				    " with wrong magic number.\n", object->name, sector);
			rc = EINVAL;
			continue;
		}

		if (header->version != LVM2_FMTT_VERSION) {
			LOG_WARNING("Found MDA header on object %s, sector %"PRIu64
				    " with wrong version.\n", object->name, sector);
			LOG_WARNING("Found version %u, expecting %u\n",
				    header->version, LVM2_FMTT_VERSION);
			rc = EINVAL;
			continue;
		}

		/* The header records its area's start and size in bytes;
		 * both must agree with the PV-header's disk_area entry.
		 */
		if (header->start != location->mda_start << EVMS_VSECTOR_SIZE_SHIFT) {
			LOG_WARNING("Found MDA header on object %s, sector %"PRIu64
				    " with wrong starting sector.\n", object->name, sector);
			rc = EINVAL;
			continue;
		}

		if (header->size != location->mda_size << EVMS_VSECTOR_SIZE_SHIFT) {
			LOG_WARNING("Found MDA header on object %s, sector %"PRIu64
				    " with wrong size.\n", object->name, sector);
			rc = EINVAL;
			continue;
		}

		/* Make a copy of the first valid mda-header to return. */
		if (!*mda_header) {
			*mda_header = EngFncs->engine_alloc(EVMS_VSECTOR_SIZE);
			if (!*mda_header) {
				rc = ENOMEM;
				break;
			}
			memcpy(*mda_header, header, EVMS_VSECTOR_SIZE);
		}

		/* Fill in the metadata_location's remaining fields.
		 * vgda_offset is stored in sectors, vgda_size unshifted.
		 */
		location->vgda_offset = header->raw_locations[0].offset >> EVMS_VSECTOR_SIZE_SHIFT;
		location->vgda_size = header->raw_locations[0].size;

		LOG_DETAILS("Found MDA header on object %s, sector %"PRIu64"\n",
			    object->name, sector);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * read_vg_metadata
 *
 * Read the text portion of the VG metadata. Then parse the text into a
 * tree for easier processing.
 *
 * @object:     Object to read from.
 * @mda_header: Validated MDA header (CPU format). Its first raw_location
 *              gives the offset, size and checksum of the text.
 * @vgda_tree:  Receives the parsed tree on success; untouched on failure.
 *
 * Returns 0 on success, EINVAL for an orphan PV, an implausible size, a CRC
 * mismatch or a parse error, ENOMEM if the buffer cannot be allocated, or
 * the READ error code on I/O failure.
 *
 * The location values come from disk and are not trusted: the size is
 * checked against the metadata area, and the text is explicitly
 * NUL-terminated before parsing because the parser is given no length.
 **/
static int read_vg_metadata(storage_object_t *object,
			    mda_header_t *mda_header,
			    key_value_t **vgda_tree)
{
	raw_location_t *vg_location = &mda_header->raw_locations[0];
	unsigned long buffer_size;
	sector_count_t count;
	lba_t sector;
	u_int32_t crc;
	void *buffer = NULL;
	key_value_t *tree;
	int rc;

	LOG_ENTRY();

	/* Make sure there's a VG metadata area to read. A VG-location
	 * with a zero offset indicates an "orphan" PV.
	 */
	if (!vg_location->offset || !vg_location->size) {
		LOG_WARNING("Object %s is an orphan PV.\n", object->name);
		rc = EINVAL;
		goto out;
	}

	/* The text lives inside the metadata area, so it can never be
	 * larger than that area. Rejecting bogus sizes here also keeps the
	 * buffer-size arithmetic below from being truncated.
	 */
	if (vg_location->size > mda_header->size) {
		LOG_WARNING("VG metadata size on object %s is larger than "
			    "its metadata area.\n", object->name);
		rc = EINVAL;
		goto out;
	}

	/* Allocate a buffer to read in the VG metadata. I/O is done in
	 * whole sectors; one extra byte leaves room for a terminator.
	 */
	buffer_size = round_up(vg_location->size, EVMS_VSECTOR_SIZE);
	buffer = EngFncs->engine_alloc(buffer_size + 1);
	if (!buffer) {
		LOG_ERROR("Error allocating buffer to read VG metadata on "
			  "object %s.\n", object->name);
		rc = ENOMEM;
		goto out;
	}

	/* Read the metadata from disk. */
	sector = (mda_header->start + vg_location->offset) >> EVMS_VSECTOR_SIZE_SHIFT;
	count = buffer_size >> EVMS_VSECTOR_SIZE_SHIFT;
	rc = READ(object, sector, count, buffer);
	if (rc) {
		LOG_ERROR("I/O error reading VG metadata on object %s, "
			  "sector %"PRIu64".\n", object->name, sector);
		goto out;
	}

	/* Calculate the CRC of this buffer. */
	crc = lvm2_calc_crc(LVM2_INITIAL_CRC, buffer, vg_location->size);
	if (vg_location->checksum != crc) {
		LOG_WARNING("Found VG metadata on object %s, sector %"PRIu64
			    " with wrong CRC.\n", object->name, sector);
		LOG_WARNING("Found CRC %u, expecting %u\n",
			    crc, vg_location->checksum);
		rc = EINVAL;
		goto out;
	}

	/* Cut the text off at its recorded size, so the parser never sees
	 * whatever follows it in the last sector and can never run off the
	 * end of the buffer. This byte lies outside the CRC'd range.
	 */
	((char *)buffer)[vg_location->size] = '\0';

	/* Parse the text-data from the buffer into
	 * a tree representing the VGDA.
	 */
	tree = parse_vg_metadata(buffer);
	if (!tree) {
		LOG_WARNING("Error parsing the VG metadata on object %s.\n",
			    object->name);
		rc = EINVAL;
		goto out;
	}

	*vgda_tree = tree;
	rc = 0;

out:
	EngFncs->engine_free(buffer);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * parse_pv_info
 *
 * Extract the information for one PV from its entry in the VGDA tree.
 *
 * @pv_entry: Node for the PV; its key has the form "pv<index>".
 * @pv_uuid:  Receives the PV's UUID with the dashes removed.
 * @pv_index: Receives the index parsed from the entry's key.
 * @pe_start: Receives the value of the "pe_start" entry.
 * @pe_count: Receives the value of the "pe_count" entry.
 * @pv_flags: Flag bits from the "status" entry are OR'd into this word.
 *
 * Returns 0 on success, or EINVAL if the key is malformed or any of the
 * required entries is missing. Outputs already written before the failure
 * are not rolled back.
 **/
int parse_pv_info(key_value_t *pv_entry,
		  char *pv_uuid,
		  u_int32_t *pv_index,
		  u_int64_t *pe_start,
		  u_int64_t *pe_count,
		  unsigned long *pv_flags)
{
	key_value_t *node;
	int rc = EINVAL;	/* Every early exit below is a parse error. */

	LOG_ENTRY();

	/* The key for the PV-entry node is the name of the PV,
	 * which contains the index for the PV.
	 */
	if (sscanf(pv_entry->key, "pv%u", pv_index) != 1) {
		LOG_ERROR("Invalid PV name: %s\n", pv_entry->key);
		goto out;
	}

	/* PV UUID. */
	node = find_key(get_section(pv_entry), "id");
	if (!node) {
		LOG_ERROR("Parse error finding \"id\" entry for PV %s "
			  "in VGDA!\n", pv_entry->key);
		goto out;
	}
	unformat_uuid(node->value.string, pv_uuid);

	/* PE start. */
	node = find_key(get_section(pv_entry), "pe_start");
	if (!node) {
		LOG_ERROR("Parse error finding \"pe_start\" entry for PV %s "
			  "in VGDA!\n", pv_entry->key);
		goto out;
	}
	*pe_start = read_int64(node);

	/* PE count. */
	node = find_key(get_section(pv_entry), "pe_count");
	if (!node) {
		LOG_ERROR("Parse error finding \"pe_count\" entry for PV %s "
			  "in VGDA!\n", pv_entry->key);
		goto out;
	}
	*pe_count = read_int64(node);

	/* Status flags. */
	node = find_key(get_section(pv_entry), "status");
	if (!node) {
		LOG_ERROR("Parse error finding \"status\" entry for PV %s "
			  "in VGDA!\n", pv_entry->key);
		goto out;
	}
	read_flags(node, PV_FLAGS, pv_flags);

	rc = 0;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * find_pv_info_in_vgda
 *
 * Search the "physical_volumes" section of the VGDA tree for the entry whose
 * "id" matches the specified UUID, then parse that entry.
 *
 * @vgda_tree: Parsed VGDA.
 * @pv_uuid:   Unformatted UUID to look for (LVM2_UUID_LEN bytes compared).
 * @pv_index, @pe_start, @pe_count, @pv_flags: Filled in by parse_pv_info().
 *
 * Returns EINVAL if the section or the PV entry cannot be found, otherwise
 * the result of parse_pv_info().
 **/
static int find_pv_info_in_vgda(key_value_t *vgda_tree,
				char *pv_uuid,
				u_int32_t *pv_index,
				u_int64_t *pe_start,
				u_int64_t *pe_count,
				unsigned long *pv_flags)
{
	key_value_t *pv_section, *pv_entry, *id_node;
	char this_uuid[LVM2_UUID_LEN+1];
	int rc = EINVAL;

	LOG_ENTRY();

	pv_section = find_key(get_section(vgda_tree), "physical_volumes");
	if (!pv_section) {
		LOG_ERROR("Parse error finding \"physical_volumes\" "
			  "section in VGDA!\n");
		goto out;
	}

	/* Walk the PV entries until one carries the requested UUID.
	 * Entries without an "id" are reported and skipped.
	 */
	pv_entry = get_section(pv_section);
	while (pv_entry) {
		id_node = find_key(get_section(pv_entry), "id");
		if (id_node) {
			unformat_uuid(id_node->value.string, this_uuid);
			if (memcmp(this_uuid, pv_uuid, LVM2_UUID_LEN) == 0) {
				/* Found the pv_entry for this PV. */
				break;
			}
		} else {
			LOG_ERROR("Parse error finding \"id\" entry for PV "
				  "%s in VGDA!\n", pv_entry->key);
		}
		pv_entry = pv_entry->next;
	}

	if (!pv_entry) {
		LOG_ERROR("Did not find PV entry in VGDA.\n");
		goto out;
	}

	rc = parse_pv_info(pv_entry, this_uuid, pv_index,
			   pe_start, pe_count, pv_flags);

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * read_pv_metadata
 *
 * Read all the LVM2 metadata from this object and determine if it is a valid
 * LVM2 PV. If so, allocate a pv_data for the object and initialize with
 * information from the metadata.
 *
 * The steps are: PV label -> PV header -> MDA header -> text VGDA -> this
 * PV's entry in the VGDA -> allocate_pv_data(). The first failing step
 * ends the function with that step's error code.
 *
 * Ownership: the metadata_locations list and the vgda_tree are handed to
 * allocate_pv_data() on success. The I/O buffers, the mda_header copy and
 * the data_locations list are always released before returning.
 *
 * Returns 0 if the object is a valid PV, non-zero otherwise.
 **/
int read_pv_metadata(storage_object_t *object)
{
	void *label_buffer = NULL, *mda_buffer = NULL;
	list_anchor_t data_locations = NULL, metadata_locations = NULL;
//	metadata_location_t *data_location;
	label_header_t *pv_label;
	pv_header_t *pv_header;
	mda_header_t *mda_header = NULL;
	key_value_t *vgda_tree;
	u_int32_t pv_index;
	u_int64_t pe_start, pe_count;
	unsigned long pv_flags = 0;
	int rc;

	LOG_ENTRY();

	/* Allocate buffers to read the various metadata sections. */
	label_buffer = EngFncs->engine_alloc(LVM2_LABEL_SCAN_SIZE);
	mda_buffer = EngFncs->engine_alloc(LVM2_MDA_HEADER_SIZE);
	if (!label_buffer || !mda_buffer) {
		LOG_ERROR("Error allocating I/O buffers to read PV metadata "
			  "on object %s.\n", object->name);
		rc = ENOMEM;
		goto out;
	}

	/* Check for an LVM2 label. pv_label points into label_buffer. */
	rc = read_pv_label(object, label_buffer, &pv_label);
	if (rc) {
		goto out;
	}

	/* Check for an LVM2 PV-header. pv_header also points into
	 * label_buffer. On failure both lists come back as NULL.
	 */
	rc = read_pv_header(object, pv_label, &pv_header,
			    &data_locations, &metadata_locations);
	if (rc) {
		goto out;
	}

	/* Check for an MDA header. From here on, every failure path must
	 * destroy metadata_locations itself; the common exit only destroys
	 * data_locations.
	 */
	rc = read_mda_header(object, mda_buffer,
			     metadata_locations, &mda_header);
	if (rc) {
		EngFncs->destroy_list(metadata_locations);
		goto out;
	}

	/* Read and parse the VG descriptor within the metadata-area. */
	rc = read_vg_metadata(object, mda_header, &vgda_tree);
	if (rc) {
		EngFncs->destroy_list(metadata_locations);
		goto out;
	}

	/* Find this PV's information in the VGDA tree.
	 * NOTE(review): on this and the following failure path the
	 * vgda_tree returned above is not released here -- confirm
	 * whether it needs to be freed.
	 */
	rc = find_pv_info_in_vgda(vgda_tree, pv_header->pv_uuid,
				  &pv_index, &pe_start, &pe_count, &pv_flags);
	if (rc) {
		EngFncs->destroy_list(metadata_locations);
		goto out;
	}

/* Removing this comparison. LVM1 containers that have been converted to LVM2
 * using the LVM2 tools do not necessarily pass this check.
 */
#if 0
	/* Compare the pe_start and the data_location start. */
	data_location = EngFncs->first_thing(data_locations, NULL);
	if (data_location->mda_start != pe_start) {
		LOG_ERROR("Object %s: conflicting starting location for "
			  "data-area.\n", object->name);
		LOG_ERROR("PV header: %"PRIu64".  PE_Start: %"PRIu64"\n",
			  data_location->mda_start, pe_start);
		EngFncs->destroy_list(metadata_locations);
		rc = EINVAL;
		goto out;
	}
#endif

	/* Allocate private data for this PV object. */
	rc = allocate_pv_data(object, metadata_locations, vgda_tree,
			      pv_header->pv_uuid, pv_label->sector,
			      pv_index, pe_start, pe_count, pv_flags);
	if (rc) {
		EngFncs->destroy_list(metadata_locations);
		goto out;
	}

out:
	/* Common cleanup. Any of these may still be NULL on the early
	 * failure paths.
	 */
	EngFncs->engine_free(label_buffer);
	EngFncs->engine_free(mda_buffer);
	EngFncs->engine_free(mda_header);
	EngFncs->destroy_list(data_locations);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * write_mda_header
 *
 * Write an MDA header to each metadata area of this PV-object.
 *
 * @object:               PV object to write to.
 * @new_header:           If TRUE, the header's "raw_locations" information
 *                        is not filled in.
 * @resized_headers_only: If TRUE, skip metadata areas that start before the
 *                        PV's data area (pe_start).
 * @backup:               If TRUE, hand the header to the engine's
 *                        save_metadata() instead of writing it to the
 *                        object.
 *
 * Returns 0 on success (or if the PV is missing), ENOMEM, or the error from
 * the first write that fails; later areas are then not written.
 **/
int write_mda_header(storage_object_t *object, boolean new_header,
		     boolean resized_headers_only, boolean backup)
{
	pv_data_t *pv_data = object->consuming_private_data;
	metadata_location_t *mda;
	raw_location_t *vgda_slot;
	mda_header_t *hdr = NULL;
	list_element_t iter;
	u_int32_t crc;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Writing MDA headers for object %s.\n", object->name);

	if (MISSING_PV(object)) {
		LOG_DEBUG("Skipping missing object %s.\n", object->name);
		goto out;
	}

	/* One buffer is reused for every header written. */
	hdr = EngFncs->engine_alloc(LVM2_MDA_HEADER_SIZE);
	if (!hdr) {
		rc = ENOMEM;
		goto out;
	}

	/* Fields that are the same for every metadata area. */
	memcpy(hdr->magic, LVM2_FMTT_MAGIC, sizeof(hdr->magic));
	hdr->version = LVM2_FMTT_VERSION;
	vgda_slot = &hdr->raw_locations[0];

	LIST_FOR_EACH(pv_data->metadata_areas, iter, mda) {

		/* After a PV-resize, only the MDA headers that follow the
		 * data area are rewritten.
		 */
		if (resized_headers_only &&
		    mda->mda_start < pv_data->pe_start) {
			continue;
		}

		/* Per-area fields, in bytes. */
		hdr->start = mda->mda_start << EVMS_VSECTOR_SIZE_SHIFT;
		hdr->size = mda->mda_size << EVMS_VSECTOR_SIZE_SHIFT;

		if (!new_header) {
			/* These values are calculated when writing the VGDA. */
			vgda_slot->offset = mda->vgda_offset << EVMS_VSECTOR_SIZE_SHIFT;
			vgda_slot->size = mda->vgda_size;
			vgda_slot->checksum = mda->vgda_crc;
		}

		/* Switch to on-disk byte order, then checksum everything
		 * from the magic field onward.
		 */
		endian_convert_mda_header(hdr);
		crc = lvm2_calc_crc(LVM2_INITIAL_CRC, hdr->magic,
				    LVM2_MDA_HEADER_SIZE - sizeof(hdr->checksum));
		hdr->checksum = CPU_TO_DISK32(crc);

		/* The header goes in the first sector of the area. */
		if (!backup) {
			rc = WRITE(object, mda->mda_start, 1, hdr);
		} else {
			rc = EngFncs->save_metadata(object->consuming_container->name,
						    object->name,
						    mda->mda_start, 1, hdr);
		}
		if (rc) {
			/* FIXME: Should we keep going or bail out??? */
			goto out;
		}

		/* Back to CPU byte order for the next pass. */
		endian_convert_mda_header(hdr);
	}

out:
	EngFncs->engine_free(hdr);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * write_pv_label_and_header
 *
 * Build a fresh PV label and PV header from the object's pv_data and write
 * them, as a single sector, to the PV's label sector.
 *
 * @object: PV object to write to.
 * @backup: If TRUE, hand the sector to the engine's save_metadata() instead
 *          of writing it to the object.
 *
 * Returns 0 on success (or if the PV is missing), ENOMEM, or the error from
 * the write.
 **/
int write_pv_label_and_header(storage_object_t *object, boolean backup)
{
	pv_data_t *pv_data = object->consuming_private_data;
	metadata_location_t *mda;
	label_header_t *label = NULL;
	pv_header_t *pvh;
	disk_location_t *area;
	list_element_t iter;
	u_int32_t crc;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Writing PV label and header for object %s.\n", object->name);

	if (MISSING_PV(object)) {
		LOG_DEBUG("Skipping missing object %s.\n", object->name);
		rc = 0;
		goto out;
	}

	/* A single sector holds both the label and the header. */
	label = EngFncs->engine_alloc(EVMS_VSECTOR_SIZE);
	if (!label) {
		rc = ENOMEM;
		goto out;
	}

	/* PV label. The header is placed directly after it. */
	memcpy(label->id, LVM2_LABEL_ID, sizeof(label->id));
	memcpy(label->type, LVM2_LABEL_TYPE, sizeof(label->type));
	label->sector = pv_data->label_sector;
	label->offset = sizeof(*label);

	/* PV header. device_size is the object size less the start of the
	 * first metadata area and less the size of every metadata area,
	 * computed in sectors and then converted to bytes.
	 */
	pvh = (pv_header_t *)((char *)label + label->offset);
	memcpy(pvh->pv_uuid, pv_data->uuid, sizeof(pvh->pv_uuid));
	pvh->device_size = object->size;
	mda = EngFncs->first_thing(pv_data->metadata_areas, NULL);
	pvh->device_size -= mda->mda_start;
	LIST_FOR_EACH(pv_data->metadata_areas, iter, mda) {
		pvh->device_size -= mda->mda_size;
	}
	pvh->device_size <<= EVMS_VSECTOR_SIZE_SHIFT;

	/* disk_areas: one data-area entry (offset only), one blank entry
	 * ending the data list, then one entry per metadata area.
	 */
	area = pvh->disk_areas;
	area->offset = pv_data->pe_start << EVMS_VSECTOR_SIZE_SHIFT;
	area += 2;

	LIST_FOR_EACH(pv_data->metadata_areas, iter, mda) {
		area->offset = mda->mda_start << EVMS_VSECTOR_SIZE_SHIFT;
		area->size = mda->mda_size << EVMS_VSECTOR_SIZE_SHIFT;
		area++;
	}

	/* Switch both structures to on-disk byte order. */
	endian_convert_pv_label(label);
	endian_convert_pv_header(pvh);

	/* The CRC covers the sector from the label's offset field to the
	 * end of the sector.
	 */
	crc = lvm2_calc_crc(LVM2_INITIAL_CRC, &label->offset,
			    EVMS_VSECTOR_SIZE -
			    ((char *)&label->offset - (char *)label));
	label->crc = CPU_TO_DISK32(crc);

	/* Write (or back up) the sector. */
	if (!backup) {
		rc = WRITE(object, pv_data->label_sector, 1, label);
	} else {
		rc = EngFncs->save_metadata(object->consuming_container->name,
					    object->name,
					    pv_data->label_sector, 1, label);
	}

out:
	EngFncs->engine_free(label);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * erase_metadata
 *
 * Queue kill-sectors for every metadata area of the PV (each area in full)
 * and then for the single sector holding the PV label.
 **/
void erase_metadata(storage_object_t *object)
{
	pv_data_t *pv_data = object->consuming_private_data;
	metadata_location_t *mda;
	list_element_t iter;

	LOG_ENTRY();
	LOG_DEBUG("Erasing metadata from PV %s.\n", object->name);

	/* Metadata areas first... */
	LIST_FOR_EACH(pv_data->metadata_areas, iter, mda) {
		KILL_SECTORS(object, mda->mda_start, mda->mda_size);
	}

	/* ...then the label sector. */
	KILL_SECTORS(object, pv_data->label_sector, 1);

	LOG_EXIT_VOID();
}

