libvirt/src/storage/storage_backend_disk.c
2010-02-09 01:04:54 +01:00

685 lines
23 KiB
C

/*
* storage_backend_disk.c: storage backend for disk handling
*
* Copyright (C) 2007-2008 Red Hat, Inc.
* Copyright (C) 2007-2008 Daniel P. Berrange
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Daniel P. Berrange <berrange@redhat.com>
*/
#include <config.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include "virterror_internal.h"
#include "logging.h"
#include "storage_backend_disk.h"
#include "util.h"
#include "memory.h"
#define VIR_FROM_THIS VIR_FROM_STORAGE
#define PARTHELPER BINDIR "/libvirt_parthelper"
#define SECTOR_SIZE 512
/**
 * Fill in a volume definition from one partition row reported by the
 * partition helper.
 *
 * @conn:   connection used when reporting errors
 * @pool:   pool the partition belongs to
 * @groups: fields of the helper row; groups[0] is used as the device path,
 *          groups[1] as the partition type, groups[2] as the content kind,
 *          groups[3] / groups[4] as the start / end offsets
 * @vol:    volume to populate, or NULL to allocate a fresh one and append
 *          it to the pool's volume list
 *
 * Name, target path, key and extent information are only generated when
 * they are not already present on the volume.  The pool's allocation and
 * capacity counters are updated as a side effect.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virStorageBackendDiskMakeDataVol(virConnectPtr conn,
                                 virStoragePoolObjPtr pool,
                                 char **const groups,
                                 virStorageVolDefPtr vol)
{
    if (!vol) {
        const char *slash;

        if (VIR_ALLOC(vol) < 0) {
            virReportOOMError();
            return -1;
        }

        if (VIR_REALLOC_N(pool->volumes.objs,
                          pool->volumes.count+1) < 0) {
            virReportOOMError();
            virStorageVolDefFree(vol);
            return -1;
        }
        pool->volumes.objs[pool->volumes.count] = vol;
        pool->volumes.count++;

        /* Every partition shares the same leading directory, so the
         * last path component alone is a usable pool-unique name */
        slash = strrchr(groups[0], '/');
        vol->name = strdup(slash ? slash + 1 : groups[0]);
        if (!vol->name) {
            virReportOOMError();
            return -1;
        }
    }

    if (!vol->target.path) {
        char *rawPath = strdup(groups[0]);

        if (!rawPath) {
            virReportOOMError();
            return -1;
        }

        /* Look up the stable path for the device.
         *
         * XXX the original author notes this lookup is O(N) because the
         * pool target dir is scanned on every call; a cheaper scheme
         * would be welcome.
         */
        vol->target.path = virStorageBackendStablePath(pool, rawPath);
        VIR_FREE(rawPath);
        if (!vol->target.path)
            return -1;
    }

    if (!vol->key) {
        /* XXX base off a unique key of the underlying disk */
        vol->key = strdup(vol->target.path);
        if (!vol->key) {
            virReportOOMError();
            return -1;
        }
    }

    if (!vol->source.extents) {
        if (VIR_ALLOC(vol->source.extents) < 0) {
            virReportOOMError();
            return -1;
        }
        vol->source.nextent = 1;

        if (virStrToLong_ull(groups[3], NULL, 10,
                             &vol->source.extents[0].start) < 0) {
            virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                                  "%s", _("cannot parse device start location"));
            return -1;
        }

        if (virStrToLong_ull(groups[4], NULL, 10,
                             &vol->source.extents[0].end) < 0) {
            virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                                  "%s", _("cannot parse device end location"));
            return -1;
        }

        vol->source.extents[0].path =
            strdup(pool->def->source.devices[0].path);
        if (!vol->source.extents[0].path) {
            virReportOOMError();
            return -1;
        }
    }

    /* Refresh allocation/capacity/perms */
    if (virStorageBackendUpdateVolInfo(vol, 1) < 0)
        return -1;

    /* Record which kind of partition this is */
    if (STREQ(groups[1], "normal")) {
        vol->target.type = VIR_STORAGE_VOL_DISK_TYPE_PRIMARY;
    } else if (STREQ(groups[1], "logical")) {
        vol->target.type = VIR_STORAGE_VOL_DISK_TYPE_LOGICAL;
    } else if (STREQ(groups[1], "extended")) {
        vol->target.type = VIR_STORAGE_VOL_DISK_TYPE_EXTENDED;
    } else {
        vol->target.type = VIR_STORAGE_VOL_DISK_TYPE_NONE;
    }

    vol->type = VIR_STORAGE_VOL_BLOCK;

    /* The refresh above gets the allocation wrong for extended
     * partitions, so derive both sizes from the extent instead */
    vol->capacity = vol->source.extents[0].end - vol->source.extents[0].start;
    vol->allocation = vol->capacity;

    /* Metadata regions do not count towards the pool's allocation */
    if (STRNEQ(groups[2], "metadata"))
        pool->def->allocation += vol->allocation;

    /* The pool capacity tracks the highest end offset seen so far */
    if (vol->source.extents[0].end > pool->def->capacity)
        pool->def->capacity = vol->source.extents[0].end;

    return 0;
}
/**
 * Record one free-space row reported by the partition helper as a free
 * extent of the pool's first source device.
 *
 * @conn:   connection used when reporting errors
 * @pool:   pool whose source device receives the extent
 * @groups: fields of the helper row; groups[1] is the area type,
 *          groups[3] / groups[4] the start / end offsets
 *
 * On success the extent is appended to the device's freeExtents array
 * and the pool's available and capacity counters are updated.
 *
 * Returns 0 on success, -1 (with an error reported) on allocation or
 * parse failure.
 */
static int
virStorageBackendDiskMakeFreeExtent(virConnectPtr conn,
                                    virStoragePoolObjPtr pool,
                                    char **const groups)
{
    virStoragePoolSourceDevicePtr dev = &pool->def->source.devices[0];

    if (VIR_REALLOC_N(dev->freeExtents,
                      dev->nfreeExtent + 1) < 0) {
        virReportOOMError();
        return -1;
    }

    /* Start from a zeroed slot; nfreeExtent is only bumped on success */
    memset(dev->freeExtents +
           dev->nfreeExtent, 0,
           sizeof(dev->freeExtents[0]));

    /* set type of free area */
    if (STREQ(groups[1], "logical")) {
        dev->freeExtents[dev->nfreeExtent].type = VIR_STORAGE_FREE_LOGICAL;
    } else {
        dev->freeExtents[dev->nfreeExtent].type = VIR_STORAGE_FREE_NORMAL;
    }

    /* On parse failure the array is left one slot larger than
     * nfreeExtent; the original code relies on it being freed shortly */
    if (virStrToLong_ull(groups[3], NULL, 10,
                         &dev->freeExtents[dev->nfreeExtent].start) < 0) {
        virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                              "%s", _("cannot parse free extent start location"));
        return -1;
    }

    if (virStrToLong_ull(groups[4], NULL, 10,
                         &dev->freeExtents[dev->nfreeExtent].end) < 0) {
        virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                              "%s", _("cannot parse free extent end location"));
        return -1;
    }

    /* first block reported as free, even if it is not */
    if (dev->freeExtents[dev->nfreeExtent].start == 0) {
        dev->freeExtents[dev->nfreeExtent].start = SECTOR_SIZE;
    }

    pool->def->available +=
        (dev->freeExtents[dev->nfreeExtent].end -
         dev->freeExtents[dev->nfreeExtent].start);

    /* The pool capacity tracks the highest end offset seen so far */
    if (dev->freeExtents[dev->nfreeExtent].end > pool->def->capacity)
        pool->def->capacity = dev->freeExtents[dev->nfreeExtent].end;

    dev->nfreeExtent++;

    return 0;
}
/**
 * Per-row callback for the partition listing: dispatches one helper row
 * to the volume or free-extent handler.
 *
 * @conn:   connection used when reporting errors
 * @pool:   pool being populated
 * @ntok:   number of tokens in the row (unused)
 * @groups: fields of the helper row; groups[0] is matched against volume
 *          keys, groups[1] is the partition type and groups[2] the
 *          content kind ("data", "metadata" or "free")
 * @data:   optional virStorageVolDefPtr; when non-NULL only the row that
 *          belongs to that volume is turned into a volume
 *
 * Returns 0 when the row was handled or deliberately skipped, -1 on
 * failure or when the content kind is not recognised.
 */
static int
virStorageBackendDiskMakeVol(virConnectPtr conn,
                             virStoragePoolObjPtr pool,
                             size_t ntok ATTRIBUTE_UNUSED,
                             char **const groups,
                             void *data)
{
    virStorageVolDefPtr wanted = data;
    int bookKeepingType = STREQ(groups[1], "normal") ||
                          STREQ(groups[1], "logical");
    int isMetadata = STREQ(groups[2], "metadata");

    /*
     * normal+metadata and logical+metadata rows are internal
     * book-keeping regions we have no control over, so skip them.
     * extended+metadata is kept: it is the MS-DOS extended partition
     * region, which must stay visible so it can be created/deleted.
     */
    if (bookKeepingType && isMetadata)
        return 0;

    /* Free space rows become free extents */
    if (STREQ(groups[2], "free"))
        return virStorageBackendDiskMakeFreeExtent(conn, pool, groups);

    /* Anything other than data / metadata at this point means
     * libvirt_parthelper was changed without updating this code */
    if (!isMetadata && STRNEQ(groups[2], "data"))
        return -1;

    /* Remaining data / metadata rows are turned into volumes */
    if (wanted) {
        /* Only one specific volume is being looked for */
        if (wanted->key) {
            if (STRNEQ(wanted->key, groups[0]))
                return 0;
        } else if (virStorageVolDefFindByKey(pool, groups[0]) != NULL) {
            /* A volume without a key must be newly created, so a row
             * that already maps to a known volume cannot be it; the
             * first row that is not yet a volume is assumed to be the
             * one we want */
            return 0;
        }
    }

    return virStorageBackendDiskMakeDataVol(conn, pool, groups, wanted);
}
/* The partition list is obtained by running an external helper tool
 * which in turn uses the parted APIs.  The helper exists because
 * parted's API is not compatible with libvirt's license, yet parted
 * is still the preferred way to inspect partition tables.
 *
 * All the other storage backends run an external tool for listing
 * volumes too, so this is not much of a burden, and it lets us keep
 * the output format friendly to parse.
 *
 * @conn: connection used when reporting errors
 * @pool: pool whose first source device is scanned
 * @vol:  optional volume handed through to the per-row callback;
 *        NULL to process every row
 *
 * The pool's allocation, capacity and available counters are reset
 * to zero before the helper is run.
 *
 * Returns the result of virStorageBackendRunProgNul().
 */
static int
virStorageBackendDiskReadPartitions(virConnectPtr conn,
                                    virStoragePoolObjPtr pool,
                                    virStorageVolDefPtr vol)
{
    /*
     * Example helper output, six fields per row:
     *
     * # libvirt_parthelper DEVICE
     * /dev/sda1      normal       data        32256    106928128    106896384
     * /dev/sda2      normal       data    106928640 100027629568  99920701440
     * -              normal   metadata 100027630080 100030242304      2612736
     */
    const char *helperArgv[] = {
        PARTHELPER,
        pool->def->source.devices[0].path,
        NULL,
    };

    /* The per-row callbacks accumulate into these, so start from zero */
    pool->def->available = 0;
    pool->def->capacity = 0;
    pool->def->allocation = 0;

    return virStorageBackendRunProgNul(conn, pool, helperArgv, 6,
                                       virStorageBackendDiskMakeVol, vol);
}
/**
 * Per-row callback for the geometry query: stores the cylinders, heads
 * and sectors values on the pool's first source device.
 *
 * @conn:   connection used when reporting errors
 * @pool:   pool whose source device geometry is updated
 * @ntok:   number of tokens in the row (unused)
 * @groups: groups[0] = cylinders, groups[1] = heads, groups[2] = sectors
 * @data:   unused
 *
 * The values are parsed strictly: atoi() would silently turn garbage
 * into 0, and a zero or negative geometry later feeds the cylinder-size
 * arithmetic used for partition alignment.  The geometry is only
 * written once all three fields have parsed.
 *
 * Returns 0 on success, -1 (with an error reported) if any field is not
 * a valid non-negative integer.
 */
static int
virStorageBackendDiskMakePoolGeometry(virConnectPtr conn,
                                      virStoragePoolObjPtr pool,
                                      size_t ntok ATTRIBUTE_UNUSED,
                                      char **const groups,
                                      void *data ATTRIBUTE_UNUSED)
{
    int cylinders = 0;
    int heads = 0;
    int sectors = 0;

    if (virStrToLong_i(groups[0], NULL, 10, &cylinders) < 0 ||
        virStrToLong_i(groups[1], NULL, 10, &heads) < 0 ||
        virStrToLong_i(groups[2], NULL, 10, &sectors) < 0 ||
        cylinders < 0 || heads < 0 || sectors < 0) {
        virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                              "%s", _("cannot parse device geometry"));
        return -1;
    }

    /* "cyliders" is the (misspelt) field name used by the geometry struct */
    pool->def->source.devices[0].geometry.cyliders = cylinders;
    pool->def->source.devices[0].geometry.heads = heads;
    pool->def->source.devices[0].geometry.sectors = sectors;

    return 0;
}
/**
 * Query the geometry of the pool's first source device by running the
 * partition helper with the "-g" option; each three-field output row is
 * handed to virStorageBackendDiskMakePoolGeometry().
 *
 * @conn: connection used when reporting errors
 * @pool: pool whose source device is queried
 *
 * Returns the result of virStorageBackendRunProgNul().
 */
static int
virStorageBackendDiskReadGeometry(virConnectPtr conn, virStoragePoolObjPtr pool)
{
    const char *helperArgv[] = {
        PARTHELPER,
        pool->def->source.devices[0].path,
        "-g",
        NULL,
    };

    return virStorageBackendRunProgNul(conn, pool, helperArgv, 3,
                                       virStorageBackendDiskMakePoolGeometry,
                                       NULL);
}
/**
 * Refresh the pool: drop the cached free extents, then re-read the
 * device geometry and the partition list.
 *
 * @conn: connection used when reporting errors
 * @pool: pool to refresh
 *
 * Returns 0 on success, -1 if the geometry cannot be read, otherwise the
 * result of reading the partitions.
 */
static int
virStorageBackendDiskRefreshPool(virConnectPtr conn,
                                 virStoragePoolObjPtr pool)
{
    virStoragePoolSourceDevicePtr dev = &pool->def->source.devices[0];

    /* Free extents are rebuilt from scratch by the partition scan */
    VIR_FREE(dev->freeExtents);
    dev->nfreeExtent = 0;

    virFileWaitForDevices(conn);

    if (virStorageBackendDiskReadGeometry(conn, pool) != 0)
        return -1;

    return virStorageBackendDiskReadPartitions(conn, pool, NULL);
}
/**
 * Write a new partition table header onto the pool's first source
 * device by running parted's "mklabel" command.
 *
 * @conn:  connection used when reporting errors
 * @pool:  pool whose source device gets the new label
 * @flags: unused
 *
 * The DOS pool format maps to parted's "msdos" label; every other format
 * uses the name returned by virStoragePoolFormatDiskTypeToString().
 *
 * Returns 0 on success, -1 if the format has no label name or parted
 * fails.
 */
static int
virStorageBackendDiskBuildPool(virConnectPtr conn,
                               virStoragePoolObjPtr pool,
                               unsigned int flags ATTRIBUTE_UNUSED)
{
    const char *label;

    if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS)
        label = "msdos";
    else
        label = virStoragePoolFormatDiskTypeToString(pool->def->source.format);

    /* A NULL label would terminate the argv early and run
     * "parted DEV mklabel --script" with no label type at all */
    if (label == NULL) {
        virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                              "%s", _("Invalid partition table type"));
        return -1;
    }

    /* eg parted /dev/sda mklabel msdos */
    const char *prog[] = {
        PARTED,
        pool->def->source.devices[0].path,
        "mklabel",
        "--script",
        label,
        NULL,
    };

    if (virRun(conn, prog, NULL) < 0)
        return -1;

    return 0;
}
/**
 * Decide which kind of partition should be created next.  This only
 * matters when the partition table is of msdos type.
 *
 * @pool: pool whose existing volumes are inspected
 *
 * Returns VIR_STORAGE_VOL_DISK_TYPE_LOGICAL when a DOS pool already
 * holds four or more primary/extended partitions, and
 * VIR_STORAGE_VOL_DISK_TYPE_PRIMARY in every other case.
 */
static int
virStorageBackendDiskPartTypeToCreate(virStoragePoolObjPtr pool)
{
    int idx;
    int used = 0;

    /* Outside of msdos tables every partition is a primary one */
    if (pool->def->source.format != VIR_STORAGE_POOL_DISK_DOS)
        return VIR_STORAGE_VOL_DISK_TYPE_PRIMARY;

    /* Count primary and extended partitions: a new primary partition
     * can only be created while at most 3 of them exist */
    for (idx = 0; idx < pool->volumes.count; idx++) {
        int kind = pool->volumes.objs[idx]->target.type;

        if (kind == VIR_STORAGE_VOL_DISK_TYPE_PRIMARY ||
            kind == VIR_STORAGE_VOL_DISK_TYPE_EXTENDED)
            used++;
    }

    if (used >= 4)
        return VIR_STORAGE_VOL_DISK_TYPE_LOGICAL;

    return VIR_STORAGE_VOL_DISK_TYPE_PRIMARY;
}
/**
 * Build the partition-type argument for "parted mkpart".
 *
 * @conn:       connection used when reporting errors
 * @pool:       pool the new volume will live in
 * @vol:        volume being created; its target.format selects the
 *              parted filesystem type
 * @partFormat: output buffer, written with sprintf(); the caller must
 *              provide enough room for "logical " plus the parted type
 *              name
 *
 * For non-DOS pools the result is always "primary".  For DOS pools:
 *   - a requested extended partition yields the bare parted type name,
 *     and is refused if an extended partition already exists;
 *   - otherwise "primary <type>" is used while primary slots remain,
 *     and "logical <type>" once they are exhausted, which requires an
 *     existing extended partition.
 *
 * Existing partitions are classified through target.type, which is the
 * field the partition scan in this file fills in (target.format is not
 * set to a partition kind by the scan).
 *
 * Returns 0 on success, -1 (with an error reported) on failure.
 */
static int
virStorageBackendDiskPartFormat(virConnectPtr conn, virStoragePoolObjPtr pool,
                                virStorageVolDefPtr vol,
                                char* partFormat)
{
    int i;

    if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) {
        const char *partedFormat = virStoragePartedFsTypeTypeToString(vol->target.format);
        if (partedFormat == NULL) {
            virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                                  "%s", _("Invalid partition type"));
            return -1;
        }

        if (vol->target.format == VIR_STORAGE_VOL_DISK_EXTENDED) {
            /* make sure we don't have an extended partition already */
            for (i = 0; i < pool->volumes.count; i++) {
                if (pool->volumes.objs[i]->target.type ==
                    VIR_STORAGE_VOL_DISK_TYPE_EXTENDED) {
                    virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                                          "%s", _("extended partition already exists"));
                    return -1;
                }
            }
            sprintf(partFormat, "%s", partedFormat);
        } else {
            /* create primary partitions for as long as that is possible,
               and after that check that an extended partition exists
               so logical partitions can be created inside it. */
            /* XXX Only support one extended partition */
            switch (virStorageBackendDiskPartTypeToCreate(pool)) {
            case VIR_STORAGE_VOL_DISK_TYPE_PRIMARY:
                sprintf(partFormat, "primary %s", partedFormat);
                break;
            case VIR_STORAGE_VOL_DISK_TYPE_LOGICAL:
                /* make sure we have an extended partition */
                for (i = 0; i < pool->volumes.count; i++) {
                    if (pool->volumes.objs[i]->target.type ==
                        VIR_STORAGE_VOL_DISK_TYPE_EXTENDED) {
                        sprintf(partFormat, "logical %s", partedFormat);
                        break;
                    }
                }
                if (i == pool->volumes.count) {
                    virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                                          "%s", _("no extended partition found and no primary partition available"));
                    return -1;
                }
                break;
            default:
                /* Never report success with partFormat left unwritten */
                virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                                      "%s", _("unknown partition type"));
                return -1;
            }
        }
    } else {
        sprintf(partFormat, "primary");
    }

    return 0;
}
/**
 * Pick the free extent for a new partition and compute its byte range.
 *
 * For msdos partition tables the partition is aligned to the nearest
 * cylinder boundary to avoid problems with already existing partitions.
 *
 * @conn:       connection used when reporting errors
 * @pool:       pool whose free extents are searched
 * @start:      output, first byte of the new partition
 * @end:        output, last byte of the new partition
 * @allocation: requested size in bytes
 *
 * The smallest free extent that is strictly larger than the needed size
 * and of the right kind (logical vs. normal free area) is chosen.
 *
 * Returns 0 on success, -1 (with an error reported) if no extent fits or
 * the device geometry is unusable for alignment.
 */
static int
virStorageBackendDiskPartBoundries(virConnectPtr conn,
                                   virStoragePoolObjPtr pool,
                                   unsigned long long *start,
                                   unsigned long long *end,
                                   unsigned long long allocation)
{
    int i;
    int smallestExtent = -1;
    unsigned long long smallestSize = 0;
    unsigned long long extraBytes = 0;
    unsigned long long alignedAllocation = allocation;
    virStoragePoolSourceDevicePtr dev = &pool->def->source.devices[0];
    int alignToCylinder =
        (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS);
    /* widen before multiplying so the product cannot overflow int */
    unsigned long long cylinderSize =
        (unsigned long long)dev->geometry.heads *
        dev->geometry.sectors * SECTOR_SIZE;
    int partType = virStorageBackendDiskPartTypeToCreate(pool);

    DEBUG("find free area: allocation %llu, cyl size %llu\n", allocation, cylinderSize);

    if (alignToCylinder) {
        /* a zero cylinder size would make the modulo below divide by zero */
        if (cylinderSize == 0) {
            virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                                  "%s", _("invalid device geometry"));
            return -1;
        }

        /* how many extra bytes we have since we allocate
           aligned to the cylinder boundary */
        extraBytes = cylinderSize - (allocation % cylinderSize);
    }

    for (i = 0 ; i < dev->nfreeExtent ; i++) {
        unsigned long long size =
            dev->freeExtents[i].end -
            dev->freeExtents[i].start;
        unsigned long long neededSize = allocation;
        /* where the partition would really begin inside this extent;
           logical partitions start one block in (see below) */
        unsigned long long partStart = dev->freeExtents[i].start;

        if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL)
            partStart += SECTOR_SIZE;

        if (alignToCylinder) {
            /* align to cylinder boundary */
            neededSize += extraBytes;
            if ((partStart % cylinderSize) > extraBytes) {
                /* add an extra cylinder if the offset of this extent
                   can't fit within the extra bytes we have */
                neededSize += cylinderSize;
            }
            /* if we are creating a logical partition, we need one extra
               block between partitions (or actually move start one block);
               clamp so a tiny extent cannot wrap around to a huge size */
            if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL) {
                size = (size > SECTOR_SIZE) ? size - SECTOR_SIZE : 0;
            }
        }

        if (size > neededSize &&
            (smallestSize == 0 ||
             size < smallestSize)) {
            /* for logical partition, the free extent
               must be within a logical free area */
            if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL &&
                dev->freeExtents[i].type != VIR_STORAGE_FREE_LOGICAL) {
                continue;
            /* for primary partition, the free extent
               must not be within a logical free area */
            } else if (partType == VIR_STORAGE_VOL_DISK_TYPE_PRIMARY &&
                       dev->freeExtents[i].type != VIR_STORAGE_FREE_NORMAL) {
                continue;
            }
            smallestSize = size;
            smallestExtent = i;
            alignedAllocation = neededSize;
        }
    }

    if (smallestExtent == -1) {
        virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                              "%s", _("no large enough free extent"));
        return -1;
    }

    DEBUG("aligned alloc %llu\n", alignedAllocation);
    *start = dev->freeExtents[smallestExtent].start;

    if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL) {
        /* for logical partition, skip one block */
        *start += SECTOR_SIZE;
    }

    *end = *start + alignedAllocation;
    if (alignToCylinder) {
        /* adjust our allocation if start is not at a cylinder boundary */
        *end -= (*start % cylinderSize);
    }

    /* counting in bytes, we want the last byte of the current sector */
    *end -= 1;
    DEBUG("final aligned start %llu, end %llu\n", *start, *end);
    return 0;
}
/**
 * Create a new partition for @vol with "parted mkpart" and then re-read
 * the partition table to pick up the resulting extent and key.
 *
 * @conn: connection used when reporting errors
 * @pool: pool whose first source device receives the partition
 * @vol:  volume to create; vol->capacity is the requested size
 *
 * Encrypted volumes are rejected.  Any target path supplied on @vol is
 * discarded before the partition table is re-read.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virStorageBackendDiskCreateVol(virConnectPtr conn,
                               virStoragePoolObjPtr pool,
                               virStorageVolDefPtr vol)
{
    char startArg[100], endArg[100], typeArg[100];
    unsigned long long firstByte = 0, lastByte = 0;
    virStoragePoolSourceDevicePtr dev = &pool->def->source.devices[0];
    /* The three buffers are filled in below, before the command runs */
    const char *mkpartArgv[] = {
        PARTED,
        dev->path,
        "mkpart",
        "--script",
        typeArg,
        startArg,
        endArg,
        NULL
    };

    if (vol->target.encryption != NULL) {
        virStorageReportError(conn, VIR_ERR_NO_SUPPORT,
                              "%s", _("storage pool does not support encrypted "
                                      "volumes"));
        return -1;
    }

    /* Work out the partition type first, then where it should go */
    if (virStorageBackendDiskPartFormat(conn, pool, vol, typeArg) != 0 ||
        virStorageBackendDiskPartBoundries(conn, pool, &firstByte,
                                           &lastByte, vol->capacity) != 0)
        return -1;

    /* parted takes the offsets as byte counts with a "B" suffix */
    snprintf(startArg, sizeof(startArg)-1, "%lluB", firstByte);
    startArg[sizeof(startArg)-1] = '\0';
    snprintf(endArg, sizeof(endArg)-1, "%lluB", lastByte);
    endArg[sizeof(endArg)-1] = '\0';

    if (virRun(conn, mkpartArgv, NULL) < 0)
        return -1;

    /* wait for device node to show up */
    virFileWaitForDevices(conn);

    /* The free extent list is stale now and is rebuilt by the re-read */
    VIR_FREE(dev->freeExtents);
    dev->nfreeExtent = 0;

    /* Specifying a target path is meaningless */
    VIR_FREE(vol->target.path);

    /* Fetch actual extent info, generate key */
    return virStorageBackendDiskReadPartitions(conn, pool, vol) < 0 ? -1 : 0;
}
/**
 * Build @vol from the contents of @inputvol by delegating to the build
 * function selected by virStorageBackendGetBuildVolFromFunction().
 *
 * @conn:     connection used when reporting errors
 * @pool:     pool containing @vol
 * @vol:      volume to build
 * @inputvol: volume to copy from
 * @flags:    passed through to the build function
 *
 * Returns -1 if no build function is available, otherwise the build
 * function's result.
 */
static int
virStorageBackendDiskBuildVolFrom(virConnectPtr conn,
                                  virStoragePoolObjPtr pool,
                                  virStorageVolDefPtr vol,
                                  virStorageVolDefPtr inputvol,
                                  unsigned int flags)
{
    virStorageBackendBuildVolFrom builder =
        virStorageBackendGetBuildVolFromFunction(conn, vol, inputvol);

    if (builder)
        return builder(conn, pool, vol, inputvol, flags);

    return -1;
}
/**
 * Delete the partition backing @vol with "parted rm".
 *
 * @conn:  connection used when reporting errors
 * @pool:  pool whose first source device holds the partition
 * @vol:   volume to delete; its target path is resolved to find the
 *         real device name
 * @flags: unused
 *
 * The partition number is whatever follows the source device's base
 * name in the resolved volume device name (e.g. "sda2" on "sda" -> "2").
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virStorageBackendDiskDeleteVol(virConnectPtr conn,
                               virStoragePoolObjPtr pool,
                               virStorageVolDefPtr vol,
                               unsigned int flags ATTRIBUTE_UNUSED)
{
    char *resolved = NULL;
    char *partName, *diskName;
    char *partNumber = NULL;
    int linkErr;
    int ret = -1;

    linkErr = virFileResolveLink(vol->target.path, &resolved);
    if (linkErr < 0) {
        virReportSystemError(linkErr,
                             _("Couldn't read volume target path '%s'"),
                             vol->target.path);
        goto cleanup;
    }

    partName = basename(resolved);
    diskName = basename(pool->def->source.devices[0].path);
    DEBUG("devname=%s, srcname=%s", partName, diskName);

    if (!STRPREFIX(partName, diskName)) {
        virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                              _("Volume path '%s' did not start with parent "
                                "pool source device name."), partName);
        goto cleanup;
    }

    /* Whatever trails the disk name is taken as the partition number */
    partNumber = partName + strlen(diskName);
    if (*partNumber == 0) {
        virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR,
                              _("cannot parse partition number from target "
                                "'%s'"), partName);
        goto cleanup;
    }

    {
        /* eg parted /dev/sda rm 2 */
        const char *rmArgv[] = {
            PARTED,
            pool->def->source.devices[0].path,
            "rm",
            "--script",
            partNumber,
            NULL,
        };

        if (virRun(conn, rmArgv, NULL) >= 0)
            ret = 0;
    }

cleanup:
    /* partName / partNumber point into this buffer, so free it last */
    VIR_FREE(resolved);
    return ret;
}
/* Driver table for the disk storage backend: registers the pool and
 * volume operations implemented in this file under the
 * VIR_STORAGE_POOL_DISK pool type.  Operations not listed here are left
 * at their zero-initialized default. */
virStorageBackend virStorageBackendDisk = {
.type = VIR_STORAGE_POOL_DISK,
.buildPool = virStorageBackendDiskBuildPool,
.refreshPool = virStorageBackendDiskRefreshPool,
.createVol = virStorageBackendDiskCreateVol,
.deleteVol = virStorageBackendDiskDeleteVol,
.buildVolFrom = virStorageBackendDiskBuildVolFrom,
};