/* * storage_backend_disk.c: storage backend for disk handling * * Copyright (C) 2007-2008 Red Hat, Inc. * Copyright (C) 2007-2008 Daniel P. Berrange * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Author: Daniel P. Berrange */ #include #include #include #include #include "virterror_internal.h" #include "logging.h" #include "storage_backend_disk.h" #include "util.h" #include "memory.h" #define VIR_FROM_THIS VIR_FROM_STORAGE #define PARTHELPER BINDIR "/libvirt_parthelper" #define SECTOR_SIZE 512 static int virStorageBackendDiskMakeDataVol(virConnectPtr conn, virStoragePoolObjPtr pool, char **const groups, virStorageVolDefPtr vol) { char *tmp, *devpath; if (vol == NULL) { if (VIR_ALLOC(vol) < 0) { virReportOOMError(); return -1; } if (VIR_REALLOC_N(pool->volumes.objs, pool->volumes.count+1) < 0) { virReportOOMError(); virStorageVolDefFree(vol); return -1; } pool->volumes.objs[pool->volumes.count++] = vol; /* Prepended path will be same for all partitions, so we can * strip the path to form a reasonable pool-unique name */ tmp = strrchr(groups[0], '/'); if ((vol->name = strdup(tmp ? tmp + 1 : groups[0])) == NULL) { virReportOOMError(); return -1; } } if (vol->target.path == NULL) { if ((devpath = strdup(groups[0])) == NULL) { virReportOOMError(); return -1; } /* Now figure out the stable path * * XXX this method is O(N) because it scans the pool target * dir every time its run. Should figure out a more efficient * way of doing this... */ vol->target.path = virStorageBackendStablePath(pool, devpath); VIR_FREE(devpath); if (vol->target.path == NULL) return -1; } if (vol->key == NULL) { /* XXX base off a unique key of the underlying disk */ if ((vol->key = strdup(vol->target.path)) == NULL) { virReportOOMError(); return -1; } } if (vol->source.extents == NULL) { if (VIR_ALLOC(vol->source.extents) < 0) { virReportOOMError(); return -1; } vol->source.nextent = 1; if (virStrToLong_ull(groups[3], NULL, 10, &vol->source.extents[0].start) < 0) { virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", _("cannot parse device start location")); return -1; } if (virStrToLong_ull(groups[4], NULL, 10, &vol->source.extents[0].end) < 0) { virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", _("cannot parse device end location")); return -1; } if ((vol->source.extents[0].path = strdup(pool->def->source.devices[0].path)) == NULL) { virReportOOMError(); return -1; } } /* Refresh allocation/capacity/perms */ if (virStorageBackendUpdateVolInfo(vol, 1) < 0) return -1; /* set partition type */ if(STREQ(groups[1], "normal")) vol->target.type = VIR_STORAGE_VOL_DISK_TYPE_PRIMARY; else if(STREQ(groups[1], "logical")) vol->target.type = VIR_STORAGE_VOL_DISK_TYPE_LOGICAL; else if(STREQ(groups[1], "extended")) vol->target.type = VIR_STORAGE_VOL_DISK_TYPE_EXTENDED; else vol->target.type = VIR_STORAGE_VOL_DISK_TYPE_NONE; vol->type = VIR_STORAGE_VOL_BLOCK; /* The above gets allocation wrong for * extended partitions, so overwrite it */ vol->allocation = vol->capacity = (vol->source.extents[0].end - vol->source.extents[0].start); if (STRNEQ(groups[2], "metadata")) pool->def->allocation += vol->allocation; if (vol->source.extents[0].end > pool->def->capacity) pool->def->capacity = vol->source.extents[0].end; return 0; } static int virStorageBackendDiskMakeFreeExtent(virConnectPtr conn ATTRIBUTE_UNUSED, virStoragePoolObjPtr pool, char **const groups) { virStoragePoolSourceDevicePtr dev = &pool->def->source.devices[0]; if (VIR_REALLOC_N(dev->freeExtents, dev->nfreeExtent + 1) < 0) return -1; memset(dev->freeExtents + dev->nfreeExtent, 0, sizeof(dev->freeExtents[0])); /* set type of free area */ if(STREQ(groups[1], "logical")) { dev->freeExtents[dev->nfreeExtent].type = VIR_STORAGE_FREE_LOGICAL; } else { dev->freeExtents[dev->nfreeExtent].type = VIR_STORAGE_FREE_NORMAL; } if (virStrToLong_ull(groups[3], NULL, 10, &dev->freeExtents[dev->nfreeExtent].start) < 0) return -1; /* Don't bother to re-alloc freeExtents - it'll be free'd shortly */ if (virStrToLong_ull(groups[4], NULL, 10, &dev->freeExtents[dev->nfreeExtent].end) < 0) return -1; /* Don't bother to re-alloc freeExtents - it'll be free'd shortly */ /* first block reported as free, even if it is not */ if (dev->freeExtents[dev->nfreeExtent].start == 0) { dev->freeExtents[dev->nfreeExtent].start = SECTOR_SIZE; } pool->def->available += (dev->freeExtents[dev->nfreeExtent].end - dev->freeExtents[dev->nfreeExtent].start); if (dev->freeExtents[dev->nfreeExtent].end > pool->def->capacity) pool->def->capacity = dev->freeExtents[dev->nfreeExtent].end; dev->nfreeExtent++; return 0; } static int virStorageBackendDiskMakeVol(virConnectPtr conn, virStoragePoolObjPtr pool, size_t ntok ATTRIBUTE_UNUSED, char **const groups, void *data) { /* * Ignore normal+metadata, and logical+metadata partitions * since they're basically internal book-keeping regions * we have no control over. Do keep extended+metadata though * because that's the MS-DOS extended partition region we * need to be able to view/create/delete */ if ((STREQ(groups[1], "normal") || STREQ(groups[1], "logical")) && STREQ(groups[2], "metadata")) return 0; /* Remaining data / metadata parts get turn into volumes... */ if (STREQ(groups[2], "metadata") || STREQ(groups[2], "data")) { virStorageVolDefPtr vol = data; if (vol) { /* We're searching for a specific vol only */ if (vol->key) { if (STRNEQ(vol->key, groups[0])) return 0; } else if (virStorageVolDefFindByKey(pool, groups[0]) != NULL) { /* If no key, the volume must be newly created. If groups[0] * isn't already a volume, assume it's the path we want */ return 0; } } return virStorageBackendDiskMakeDataVol(conn, pool, groups, vol); } else if (STREQ(groups[2], "free")) { /* ....or free space extents */ return virStorageBackendDiskMakeFreeExtent(conn, pool, groups); } else { /* This code path should never happen unless someone changed * libvirt_parthelper forgot to change this code */ return -1; } } /* To get a list of partitions we run an external helper * tool which then uses parted APIs. This is because * parted's API is not compatible with libvirt's license * but we really really want to use parted because the * other options all suck :-) * * All the other storage backends run an external tool for * listing volumes so this really isn't too much of a pain, * and we can even ensure the output is friendly. */ static int virStorageBackendDiskReadPartitions(virConnectPtr conn, virStoragePoolObjPtr pool, virStorageVolDefPtr vol) { /* * # libvirt_parthelper DEVICE * /dev/sda1 normal data 32256 106928128 106896384 * /dev/sda2 normal data 106928640 100027629568 99920701440 * - normal metadata 100027630080 100030242304 2612736 * */ const char *prog[] = { PARTHELPER, pool->def->source.devices[0].path, NULL, }; pool->def->allocation = pool->def->capacity = pool->def->available = 0; return virStorageBackendRunProgNul(conn, pool, prog, 6, virStorageBackendDiskMakeVol, vol); } static int virStorageBackendDiskMakePoolGeometry(virConnectPtr conn ATTRIBUTE_UNUSED, virStoragePoolObjPtr pool, size_t ntok ATTRIBUTE_UNUSED, char **const groups, void *data ATTRIBUTE_UNUSED) { pool->def->source.devices[0].geometry.cyliders = atoi(groups[0]); pool->def->source.devices[0].geometry.heads = atoi(groups[1]); pool->def->source.devices[0].geometry.sectors = atoi(groups[2]); return 0; } static int virStorageBackendDiskReadGeometry(virConnectPtr conn, virStoragePoolObjPtr pool) { const char *prog[] = { PARTHELPER, pool->def->source.devices[0].path, "-g", NULL, }; return virStorageBackendRunProgNul(conn, pool, prog, 3, virStorageBackendDiskMakePoolGeometry, NULL); } static int virStorageBackendDiskRefreshPool(virConnectPtr conn, virStoragePoolObjPtr pool) { VIR_FREE(pool->def->source.devices[0].freeExtents); pool->def->source.devices[0].nfreeExtent = 0; virFileWaitForDevices(); if (virStorageBackendDiskReadGeometry(conn, pool) != 0) { return -1; } return virStorageBackendDiskReadPartitions(conn, pool, NULL); } /** * Write a new partition table header */ static int virStorageBackendDiskBuildPool(virConnectPtr conn ATTRIBUTE_UNUSED, virStoragePoolObjPtr pool, unsigned int flags ATTRIBUTE_UNUSED) { /* eg parted /dev/sda mklabel msdos */ const char *prog[] = { PARTED, pool->def->source.devices[0].path, "mklabel", "--script", ((pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) ? "msdos" : virStoragePoolFormatDiskTypeToString(pool->def->source.format)), NULL, }; if (virRun(prog, NULL) < 0) return -1; return 0; } /** * Decides what kind of partition type that should be created. * Important when the partition table is of msdos type */ static int virStorageBackendDiskPartTypeToCreate(virStoragePoolObjPtr pool) { if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) { /* count primary and extended paritions, can't be more than 3 to create a new primary partition */ int i; int count = 0; for (i = 0; i < pool->volumes.count; i++) { if (pool->volumes.objs[i]->target.type == VIR_STORAGE_VOL_DISK_TYPE_PRIMARY || pool->volumes.objs[i]->target.type == VIR_STORAGE_VOL_DISK_TYPE_EXTENDED) { count++; } } if (count >= 4) { return VIR_STORAGE_VOL_DISK_TYPE_LOGICAL; } } /* for all other cases, all partitions are primary */ return VIR_STORAGE_VOL_DISK_TYPE_PRIMARY; } static int virStorageBackendDiskPartFormat(virConnectPtr conn, virStoragePoolObjPtr pool, virStorageVolDefPtr vol, char* partFormat) { int i; if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) { const char *partedFormat = virStoragePartedFsTypeTypeToString(vol->target.format); if(partedFormat == NULL) { virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", _("Invalid partition type")); return -1; } if (vol->target.format == VIR_STORAGE_VOL_DISK_EXTENDED) { /* make sure we don't have a extended partition already */ for (i = 0; i < pool->volumes.count; i++) { if (pool->volumes.objs[i]->target.format == VIR_STORAGE_VOL_DISK_EXTENDED) { virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", _("extended partition already exists")); return -1; } } sprintf(partFormat, "%s", partedFormat); } else { /* create primary partition as long as it is possible and after that check if an extended partition exists to create logical partitions. */ /* XXX Only support one extended partition */ switch (virStorageBackendDiskPartTypeToCreate(pool)) { case VIR_STORAGE_VOL_DISK_TYPE_PRIMARY: sprintf(partFormat, "primary %s", partedFormat); break; case VIR_STORAGE_VOL_DISK_TYPE_LOGICAL: /* make sure we have a extended partition */ for (i = 0; i < pool->volumes.count; i++) { if (pool->volumes.objs[i]->target.format == VIR_STORAGE_VOL_DISK_EXTENDED) { sprintf(partFormat, "logical %s", partedFormat); break; } } if (i == pool->volumes.count) { virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", _("no extended partition found and no primary partition available")); return -1; } break; default: break; } } } else { sprintf(partFormat, "primary"); } return 0; } /** * Aligns a new partition to nearest cylinder boundry * when haveing a msdos partition table type * to avoid any problem with all ready existing * partitions */ static int virStorageBackendDiskPartBoundries(virConnectPtr conn, virStoragePoolObjPtr pool, unsigned long long *start, unsigned long long *end, unsigned long long allocation) { int i; int smallestExtent = -1; unsigned long long smallestSize = 0; unsigned long long extraBytes = 0; unsigned long long alignedAllocation = allocation; virStoragePoolSourceDevicePtr dev = &pool->def->source.devices[0]; unsigned long long cylinderSize = dev->geometry.heads * dev->geometry.sectors * SECTOR_SIZE; DEBUG("find free area: allocation %llu, cyl size %llu\n", allocation, cylinderSize); int partType = virStorageBackendDiskPartTypeToCreate(pool); /* how many extra bytes we have since we allocate aligned to the cylinder boundry */ extraBytes = cylinderSize - (allocation % cylinderSize); for (i = 0 ; i < dev->nfreeExtent ; i++) { unsigned long long size = dev->freeExtents[i].end - dev->freeExtents[i].start; unsigned long long neededSize = allocation; if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) { /* align to cylinder boundry */ neededSize += extraBytes; if ((*start % cylinderSize) > extraBytes) { /* add an extra cylinder if the offset can't fit within the extra bytes we have */ neededSize += cylinderSize; } /* if we are creating a logical patition, we need one extra block between partitions (or actually move start one block) */ if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL) { size -= SECTOR_SIZE; } } if (size > neededSize && (smallestSize == 0 || size < smallestSize)) { /* for logical partition, the free extent must be within a logical free area */ if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL && dev->freeExtents[i].type != VIR_STORAGE_FREE_LOGICAL) { continue; /* for primary partition, the free extent must not be within a logical free area */ } else if(partType == VIR_STORAGE_VOL_DISK_TYPE_PRIMARY && dev->freeExtents[i].type != VIR_STORAGE_FREE_NORMAL) { continue; } smallestSize = size; smallestExtent = i; alignedAllocation = neededSize; } } if (smallestExtent == -1) { virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR, "%s", _("no large enough free extent")); return -1; } DEBUG("aligned alloc %llu\n", alignedAllocation); *start = dev->freeExtents[smallestExtent].start; if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL) { /* for logical partition, skip one block */ *start += SECTOR_SIZE; } *end = *start + alignedAllocation; if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) { /* adjust our allocation if start is not at a cylinder boundry */ *end -= (*start % cylinderSize); } /* counting in byte, we want the last byte of the current sector */ *end -= 1; DEBUG("final aligned start %llu, end %llu\n", *start, *end); return 0; } static int virStorageBackendDiskCreateVol(virConnectPtr conn, virStoragePoolObjPtr pool, virStorageVolDefPtr vol) { char start[100], end[100], partFormat[100]; unsigned long long startOffset = 0, endOffset = 0; const char *cmdargv[] = { PARTED, pool->def->source.devices[0].path, "mkpart", "--script", partFormat, start, end, NULL }; if (vol->target.encryption != NULL) { virStorageReportError(conn, VIR_ERR_NO_SUPPORT, "%s", _("storage pool does not support encrypted " "volumes")); return -1; } if (virStorageBackendDiskPartFormat(conn, pool, vol, partFormat) != 0) { return -1; } if (virStorageBackendDiskPartBoundries(conn, pool, &startOffset, &endOffset, vol->capacity) != 0) { return -1; } snprintf(start, sizeof(start)-1, "%lluB", startOffset); start[sizeof(start)-1] = '\0'; snprintf(end, sizeof(end)-1, "%lluB", endOffset); end[sizeof(end)-1] = '\0'; if (virRun(cmdargv, NULL) < 0) return -1; /* wait for device node to show up */ virFileWaitForDevices(); /* Blow away free extent info, as we're about to re-populate it */ VIR_FREE(pool->def->source.devices[0].freeExtents); pool->def->source.devices[0].nfreeExtent = 0; /* Specifying a target path is meaningless */ VIR_FREE(vol->target.path); /* Fetch actual extent info, generate key */ if (virStorageBackendDiskReadPartitions(conn, pool, vol) < 0) return -1; return 0; } static int virStorageBackendDiskBuildVolFrom(virConnectPtr conn, virStoragePoolObjPtr pool, virStorageVolDefPtr vol, virStorageVolDefPtr inputvol, unsigned int flags) { virStorageBackendBuildVolFrom build_func; build_func = virStorageBackendGetBuildVolFromFunction(conn, vol, inputvol); if (!build_func) return -1; return build_func(conn, pool, vol, inputvol, flags); } static int virStorageBackendDiskDeleteVol(virConnectPtr conn, virStoragePoolObjPtr pool, virStorageVolDefPtr vol, unsigned int flags ATTRIBUTE_UNUSED) { char *part_num = NULL; int err; char *devpath = NULL; char *devname, *srcname; int rc = -1; if ((err = virFileResolveLink(vol->target.path, &devpath)) < 0) { virReportSystemError(err, _("Couldn't read volume target path '%s'"), vol->target.path); goto cleanup; } devname = basename(devpath); srcname = basename(pool->def->source.devices[0].path); DEBUG("devname=%s, srcname=%s", devname, srcname); if (!STRPREFIX(devname, srcname)) { virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR, _("Volume path '%s' did not start with parent " "pool source device name."), devname); goto cleanup; } part_num = devname + strlen(srcname); if (*part_num == 0) { virStorageReportError(conn, VIR_ERR_INTERNAL_ERROR, _("cannot parse partition number from target " "'%s'"), devname); goto cleanup; } /* eg parted /dev/sda rm 2 */ const char *prog[] = { PARTED, pool->def->source.devices[0].path, "rm", "--script", part_num, NULL, }; if (virRun(prog, NULL) < 0) goto cleanup; rc = 0; cleanup: VIR_FREE(devpath); return rc; } virStorageBackend virStorageBackendDisk = { .type = VIR_STORAGE_POOL_DISK, .buildPool = virStorageBackendDiskBuildPool, .refreshPool = virStorageBackendDiskRefreshPool, .createVol = virStorageBackendDiskCreateVol, .deleteVol = virStorageBackendDiskDeleteVol, .buildVolFrom = virStorageBackendDiskBuildVolFrom, };