/* * storage_backend_gluster.c: storage backend for Gluster handling * * Copyright (C) 2013-2014 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see * . * */ #include #include #include "storage_backend_gluster.h" #include "storage_conf.h" #include "viralloc.h" #include "virerror.h" #include "virlog.h" #include "virstoragefile.h" #include "virstring.h" #include "viruri.h" #include "storage_util.h" #define VIR_FROM_THIS VIR_FROM_STORAGE VIR_LOG_INIT("storage.storage_backend_gluster"); struct _virStorageBackendGlusterState { glfs_t *vol; /* Accept the same URIs as qemu's block/gluster.c: * gluster[+transport]://[server[:port]]/vol/[dir/]image[?socket=...] */ virURI *uri; char *volname; /* vol from URI, no '/' */ char *dir; /* dir from URI, or "/"; always starts and ends in '/' */ }; typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState; typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr; static void virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state) { if (!state) return; /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for * glfs_fini, with errno containing random data, so there's no way * to tell if it succeeded. 3.4.2 is supposed to fix this.*/ if (state->vol && glfs_fini(state->vol) < 0) VIR_DEBUG("shutdown of gluster volume %s failed with errno %d", state->volname, errno); virURIFree(state->uri); VIR_FREE(state->volname); VIR_FREE(state->dir); VIR_FREE(state); } static virStorageBackendGlusterStatePtr virStorageBackendGlusterOpen(virStoragePoolObjPtr pool) { virStorageBackendGlusterStatePtr ret = NULL; const char *name = pool->def->source.name; const char *dir = pool->def->source.dir; bool trailing_slash = true; /* Volume name must not contain '/'; optional path allows use of a * subdirectory within the volume name. */ if (strchr(name, '/')) { virReportError(VIR_ERR_XML_ERROR, _("gluster pool name '%s' must not contain /"), name); return NULL; } if (dir) { if (*dir != '/') { virReportError(VIR_ERR_XML_ERROR, _("gluster pool path '%s' must start with /"), dir); return NULL; } if (strchr(dir, '\0')[-1] != '/') trailing_slash = false; } if (VIR_ALLOC(ret) < 0) return NULL; if (VIR_STRDUP(ret->volname, name) < 0) goto error; if (virAsprintf(&ret->dir, "%s%s", dir ? dir : "/", trailing_slash ? "" : "/") < 0) goto error; /* FIXME: Currently hard-coded to tcp transport; XML needs to be * extended to allow alternate transport */ if (VIR_ALLOC(ret->uri) < 0) goto error; if (VIR_STRDUP(ret->uri->scheme, "gluster") < 0) goto error; if (VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0) goto error; if (virAsprintf(&ret->uri->path, "/%s%s", ret->volname, ret->dir) < 0) goto error; ret->uri->port = pool->def->source.hosts[0].port; /* Actually connect to glfs */ if (!(ret->vol = glfs_new(ret->volname))) { virReportOOMError(); goto error; } if (glfs_set_volfile_server(ret->vol, "tcp", ret->uri->server, ret->uri->port) < 0 || glfs_init(ret->vol) < 0) { char *uri = virURIFormat(ret->uri); virReportSystemError(errno, _("failed to connect to %s"), NULLSTR(uri)); VIR_FREE(uri); goto error; } if (glfs_chdir(ret->vol, ret->dir) < 0) { virReportSystemError(errno, _("failed to change to directory '%s' in '%s'"), ret->dir, ret->volname); goto error; } return ret; error: virStorageBackendGlusterClose(ret); return NULL; } static ssize_t virStorageBackendGlusterReadHeader(glfs_fd_t *fd, const char *name, ssize_t maxlen, char **buf) { char *s; size_t nread = 0; if (VIR_ALLOC_N(*buf, maxlen) < 0) return -1; s = *buf; while (maxlen) { ssize_t r = glfs_read(fd, s, maxlen, 0); if (r < 0 && errno == EINTR) continue; if (r < 0) { VIR_FREE(*buf); virReportSystemError(errno, _("unable to read '%s'"), name); return r; } if (r == 0) return nread; s += r; maxlen -= r; nread += r; } return nread; } static int virStorageBackendGlusterSetMetadata(virStorageBackendGlusterStatePtr state, virStorageVolDefPtr vol, const char *name) { int ret = -1; char *path = NULL; char *tmp; VIR_FREE(vol->key); VIR_FREE(vol->target.path); vol->type = VIR_STORAGE_VOL_NETWORK; vol->target.format = VIR_STORAGE_FILE_RAW; if (name) { VIR_FREE(vol->name); if (VIR_STRDUP(vol->name, name) < 0) goto cleanup; } if (virAsprintf(&path, "%s%s%s", state->volname, state->dir, vol->name) < 0) goto cleanup; tmp = state->uri->path; if (virAsprintf(&state->uri->path, "/%s", path) < 0) { state->uri->path = tmp; goto cleanup; } if (!(vol->target.path = virURIFormat(state->uri))) { VIR_FREE(state->uri->path); state->uri->path = tmp; goto cleanup; } VIR_FREE(state->uri->path); state->uri->path = tmp; /* the path is unique enough to serve as a volume key */ if (VIR_STRDUP(vol->key, vol->target.path) < 0) goto cleanup; ret = 0; cleanup: VIR_FREE(path); return ret; } /* Populate *volptr for the given name and stat information, or leave * it NULL if the entry should be skipped (such as "."). Return 0 on * success, -1 on failure. */ static int virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state, const char *name, struct stat *st, virStorageVolDefPtr *volptr) { int ret = -1; virStorageVolDefPtr vol = NULL; glfs_fd_t *fd = NULL; virStorageSourcePtr meta = NULL; char *header = NULL; ssize_t len = VIR_STORAGE_MAX_HEADER; int backingFormat; *volptr = NULL; /* Silently skip '.' and '..'. */ if (STREQ(name, ".") || STREQ(name, "..")) return 0; /* Follow symlinks; silently skip broken links and loops. */ if (S_ISLNK(st->st_mode) && glfs_stat(state->vol, name, st) < 0) { if (errno == ENOENT || errno == ELOOP) { VIR_WARN("ignoring dangling symlink '%s'", name); ret = 0; } else { virReportSystemError(errno, _("cannot stat '%s'"), name); } return ret; } if (VIR_ALLOC(vol) < 0) goto cleanup; if (virStorageBackendUpdateVolTargetInfoFD(&vol->target, -1, st) < 0) goto cleanup; if (virStorageBackendGlusterSetMetadata(state, vol, name) < 0) goto cleanup; if (S_ISDIR(st->st_mode)) { vol->type = VIR_STORAGE_VOL_NETDIR; vol->target.format = VIR_STORAGE_FILE_DIR; *volptr = vol; vol = NULL; ret = 0; goto cleanup; } /* No need to worry about O_NONBLOCK - gluster doesn't allow creation * of fifos, so there's nothing it would protect us from. */ if (!(fd = glfs_open(state->vol, name, O_RDONLY | O_NOCTTY))) { /* A dangling symlink now implies a TOCTTOU race; report it. */ virReportSystemError(errno, _("cannot open volume '%s'"), name); goto cleanup; } if ((len = virStorageBackendGlusterReadHeader(fd, name, len, &header)) < 0) goto cleanup; if (!(meta = virStorageFileGetMetadataFromBuf(name, header, len, VIR_STORAGE_FILE_AUTO, &backingFormat))) goto cleanup; if (meta->backingStoreRaw) { if (VIR_ALLOC(vol->target.backingStore) < 0) goto cleanup; vol->target.backingStore->path = meta->backingStoreRaw; if (backingFormat < 0) vol->target.backingStore->format = VIR_STORAGE_FILE_RAW; else vol->target.backingStore->format = backingFormat; meta->backingStoreRaw = NULL; } vol->target.format = meta->format; if (meta->capacity) vol->target.capacity = meta->capacity; if (meta->encryption) { vol->target.encryption = meta->encryption; meta->encryption = NULL; } vol->target.features = meta->features; meta->features = NULL; vol->target.compat = meta->compat; meta->compat = NULL; *volptr = vol; vol = NULL; ret = 0; cleanup: virStorageSourceFree(meta); virStorageVolDefFree(vol); if (fd) glfs_close(fd); VIR_FREE(header); return ret; } static int virStorageBackendGlusterRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED, virStoragePoolObjPtr pool) { int ret = -1; virStorageBackendGlusterStatePtr state = NULL; struct { struct dirent ent; /* See comment below about readdir_r needing padding */ char padding[MAX(1, 256 - (int) (sizeof(struct dirent) - offsetof(struct dirent, d_name)))]; } de; struct dirent *ent; glfs_fd_t *dir = NULL; struct stat st; struct statvfs sb; if (!(state = virStorageBackendGlusterOpen(pool))) goto cleanup; /* Why oh why did glfs 3.4 decide to expose only readdir_r rather * than readdir? POSIX admits that readdir_r is inherently a * flawed design, because systems are not required to define * NAME_MAX: http://austingroupbugs.net/view.php?id=696 * http://womble.decadent.org.uk/readdir_r-advisory.html * * Fortunately, gluster appears to limit its underlying bricks to * only use file systems such as XFS that have a NAME_MAX of 255; * so we are currently guaranteed that if we provide 256 bytes of * tail padding, then we should have enough space to avoid buffer * overflow no matter whether the OS used d_name[], d_name[1], or * d_name[256] in its 'struct dirent'. * http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00083.html */ if (!(dir = glfs_opendir(state->vol, state->dir))) { virReportSystemError(errno, _("cannot open path '%s' in '%s'"), state->dir, state->volname); goto cleanup; } while (!(errno = glfs_readdirplus_r(dir, &st, &de.ent, &ent)) && ent) { virStorageVolDefPtr vol; int okay = virStorageBackendGlusterRefreshVol(state, ent->d_name, &st, &vol); if (okay < 0) goto cleanup; if (vol && VIR_APPEND_ELEMENT(pool->volumes.objs, pool->volumes.count, vol) < 0) goto cleanup; } if (errno) { virReportSystemError(errno, _("failed to read directory '%s' in '%s'"), state->dir, state->volname); goto cleanup; } if (glfs_statvfs(state->vol, state->dir, &sb) < 0) { virReportSystemError(errno, _("cannot statvfs path '%s' in '%s'"), state->dir, state->volname); goto cleanup; } pool->def->capacity = ((unsigned long long)sb.f_frsize * (unsigned long long)sb.f_blocks); pool->def->available = ((unsigned long long)sb.f_bfree * (unsigned long long)sb.f_frsize); pool->def->allocation = pool->def->capacity - pool->def->available; ret = 0; cleanup: if (dir) glfs_closedir(dir); virStorageBackendGlusterClose(state); if (ret < 0) virStoragePoolObjClearVols(pool); return ret; } static int virStorageBackendGlusterVolDelete(virConnectPtr conn ATTRIBUTE_UNUSED, virStoragePoolObjPtr pool, virStorageVolDefPtr vol, unsigned int flags) { virStorageBackendGlusterStatePtr state = NULL; int ret = -1; virCheckFlags(0, -1); switch ((virStorageVolType) vol->type) { case VIR_STORAGE_VOL_FILE: case VIR_STORAGE_VOL_DIR: case VIR_STORAGE_VOL_BLOCK: case VIR_STORAGE_VOL_PLOOP: case VIR_STORAGE_VOL_LAST: virReportError(VIR_ERR_NO_SUPPORT, _("removing of '%s' volumes is not supported " "by the gluster backend: %s"), virStorageVolTypeToString(vol->type), vol->target.path); goto cleanup; break; case VIR_STORAGE_VOL_NETWORK: if (!(state = virStorageBackendGlusterOpen(pool))) goto cleanup; if (glfs_unlink(state->vol, vol->name) < 0) { if (errno != ENOENT) { virReportSystemError(errno, _("cannot remove gluster volume file '%s'"), vol->target.path); goto cleanup; } } break; case VIR_STORAGE_VOL_NETDIR: if (!(state = virStorageBackendGlusterOpen(pool))) goto cleanup; if (glfs_rmdir(state->vol, vol->target.path) < 0) { if (errno != ENOENT) { virReportSystemError(errno, _("cannot remove gluster volume dir '%s'"), vol->target.path); goto cleanup; } } break; } ret = 0; cleanup: virStorageBackendGlusterClose(state); return ret; } static char * virStorageBackendGlusterFindPoolSources(virConnectPtr conn ATTRIBUTE_UNUSED, const char *srcSpec, unsigned int flags) { virStoragePoolSourceList list = { .type = VIR_STORAGE_POOL_GLUSTER, .nsources = 0, .sources = NULL }; virStoragePoolSourcePtr source = NULL; char *ret = NULL; int rc; size_t i; virCheckFlags(0, NULL); if (!srcSpec) { virReportError(VIR_ERR_INVALID_ARG, "%s", _("hostname must be specified for gluster sources")); return NULL; } if (!(source = virStoragePoolDefParseSourceString(srcSpec, VIR_STORAGE_POOL_GLUSTER))) return NULL; if (source->nhost != 1) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("Expected exactly 1 host for the storage pool")); goto cleanup; } if ((rc = virStorageBackendFindGlusterPoolSources(source->hosts[0].name, VIR_STORAGE_POOL_GLUSTER, &list, true)) < 0) goto cleanup; if (rc == 0) { virReportError(VIR_ERR_OPERATION_FAILED, _("no storage pools were found on host '%s'"), source->hosts[0].name); goto cleanup; } if (!(ret = virStoragePoolSourceListFormat(&list))) goto cleanup; cleanup: for (i = 0; i < list.nsources; i++) virStoragePoolSourceClear(&list.sources[i]); VIR_FREE(list.sources); virStoragePoolSourceFree(source); return ret; } static int virStorageBackendGlusterCheckPool(virStoragePoolObjPtr pool, bool *active) { /* Return previous state remembered by the status XML. If the pool is not * available we will fail to refresh it and end up in the same situation. * This will save one attempt to open the connection to the remote server */ *active = pool->active; return 0; } virStorageBackend virStorageBackendGluster = { .type = VIR_STORAGE_POOL_GLUSTER, .checkPool = virStorageBackendGlusterCheckPool, .refreshPool = virStorageBackendGlusterRefreshPool, .findPoolSources = virStorageBackendGlusterFindPoolSources, .deleteVol = virStorageBackendGlusterVolDelete, }; typedef struct _virStorageFileBackendGlusterPriv virStorageFileBackendGlusterPriv; typedef virStorageFileBackendGlusterPriv *virStorageFileBackendGlusterPrivPtr; struct _virStorageFileBackendGlusterPriv { glfs_t *vol; char *canonpath; }; static void virStorageFileBackendGlusterDeinit(virStorageSourcePtr src) { virStorageFileBackendGlusterPrivPtr priv = src->drv->priv; VIR_DEBUG("deinitializing gluster storage file %p (gluster://%s:%s/%s%s)", src, src->hosts->name, src->hosts->port ? src->hosts->port : "0", src->volume, src->path); if (priv->vol) glfs_fini(priv->vol); VIR_FREE(priv->canonpath); VIR_FREE(priv); src->drv->priv = NULL; } static int virStorageFileBackendGlusterInitServer(virStorageFileBackendGlusterPrivPtr priv, virStorageNetHostDefPtr host) { const char *transport = virStorageNetHostTransportTypeToString(host->transport); const char *hoststr = NULL; int port = 0; switch ((virStorageNetHostTransport) host->transport) { case VIR_STORAGE_NET_HOST_TRANS_RDMA: case VIR_STORAGE_NET_HOST_TRANS_TCP: hoststr = host->name; if (host->port && virStrToLong_i(host->port, NULL, 10, &port) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("failed to parse port number '%s'"), host->port); return -1; } break; case VIR_STORAGE_NET_HOST_TRANS_UNIX: hoststr = host->socket; break; case VIR_STORAGE_NET_HOST_TRANS_LAST: break; } VIR_DEBUG("adding gluster host for %p: transport=%s host=%s port=%d", priv, transport, hoststr, port); if (glfs_set_volfile_server(priv->vol, transport, hoststr, port) < 0) { virReportSystemError(errno, _("failed to set gluster volfile server '%s'"), hoststr); return -1; } return 0; } static int virStorageFileBackendGlusterInit(virStorageSourcePtr src) { virStorageFileBackendGlusterPrivPtr priv = NULL; size_t i; if (!src->volume) { virReportError(VIR_ERR_INTERNAL_ERROR, _("missing gluster volume name for path '%s'"), src->path); return -1; } if (VIR_ALLOC(priv) < 0) return -1; VIR_DEBUG("initializing gluster storage file %p " "(priv='%p' volume='%s' path='%s') as [%u:%u]", src, priv, src->volume, src->path, (unsigned int)src->drv->uid, (unsigned int)src->drv->gid); if (!(priv->vol = glfs_new(src->volume))) { virReportOOMError(); goto error; } for (i = 0; i < src->nhosts; i++) { if (virStorageFileBackendGlusterInitServer(priv, src->hosts + i) < 0) goto error; } if (glfs_init(priv->vol) < 0) { virReportSystemError(errno, _("failed to initialize gluster connection " "(src=%p priv=%p)"), src, priv); goto error; } src->drv->priv = priv; return 0; error: if (priv->vol) glfs_fini(priv->vol); VIR_FREE(priv); return -1; } static int virStorageFileBackendGlusterCreate(virStorageSourcePtr src) { virStorageFileBackendGlusterPrivPtr priv = src->drv->priv; glfs_fd_t *fd = NULL; mode_t mode = S_IRUSR; if (!src->readonly) mode |= S_IWUSR; if (!(fd = glfs_creat(priv->vol, src->path, O_CREAT | O_TRUNC | O_WRONLY, mode))) return -1; ignore_value(glfs_close(fd)); return 0; } static int virStorageFileBackendGlusterUnlink(virStorageSourcePtr src) { virStorageFileBackendGlusterPrivPtr priv = src->drv->priv; return glfs_unlink(priv->vol, src->path); } static int virStorageFileBackendGlusterStat(virStorageSourcePtr src, struct stat *st) { virStorageFileBackendGlusterPrivPtr priv = src->drv->priv; return glfs_stat(priv->vol, src->path, st); } static ssize_t virStorageFileBackendGlusterReadHeader(virStorageSourcePtr src, ssize_t max_len, char **buf) { virStorageFileBackendGlusterPrivPtr priv = src->drv->priv; glfs_fd_t *fd = NULL; ssize_t ret = -1; *buf = NULL; if (!(fd = glfs_open(priv->vol, src->path, O_RDONLY))) { virReportSystemError(errno, _("Failed to open file '%s'"), src->path); return -1; } ret = virStorageBackendGlusterReadHeader(fd, src->path, max_len, buf); if (fd) glfs_close(fd); return ret; } static int virStorageFileBackendGlusterAccess(virStorageSourcePtr src, int mode) { virStorageFileBackendGlusterPrivPtr priv = src->drv->priv; return glfs_access(priv->vol, src->path, mode); } static int virStorageFileBackendGlusterReadlinkCallback(const char *path, char **linkpath, void *data) { virStorageFileBackendGlusterPrivPtr priv = data; char *buf = NULL; size_t bufsiz = 0; ssize_t ret; struct stat st; *linkpath = NULL; if (glfs_stat(priv->vol, path, &st) < 0) { virReportSystemError(errno, _("failed to stat gluster path '%s'"), path); return -1; } if (!S_ISLNK(st.st_mode)) return 1; realloc: if (VIR_EXPAND_N(buf, bufsiz, 256) < 0) goto error; if ((ret = glfs_readlink(priv->vol, path, buf, bufsiz)) < 0) { virReportSystemError(errno, _("failed to read link of gluster file '%s'"), path); goto error; } if (ret == bufsiz) goto realloc; buf[ret] = '\0'; *linkpath = buf; return 0; error: VIR_FREE(buf); return -1; } static const char * virStorageFileBackendGlusterGetUniqueIdentifier(virStorageSourcePtr src) { virStorageFileBackendGlusterPrivPtr priv = src->drv->priv; char *filePath = NULL; if (priv->canonpath) return priv->canonpath; if (!(filePath = virStorageFileCanonicalizePath(src->path, virStorageFileBackendGlusterReadlinkCallback, priv))) return NULL; ignore_value(virAsprintf(&priv->canonpath, "gluster://%s:%s/%s/%s", src->hosts->name, src->hosts->port, src->volume, filePath)); VIR_FREE(filePath); return priv->canonpath; } static int virStorageFileBackendGlusterChown(const virStorageSource *src, uid_t uid, gid_t gid) { virStorageFileBackendGlusterPrivPtr priv = src->drv->priv; return glfs_chown(priv->vol, src->path, uid, gid); } virStorageFileBackend virStorageFileBackendGluster = { .type = VIR_STORAGE_TYPE_NETWORK, .protocol = VIR_STORAGE_NET_PROTOCOL_GLUSTER, .backendInit = virStorageFileBackendGlusterInit, .backendDeinit = virStorageFileBackendGlusterDeinit, .storageFileCreate = virStorageFileBackendGlusterCreate, .storageFileUnlink = virStorageFileBackendGlusterUnlink, .storageFileStat = virStorageFileBackendGlusterStat, .storageFileReadHeader = virStorageFileBackendGlusterReadHeader, .storageFileAccess = virStorageFileBackendGlusterAccess, .storageFileChown = virStorageFileBackendGlusterChown, .storageFileGetUniqueIdentifier = virStorageFileBackendGlusterGetUniqueIdentifier, }; int virStorageBackendGlusterRegister(void) { if (virStorageBackendRegister(&virStorageBackendGluster) < 0) return -1; if (virStorageBackendFileRegister(&virStorageFileBackendGluster) < 0) return -1; return 0; }