From f46d137e33a348c0f96eaacc58e29794170757cb Mon Sep 17 00:00:00 2001 From: Wido den Hollander Date: Wed, 6 Jan 2016 10:25:49 +0100 Subject: [PATCH] rbd: Do not error out on a single image during pool refresh It could happen that rbd_list() returns X names, but that while refreshing the pool one of those RBD images is removed from Ceph through a different route than libvirt. We do not need to error out in such a case, we can simply ignore the volume and continue. error : volStorageBackendRBDRefreshVolInfo:289 : failed to open the RBD image 'vol-998': No such file or directory It could also be that one or more Placement Groups (PGs) inside Ceph are inactive due to a system failure. If that happens it could be that some RBD images can not be refreshed and a timeout will be raised by librados. error : volStorageBackendRBDRefreshVolInfo:289 : failed to open the RBD image 'vol-893': Connection timed out Ignore the error and continue to refresh the rest of the pool's contents. Signed-off-by: Wido den Hollander --- src/storage/storage_backend_rbd.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c index 8e2d51bf94..8dcb9be899 100644 --- a/src/storage/storage_backend_rbd.c +++ b/src/storage/storage_backend_rbd.c @@ -285,6 +285,7 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol, r = rbd_open(ptr->ioctx, vol->name, &image, NULL); if (r < 0) { + ret = -r; virReportSystemError(-r, _("failed to open the RBD image '%s'"), vol->name); goto cleanup; @@ -293,6 +294,7 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol, rbd_image_info_t info; r = rbd_stat(image, &info, sizeof(info)); if (r < 0) { + ret = -r; virReportSystemError(-r, _("failed to stat the RBD image '%s'"), vol->name); goto cleanup; @@ -400,7 +402,21 @@ static int virStorageBackendRBDRefreshPool(virConnectPtr conn, name += strlen(name) + 1; - if 
(volStorageBackendRBDRefreshVolInfo(vol, pool, &ptr) < 0) { + r = volStorageBackendRBDRefreshVolInfo(vol, pool, &ptr); + + /* It could be that a volume has been deleted through a different route + * than libvirt and that will cause a -ENOENT to be returned. + * + * Another possibility is that there is something wrong with the placement + * group (PG) that RBD image's header is in and that causes -ETIMEDOUT + * to be returned. + * + * Do not error out and simply ignore the volume + */ + if (r < 0) { + if (r == -ENOENT || r == -ETIMEDOUT) + continue; + virStorageVolDefFree(vol); goto cleanup; }