qemu: Monitor nbdkit process for exit

Adds the ability to monitor the nbdkit process so that we can take
action in case the child exits unexpectedly.

When the nbdkit process exits, we pause the vm, restart nbdkit, and then
resume the vm. This allows the vm to continue working in the event of an
nbdkit failure.

Eventually we may want to generalize this functionality since we may
need something similar for e.g. qemu-storage-daemon, etc.

The process is monitored with the pidfd_open() syscall if it exists
(since Linux 5.3). Otherwise it resorts to checking whether the process
is alive once a second. The one-second time period was chosen somewhat
arbitrarily.

Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com>
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
This commit is contained in:
Jonathon Jongsma 2022-10-05 12:03:33 -05:00
parent 8836f331d7
commit 447e09dfdb
10 changed files with 227 additions and 24 deletions

View File

@ -691,6 +691,13 @@ symbols = [
[ 'sched.h', 'cpu_set_t' ],
]
if host_machine.system() == 'linux'
symbols += [
# process management
[ 'sys/syscall.h', 'SYS_pidfd_open' ],
]
endif
foreach symbol : symbols
if cc.has_header_symbol(symbol[0], symbol[1], args: '-D_GNU_SOURCE', prefix: symbol.get(2, ''))
conf.set('WITH_DECL_@0@'.format(symbol[1].to_upper()), 1)
@ -2011,6 +2018,9 @@ endif
conf.set_quoted('TLS_PRIORITY', get_option('tls_priority'))
if conf.has('WITH_DECL_SYS_PIDFD_OPEN')
conf.set('WITH_NBDKIT', 1)
endif
# Various definitions
@ -2268,6 +2278,7 @@ misc_summary = {
'firewalld-zone': conf.has('WITH_FIREWALLD_ZONE'),
'nss': conf.has('WITH_NSS'),
'numad': conf.has('WITH_NUMAD'),
'nbdkit': conf.has('WITH_NBDKIT'),
'Init script': init_script,
'Char device locks': chrdev_lock_files,
'Loader/NVRAM': loader_res,

View File

@ -11499,6 +11499,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
case QEMU_PROCESS_EVENT_PR_DISCONNECT:
case QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION:
case QEMU_PROCESS_EVENT_RESET:
case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
case QEMU_PROCESS_EVENT_LAST:
break;
}

View File

@ -465,6 +465,7 @@ typedef enum {
QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE,
QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION,
QEMU_PROCESS_EVENT_RESET,
QEMU_PROCESS_EVENT_NBDKIT_EXITED,
QEMU_PROCESS_EVENT_LAST
} qemuProcessEventType;

View File

@ -4033,6 +4033,20 @@ processResetEvent(virQEMUDriver *driver,
}
/**
 * processNbdkitExitedEvent:
 * @vm: domain the event was submitted for
 * @nbdkit: nbdkit process carried as the event payload
 *
 * Handler for QEMU_PROCESS_EVENT_NBDKIT_EXITED. Acquires a VIR_JOB_MODIFY
 * job on @vm and tries to restart @nbdkit. When the restart fails the domain
 * is tainted with VIR_DOMAIN_TAINT_NBDKIT_RESTART. If the job cannot be
 * acquired, nothing is attempted.
 */
static void
processNbdkitExitedEvent(virDomainObj *vm,
                         qemuNbdkitProcess *nbdkit)
{
    int restartrc;

    if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0)
        return;

    restartrc = qemuNbdkitProcessRestart(nbdkit, vm);

    /* a failed restart is recorded on the domain rather than reported */
    if (restartrc < 0)
        virDomainObjTaint(vm, VIR_DOMAIN_TAINT_NBDKIT_RESTART);

    virDomainObjEndJob(vm);
}
static void qemuProcessEventHandler(void *data, void *opaque)
{
struct qemuProcessEvent *processEvent = data;
@ -4090,6 +4104,9 @@ static void qemuProcessEventHandler(void *data, void *opaque)
case QEMU_PROCESS_EVENT_RESET:
processResetEvent(driver, vm);
break;
case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
processNbdkitExitedEvent(vm, processEvent->data);
break;
case QEMU_PROCESS_EVENT_LAST:
break;
}

View File

@ -19,6 +19,7 @@
#include <config.h>
#include <glib.h>
#include <sys/syscall.h>
#include "vircommand.h"
#include "virerror.h"
@ -33,6 +34,7 @@
#include "qemu_nbdkit.h"
#define LIBVIRT_QEMU_NBDKITPRIV_H_ALLOW
#include "qemu_nbdkitpriv.h"
#include "qemu_process.h"
#include "qemu_security.h"
#include <fcntl.h>
@ -41,6 +43,12 @@
VIR_LOG_INIT("qemu.nbdkit");
/* Parameter annotation for functions whose body is only compiled when
 * WITH_NBDKIT is set: it expands to nothing when nbdkit support is built in
 * and to G_GNUC_UNUSED otherwise, so the parameters can be left untouched in
 * the stubbed-out build. */
#if WITH_NBDKIT
# define WITHOUT_NBDKIT_UNUSED
#else
# define WITHOUT_NBDKIT_UNUSED G_GNUC_UNUSED
#endif
VIR_ENUM_IMPL(qemuNbdkitCaps,
QEMU_NBDKIT_CAPS_LAST,
/* 0 */
@ -611,6 +619,113 @@ qemuNbdkitCapsCacheNew(const char *cachedir)
}
/**
 * qemuNbdkitProcessRestart:
 * @proc: nbdkit process to restart
 * @vm: domain that @proc is started for
 *
 * Stops @proc and then starts it again using the driver taken from the
 * private data of @vm. The result of the stop step is not checked.
 *
 * Returns the result of qemuNbdkitProcessStart().
 */
int
qemuNbdkitProcessRestart(qemuNbdkitProcess *proc,
                         virDomainObj *vm)
{
    qemuDomainObjPrivate *priv = vm->privateData;
    virQEMUDriver *qemudrv = priv->driver;

    /* release whatever the previous instance still holds before relaunching */
    qemuNbdkitProcessStop(proc);

    return qemuNbdkitProcessStart(proc, vm, qemudrv);
}
#if WITH_NBDKIT
/* Opaque data registered together with the pidfd watch and handed to the
 * watch callback. */
typedef struct {
qemuNbdkitProcess *proc; /* stored as a plain pointer; no reference is taken */
virDomainObj *vm; /* reference acquired on creation, dropped on free */
} qemuNbdkitProcessEventData;
/**
 * qemuNbdkitProcessEventDataNew:
 * @proc: nbdkit process to remember
 * @vm: domain to remember
 *
 * Allocates the callback data for the pidfd watch. A reference on @vm is
 * taken via virObjectRef(); @proc is stored as-is.
 *
 * Returns the newly allocated data; release it with
 * qemuNbdkitProcessEventDataFree().
 */
static qemuNbdkitProcessEventData*
qemuNbdkitProcessEventDataNew(qemuNbdkitProcess *proc,
                              virDomainObj *vm)
{
    qemuNbdkitProcessEventData *evdata = g_new(qemuNbdkitProcessEventData, 1);

    evdata->vm = virObjectRef(vm);
    evdata->proc = proc;

    return evdata;
}
/**
 * qemuNbdkitProcessEventDataFree:
 * @d: callback data created by qemuNbdkitProcessEventDataNew()
 *
 * Drops the reference held on the domain object and frees @d. The nbdkit
 * process pointer stored in @d is not touched.
 */
static void
qemuNbdkitProcessEventDataFree(qemuNbdkitProcessEventData *d)
{
virObjectUnref(d->vm);
g_free(d);
}
/**
 * qemuNbdkitProcessPidfdCb:
 * @watch: watch id (unused)
 * @fd: file descriptor the watch was registered on
 * @events: triggered events (unused)
 * @opaque: the qemuNbdkitProcessEventData registered with the watch
 *
 * Event-loop callback for the nbdkit pidfd watch. Closes @fd and forwards
 * the exit notification to qemuProcessHandleNbdkitExit().
 *
 * NOTE(review): the watch is not removed here, and this callback is the only
 * place visible in this file that closes the pidfd -- confirm that the fd is
 * not leaked when the watch is removed before the process exits.
 */
static void
qemuNbdkitProcessPidfdCb(int watch G_GNUC_UNUSED,
int fd,
int events G_GNUC_UNUSED,
void *opaque)
{
qemuNbdkitProcessEventData *d = opaque;
VIR_FORCE_CLOSE(fd);
/* submit an event so that it is handled in the per-vm event thread */
qemuProcessHandleNbdkitExit(d->proc, d->vm);
}
#endif /* WITH_NBDKIT */
/**
 * qemuNbdkitProcessStartMonitor:
 * @proc: nbdkit process to watch; proc->pid must already be set
 * @vm: domain that is passed on to the exit callback
 *
 * Opens a pidfd for @proc via the pidfd_open syscall and registers it with
 * the event loop for VIR_EVENT_HANDLE_READABLE, so that
 * qemuNbdkitProcessPidfdCb() runs when the fd becomes readable. The watch id
 * is stored in proc->eventwatch.
 *
 * When built without WITH_NBDKIT this only reports VIR_ERR_NO_SUPPORT.
 *
 * Returns 0 on success, -1 with an error reported on failure.
 */
static int
qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc WITHOUT_NBDKIT_UNUSED,
                              virDomainObj *vm WITHOUT_NBDKIT_UNUSED)
{
#if !WITH_NBDKIT
    /* This should not be reachable */
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("nbdkit support is not enabled"));
    return -1;
#else
    qemuNbdkitProcessEventData *cbdata;
    int exitfd = syscall(SYS_pidfd_open, proc->pid, 0);

    if (exitfd < 0) {
        virReportSystemError(errno, _("pidfd_open failed for %1$i"), proc->pid);
        return -1;
    }

    cbdata = qemuNbdkitProcessEventDataNew(proc, vm);

    /* the free callback lets the event loop release cbdata with the watch */
    proc->eventwatch = virEventAddHandle(exitfd,
                                         VIR_EVENT_HANDLE_READABLE,
                                         qemuNbdkitProcessPidfdCb,
                                         cbdata,
                                         (virFreeCallback)qemuNbdkitProcessEventDataFree);

    if (proc->eventwatch < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("failed to monitor nbdkit process %1$i"),
                       proc->pid);
        /* registration failed, so fd and callback data are still ours */
        VIR_FORCE_CLOSE(exitfd);
        qemuNbdkitProcessEventDataFree(cbdata);
        return -1;
    }

    VIR_DEBUG("Monitoring nbdkit process %i for exit", proc->pid);
    return 0;
#endif /* WITH_NBDKIT */
}
/**
 * qemuNbdkitProcessStopMonitor:
 * @proc: nbdkit process whose exit watch should be removed
 *
 * Removes the event-loop watch recorded in proc->eventwatch, if there is
 * one, and resets the field to 0. Does nothing when built without
 * WITH_NBDKIT.
 */
static void
qemuNbdkitProcessStopMonitor(qemuNbdkitProcess *proc WITHOUT_NBDKIT_UNUSED)
{
#if WITH_NBDKIT
    /* no watch registered, or registration failed: nothing to undo */
    if (proc->eventwatch <= 0)
        return;

    virEventRemoveHandle(proc->eventwatch);
    proc->eventwatch = 0;
#endif /* WITH_NBDKIT */
}
static qemuNbdkitProcess *
qemuNbdkitProcessNew(virStorageSource *source,
const char *pidfile,
@ -657,29 +772,40 @@ qemuNbdkitReconnectStorageSource(virStorageSource *source,
}
static void
qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
static int
qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source,
virDomainObj *vm)
{
qemuDomainStorageSourcePrivate *srcpriv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(source);
qemuDomainObjPrivate *vmpriv = vm->privateData;
qemuNbdkitProcess *proc;
if (!srcpriv)
return;
return 0;
proc = srcpriv->nbdkitProcess;
if (!proc)
return;
return 0;
if (!proc->caps)
proc->caps = qemuGetNbdkitCaps(vmpriv->driver);
if (proc->pid <= 0) {
if (virPidFileReadPath(proc->pidfile, &proc->pid) < 0) {
VIR_WARN("Unable to read pidfile '%s'", proc->pidfile);
return;
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unable to read pidfile '%1$s'"),
proc->pidfile);
return -1;
}
}
if (virProcessKill(proc->pid, 0) < 0)
VIR_WARN("nbdkit process %i is not alive", proc->pid);
if (virProcessKill(proc->pid, 0) < 0) {
VIR_DEBUG("nbdkit process %i is not alive", proc->pid);
return qemuNbdkitProcessRestart(proc, vm);
}
return qemuNbdkitProcessStartMonitor(proc, vm);
}
/**
@ -691,23 +817,32 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source)
* @source. It is intended to be called after libvirt restarts and has loaded its current state from
* disk and is attempting to re-connect to active domains.
*/
void
qemuNbdkitStorageSourceManageProcess(virStorageSource *source)
int
qemuNbdkitStorageSourceManageProcess(virStorageSource *source,
virDomainObj *vm)
{
virStorageSource *backing;
for (backing = source; backing != NULL; backing = backing->backingStore)
qemuNbdkitStorageSourceManageProcessOne(backing);
if (qemuNbdkitStorageSourceManageProcessOne(backing, vm) < 0)
return -1;
return 0;
}
bool
qemuNbdkitInitStorageSource(qemuNbdkitCaps *caps,
virStorageSource *source,
char *statedir,
const char *alias,
uid_t user,
gid_t group)
qemuNbdkitInitStorageSource(qemuNbdkitCaps *caps WITHOUT_NBDKIT_UNUSED,
virStorageSource *source WITHOUT_NBDKIT_UNUSED,
char *statedir WITHOUT_NBDKIT_UNUSED,
const char *alias WITHOUT_NBDKIT_UNUSED,
uid_t user WITHOUT_NBDKIT_UNUSED,
gid_t group WITHOUT_NBDKIT_UNUSED)
{
#if !WITH_NBDKIT
/* if nbdkit support is not enabled, don't construct the object so the
* calling function will fall back to qemu storage drivers */
return false;
#else
qemuDomainStorageSourcePrivate *srcPriv = qemuDomainStorageSourcePrivateFetch(source);
g_autofree char *pidname = g_strdup_printf("nbdkit-%s.pid", alias);
g_autofree char *socketname = g_strdup_printf("nbdkit-%s.socket", alias);
@ -751,6 +886,7 @@ qemuNbdkitInitStorageSource(qemuNbdkitCaps *caps,
srcPriv->nbdkitProcess = proc;
return true;
#endif /* WITH_NBDKIT */
}
@ -968,6 +1104,8 @@ qemuNbdkitProcessBuildCommand(qemuNbdkitProcess *proc)
void
qemuNbdkitProcessFree(qemuNbdkitProcess *proc)
{
qemuNbdkitProcessStopMonitor(proc);
g_clear_pointer(&proc->pidfile, g_free);
g_clear_pointer(&proc->socketfile, g_free);
g_clear_object(&proc->caps);
@ -1037,8 +1175,11 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc,
goto error;
while (virTimeBackOffWait(&timebackoff)) {
if (virFileExists(proc->socketfile))
if (virFileExists(proc->socketfile)) {
if (qemuNbdkitProcessStartMonitor(proc, vm) < 0)
goto error;
return 0;
}
if (virProcessKill(proc->pid, 0) == 0)
continue;
@ -1069,6 +1210,8 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc,
int
qemuNbdkitProcessStop(qemuNbdkitProcess *proc)
{
qemuNbdkitProcessStopMonitor(proc);
if (proc->pid < 0)
return 0;

View File

@ -68,8 +68,9 @@ qemuNbdkitStartStorageSource(virQEMUDriver *driver,
void
qemuNbdkitStopStorageSource(virStorageSource *src);
void
qemuNbdkitStorageSourceManageProcess(virStorageSource *src);
int
qemuNbdkitStorageSourceManageProcess(virStorageSource *src,
virDomainObj *vm);
bool
qemuNbdkitCapsGet(qemuNbdkitCaps *nbdkitCaps,
@ -91,6 +92,7 @@ struct _qemuNbdkitProcess {
uid_t user;
gid_t group;
pid_t pid;
int eventwatch;
};
int
@ -98,6 +100,10 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc,
virDomainObj *vm,
virQEMUDriver *driver);
int
qemuNbdkitProcessRestart(qemuNbdkitProcess *proc,
virDomainObj *vm);
int
qemuNbdkitProcessStop(qemuNbdkitProcess *proc);

View File

@ -9055,10 +9055,12 @@ qemuProcessReconnect(void *opaque)
}
for (i = 0; i < obj->def->ndisks; i++)
qemuNbdkitStorageSourceManageProcess(obj->def->disks[i]->src);
if (qemuNbdkitStorageSourceManageProcess(obj->def->disks[i]->src, obj) < 0)
goto error;
if (obj->def->os.loader && obj->def->os.loader->nvram)
qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram);
if (qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram, obj) < 0)
goto error;
/* update domain state XML with possibly updated state in virDomainObj */
if (virDomainObjSave(obj, driver->xmlopt, cfg->stateDir) < 0)
@ -9512,3 +9514,14 @@ qemuProcessQMPStart(qemuProcessQMP *proc)
return 0;
}
/**
 * qemuProcessHandleNbdkitExit:
 * @nbdkit: nbdkit process that exited
 * @vm: domain to submit the event for
 *
 * Submits a QEMU_PROCESS_EVENT_NBDKIT_EXITED event for @vm with @nbdkit as
 * the event data. The @vm object lock is held around the submission.
 */
void
qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit,
virDomainObj *vm)
{
virObjectLock(vm);
VIR_DEBUG("nbdkit process %i died", nbdkit->pid);
qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit);
virObjectUnlock(vm);
}

View File

@ -237,3 +237,6 @@ void qemuProcessRefreshDiskProps(virDomainDiskDef *disk,
struct qemuDomainDiskInfo *info);
int qemuProcessSetupEmulator(virDomainObj *vm);
void qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit,
virDomainObj *vm);

View File

@ -456,8 +456,12 @@ if conf.has('WITH_QEMU')
{ 'name': 'qemuvhostusertest', 'link_with': [ test_qemu_driver_lib ], 'link_whole': [ test_file_wrapper_lib ] },
{ 'name': 'qemuxml2argvtest', 'link_with': [ test_qemu_driver_lib, test_utils_qemu_monitor_lib ], 'link_whole': [ test_utils_qemu_lib, test_file_wrapper_lib ] },
{ 'name': 'qemuxml2xmltest', 'link_with': [ test_qemu_driver_lib ], 'link_whole': [ test_utils_qemu_lib, test_file_wrapper_lib ] },
{ 'name': 'qemunbdkittest', 'link_with': [ test_qemu_driver_lib ], 'link_whole': [ test_utils_qemu_lib, test_file_wrapper_lib ] },
]
if conf.has('WITH_NBDKIT')
tests += [
{ 'name': 'qemunbdkittest', 'link_with': [ test_qemu_driver_lib ], 'link_whole': [ test_utils_qemu_lib, test_file_wrapper_lib ] },
]
endif
endif
if conf.has('WITH_REMOTE')

View File

@ -778,8 +778,12 @@ mymain(void)
# define DO_TEST_CAPS_ARCH_VER(name, arch, ver) \
DO_TEST_CAPS_ARCH_VER_FULL(name, arch, ver, ARG_END)
# define DO_TEST_CAPS_LATEST_NBDKIT(name, ...) \
# if WITH_NBDKIT
# define DO_TEST_CAPS_LATEST_NBDKIT(name, ...) \
DO_TEST_CAPS_ARCH_LATEST_FULL(name, "x86_64", ARG_NBDKIT_CAPS, __VA_ARGS__, QEMU_NBDKIT_CAPS_LAST, ARG_END)
# else
# define DO_TEST_CAPS_LATEST_NBDKIT(name, ...)
# endif /* WITH_NBDKIT */
# define DO_TEST_CAPS_LATEST(name) \
DO_TEST_CAPS_ARCH_LATEST(name, "x86_64")