virsh: Fix --timeout option of migrate command

When starting a migration with --timeout, we create a thread to call the
migration API and in parallel setup a timer for the timeout. The
description of --timeout says: "run action specified by --timeout-*
option (suspend by default) if live migration exceeds timeout", which is
not really the way this feature was implemented. Before live migration
starts we first need to contact the source to get the domain definition
and send it to the destination where a new QEMU process has to be
started. This can take some (unpredictably long) time while the timeout
timer is already running. If a very short timeout is set (which doesn't
really make sense, but it's allowed), we may even end up taking the
timeout action before the actual migration had a chance to start.

With this patch the timeout is started only after we get non-zero
dataTotal from virDomainGetJobInfo, which means the migration (of either
storage or memory) really started.

https://issues.redhat.com/browse/RHEL-41264

Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
This commit is contained in:
Jiri Denemark 2024-11-27 13:24:44 +01:00
parent 6cc93bf288
commit b1725fbfb8

View File

@ -4237,7 +4237,10 @@ typedef void (*jobWatchTimeoutFunc)(vshControl *ctl, virDomainPtr dom,
struct virshWatchData { struct virshWatchData {
vshControl *ctl; vshControl *ctl;
virDomainPtr dom; virDomainPtr dom;
GMainContext *context;
jobWatchTimeoutFunc timeout_func; jobWatchTimeoutFunc timeout_func;
int timeout_secs;
GSource *timeout_src;
void *opaque; void *opaque;
const char *label; const char *label;
GIOChannel *stdin_ioc; GIOChannel *stdin_ioc;
@ -4259,6 +4262,20 @@ virshWatchTimeout(gpointer opaque)
} }
static void
virshWatchSetTimeout(struct virshWatchData *data)
{
vshDebug(data->ctl, VSH_ERR_DEBUG,
"watchJob: setting timeout of %d secs\n", data->timeout_secs);
data->timeout_src = g_timeout_source_new_seconds(data->timeout_secs);
g_source_set_callback(data->timeout_src,
virshWatchTimeout,
data, NULL);
g_source_attach(data->timeout_src, data->context);
}
static gboolean static gboolean
virshWatchProgress(gpointer opaque) virshWatchProgress(gpointer opaque)
{ {
@ -4290,10 +4307,17 @@ virshWatchProgress(gpointer opaque)
jobinfo.type == VIR_DOMAIN_JOB_UNBOUNDED)) { jobinfo.type == VIR_DOMAIN_JOB_UNBOUNDED)) {
vshTTYDisableInterrupt(data->ctl); vshTTYDisableInterrupt(data->ctl);
data->jobStarted = true; data->jobStarted = true;
vshDebug(data->ctl, VSH_ERR_DEBUG,
"watchJob: job started\n");
}
if (!data->verbose) { if (data->jobStarted) {
if (data->timeout_secs > 0 && !data->timeout_src) {
if (jobinfo.dataTotal > 0)
virshWatchSetTimeout(data);
} else if (!data->verbose) {
vshDebug(data->ctl, VSH_ERR_DEBUG, vshDebug(data->ctl, VSH_ERR_DEBUG,
"watchJob: job started, disabling callback\n"); "watchJob: disabling callback\n");
return G_SOURCE_REMOVE; return G_SOURCE_REMOVE;
} }
} }
@ -4356,13 +4380,15 @@ virshWatchJob(vshControl *ctl,
struct sigaction sig_action; struct sigaction sig_action;
struct sigaction old_sig_action; struct sigaction old_sig_action;
#endif /* !WIN32 */ #endif /* !WIN32 */
g_autoptr(GSource) timeout_src = NULL;
g_autoptr(GSource) progress_src = NULL; g_autoptr(GSource) progress_src = NULL;
g_autoptr(GSource) stdin_src = NULL; g_autoptr(GSource) stdin_src = NULL;
struct virshWatchData data = { struct virshWatchData data = {
.ctl = ctl, .ctl = ctl,
.dom = dom, .dom = dom,
.context = g_main_loop_get_context(eventLoop),
.timeout_func = timeout_func, .timeout_func = timeout_func,
.timeout_secs = timeout_secs,
.timeout_src = NULL,
.opaque = opaque, .opaque = opaque,
.label = label, .label = label,
.stdin_ioc = NULL, .stdin_ioc = NULL,
@ -4391,27 +4417,14 @@ virshWatchJob(vshControl *ctl,
g_source_set_callback(stdin_src, g_source_set_callback(stdin_src,
(GSourceFunc)virshWatchInterrupt, (GSourceFunc)virshWatchInterrupt,
&data, NULL); &data, NULL);
g_source_attach(stdin_src, g_source_attach(stdin_src, data.context);
g_main_loop_get_context(eventLoop));
}
if (timeout_secs) {
vshDebug(ctl, VSH_ERR_DEBUG,
"watchJob: setting timeout of %d secs\n", timeout_secs);
timeout_src = g_timeout_source_new_seconds(timeout_secs);
g_source_set_callback(timeout_src,
virshWatchTimeout,
&data, NULL);
g_source_attach(timeout_src,
g_main_loop_get_context(eventLoop));
} }
progress_src = g_timeout_source_new(500); progress_src = g_timeout_source_new(500);
g_source_set_callback(progress_src, g_source_set_callback(progress_src,
virshWatchProgress, virshWatchProgress,
&data, NULL); &data, NULL);
g_source_attach(progress_src, g_source_attach(progress_src, data.context);
g_main_loop_get_context(eventLoop));
g_main_loop_run(eventLoop); g_main_loop_run(eventLoop);
@ -4420,8 +4433,10 @@ virshWatchJob(vshControl *ctl,
if (*job_err == 0 && verbose) /* print [100 %] */ if (*job_err == 0 && verbose) /* print [100 %] */
virshPrintJobProgress(label, 0, 1); virshPrintJobProgress(label, 0, 1);
if (timeout_src) if (data.timeout_src) {
g_source_destroy(timeout_src); g_source_destroy(data.timeout_src);
g_source_unref(data.timeout_src);
}
g_source_destroy(progress_src); g_source_destroy(progress_src);
if (stdin_src) if (stdin_src)
g_source_destroy(stdin_src); g_source_destroy(stdin_src);