When starting a migration with --timeout, we create a thread to call the
migration API and in parallel set up a timer for the timeout. The
description of --timeout says: "run action specified by --timeout-*
option (suspend by default) if live migration exceeds timeout", which is
not really the way this feature was implemented. Before live migration
starts we first need to contact the source to get the domain definition
and send it to the destination where a new QEMU process has to be
started. This can take some (unpredictably long) time while the timeout
timer is already running. If a very short timeout is set (which doesn't
really make sense, but it's allowed), we may even end up taking the
timeout action before the actual migration has a chance to start.
With this patch, the timeout is started only after virDomainGetJobInfo
reports a non-zero dataTotal, which means the migration (of either
storage or memory) has really started.
https://issues.redhat.com/browse/RHEL-41264
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
---
tools/virsh-domain.c | 55 ++++++++++++++++++++++++++++----------------
1 file changed, 35 insertions(+), 20 deletions(-)
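
For context, the core idea is to arm the GLib timeout source from the periodic
progress callback only once virDomainGetJobInfo() reports a non-zero dataTotal.
Below is a minimal standalone sketch of that pattern, not the virsh code itself:
the struct and callback names are hypothetical and error handling is simplified.

    #include <glib.h>
    #include <libvirt/libvirt.h>

    struct watchData {
        virDomainPtr dom;
        GMainContext *context;    /* main loop context the sources attach to */
        int timeout_secs;         /* value given via --timeout */
        GSource *timeout_src;     /* NULL until the timer is armed */
    };

    /* Hypothetical timeout handler: this is where the --timeout-* action
     * (suspend by default) would be triggered. */
    static gboolean
    onTimeout(gpointer opaque)
    {
        struct watchData *data = opaque;

        virDomainSuspend(data->dom);
        return G_SOURCE_REMOVE;
    }

    /* Polled periodically; arms the timeout only after the job reports
     * non-zero dataTotal, i.e. migration of storage or memory started. */
    static gboolean
    onProgress(gpointer opaque)
    {
        struct watchData *data = opaque;
        virDomainJobInfo info;

        if (virDomainGetJobInfo(data->dom, &info) < 0)
            return G_SOURCE_CONTINUE;

        if (data->timeout_secs > 0 && !data->timeout_src && info.dataTotal > 0) {
            data->timeout_src = g_timeout_source_new_seconds(data->timeout_secs);
            g_source_set_callback(data->timeout_src, onTimeout, data, NULL);
            g_source_attach(data->timeout_src, data->context);
        }
        return G_SOURCE_CONTINUE;
    }

The patch below follows the same shape: virshWatchProgress() already runs every
500 ms (g_timeout_source_new(500)), so it is a natural place to check dataTotal
and call the new virshWatchSetTimeout() helper.
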
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index e4923284af..546db955a9 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -4237,7 +4237,10 @@ typedef void (*jobWatchTimeoutFunc)(vshControl *ctl, virDomainPtr dom,
 struct virshWatchData {
     vshControl *ctl;
     virDomainPtr dom;
+    GMainContext *context;
     jobWatchTimeoutFunc timeout_func;
+    int timeout_secs;
+    GSource *timeout_src;
     void *opaque;
     const char *label;
     GIOChannel *stdin_ioc;
@@ -4259,6 +4262,20 @@ virshWatchTimeout(gpointer opaque)
 }
 
 
+static void
+virshWatchSetTimeout(struct virshWatchData *data)
+{
+    vshDebug(data->ctl, VSH_ERR_DEBUG,
+             "watchJob: setting timeout of %d secs\n", data->timeout_secs);
+
+    data->timeout_src = g_timeout_source_new_seconds(data->timeout_secs);
+    g_source_set_callback(data->timeout_src,
+                          virshWatchTimeout,
+                          data, NULL);
+    g_source_attach(data->timeout_src, data->context);
+}
+
+
 static gboolean
 virshWatchProgress(gpointer opaque)
 {
@@ -4290,10 +4307,17 @@ virshWatchProgress(gpointer opaque)
          jobinfo.type == VIR_DOMAIN_JOB_UNBOUNDED)) {
         vshTTYDisableInterrupt(data->ctl);
         data->jobStarted = true;
+        vshDebug(data->ctl, VSH_ERR_DEBUG,
+                 "watchJob: job started\n");
+    }
 
-        if (!data->verbose) {
+    if (data->jobStarted) {
+        if (data->timeout_secs > 0 && !data->timeout_src) {
+            if (jobinfo.dataTotal > 0)
+                virshWatchSetTimeout(data);
+        } else if (!data->verbose) {
             vshDebug(data->ctl, VSH_ERR_DEBUG,
-                     "watchJob: job started, disabling callback\n");
+                     "watchJob: disabling callback\n");
             return G_SOURCE_REMOVE;
         }
     }
@@ -4356,13 +4380,15 @@ virshWatchJob(vshControl *ctl,
     struct sigaction sig_action;
     struct sigaction old_sig_action;
 #endif /* !WIN32 */
-    g_autoptr(GSource) timeout_src = NULL;
     g_autoptr(GSource) progress_src = NULL;
     g_autoptr(GSource) stdin_src = NULL;
     struct virshWatchData data = {
         .ctl = ctl,
         .dom = dom,
+        .context = g_main_loop_get_context(eventLoop),
         .timeout_func = timeout_func,
+        .timeout_secs = timeout_secs,
+        .timeout_src = NULL,
         .opaque = opaque,
         .label = label,
         .stdin_ioc = NULL,
@@ -4391,27 +4417,14 @@ virshWatchJob(vshControl *ctl,
         g_source_set_callback(stdin_src,
                               (GSourceFunc)virshWatchInterrupt,
                               &data, NULL);
-        g_source_attach(stdin_src,
-                        g_main_loop_get_context(eventLoop));
-    }
-
-    if (timeout_secs) {
-        vshDebug(ctl, VSH_ERR_DEBUG,
-                 "watchJob: setting timeout of %d secs\n", timeout_secs);
-        timeout_src = g_timeout_source_new_seconds(timeout_secs);
-        g_source_set_callback(timeout_src,
-                              virshWatchTimeout,
-                              &data, NULL);
-        g_source_attach(timeout_src,
-                        g_main_loop_get_context(eventLoop));
+        g_source_attach(stdin_src, data.context);
     }
 
     progress_src = g_timeout_source_new(500);
     g_source_set_callback(progress_src,
                           virshWatchProgress,
                           &data, NULL);
-    g_source_attach(progress_src,
-                    g_main_loop_get_context(eventLoop));
+    g_source_attach(progress_src, data.context);
 
     g_main_loop_run(eventLoop);
 
@@ -4420,8 +4433,10 @@ virshWatchJob(vshControl *ctl,
     if (*job_err == 0 && verbose) /* print [100 %] */
         virshPrintJobProgress(label, 0, 1);
 
-    if (timeout_src)
-        g_source_destroy(timeout_src);
+    if (data.timeout_src) {
+        g_source_destroy(data.timeout_src);
+        g_source_unref(data.timeout_src);
+    }
     g_source_destroy(progress_src);
     if (stdin_src)
         g_source_destroy(stdin_src);
--
2.47.0
On 12/6/24 17:25, Jiri Denemark wrote:
> When starting a migration with --timeout, we create a thread to call the
> migration API and in parallel set up a timer for the timeout.
> [...]
> Signed-off-by: Jiri Denemark <jdenemar@redhat.com>

Reviewed-by: Michal Privoznik <mprivozn@redhat.com>

Michal