include/libvirt/libvirt-domain.h | 9 ++++ include/libvirt/libvirt-qemu.h | 8 +-- src/driver-hypervisor.h | 6 +++ src/libvirt-domain.c | 45 +++++++++++++++++ src/libvirt_public.syms | 1 + src/qemu/qemu_agent.c | 84 ++++++++++++++++++++------------ src/qemu/qemu_agent.h | 4 ++ src/qemu/qemu_domain.c | 15 ++++++ src/qemu/qemu_domain.h | 5 ++ src/qemu/qemu_driver.c | 22 +++++++++ src/remote/remote_driver.c | 1 + src/remote/remote_protocol.x | 18 ++++++- src/remote_protocol-structs | 9 ++++ 13 files changed, 190 insertions(+), 37 deletions(-)
Some layered products such as oVirt have requested a way to avoid being
blocked by guest agent commands when querying a loaded vm. For example,
many guest agent commands are polled periodically to monitor changes,
and rather than blocking the calling process, they'd prefer to simply
time out when an agent query is taking too long.
This patch adds a way for the user to specify a custom agent timeout
that is applied to all agent commands.
One special case to note here is the 'guest-sync' command. 'guest-sync'
is issued internally prior to calling any other command. (For example,
when libvirt wants to call 'guest-get-fsinfo', we first call
'guest-sync' and then call 'guest-get-fsinfo').
Previously, the 'guest-sync' command used a 5-second timeout
(VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT), whereas the actual command that
followed always blocked indefinitely
(VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK). As part of this patch, if a
custom timeout is specified that is shorter than
5 seconds, this new timeout is also used for 'guest-sync'. If there is
no custom timeout or if the custom timeout is longer than 5 seconds, we
will continue to use the 5-second timeout.
See https://bugzilla.redhat.com/show_bug.cgi?id=1705426 for additional details.
Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com>
---
I've addressed most of the previous review comments from Peter, Daniel, and
Michal but I'm unsure yet whether there's concensus on this feature. Here's a
v2 for your consideration.
I admit that the proposed API is not perfect. It would probably be more elegant
if each agent command instead had its own 'timeout' argument so that the caller
could specify the timeout for each invocation. But that is not the case, and
introducing duplicate API for each agent command (with an additional timeout
argument) would clutter the public API. In addition, we still have the issue
Michal mentioned elsewhere in the thread where some commands like shutdown may
or may not use the agent, so a timeout argument could be confusing.
So: is this a viable approach, or should I rethink it?
Changes in v2:
- Make this an official public API rather than putting it in libvirt-qemu.
- Don't use the qemuDomainObjEnterAgent()/ExitAgent() API, which expects you
to acquire an agent job. Instead, introduce
qemuDomainObjSetAgentResponseTimeout() which simply locks the agent while
setting the variable.
- rename the function slightly for better descriptiveness:
virDomainQemuAgentSetTimeout() -> virDomainAgentSetResponseTimeout()
- added 'flags' argument for future-proofing.
include/libvirt/libvirt-domain.h | 9 ++++
include/libvirt/libvirt-qemu.h | 8 +--
src/driver-hypervisor.h | 6 +++
src/libvirt-domain.c | 45 +++++++++++++++++
src/libvirt_public.syms | 1 +
src/qemu/qemu_agent.c | 84 ++++++++++++++++++++------------
src/qemu/qemu_agent.h | 4 ++
src/qemu/qemu_domain.c | 15 ++++++
src/qemu/qemu_domain.h | 5 ++
src/qemu/qemu_driver.c | 22 +++++++++
src/remote/remote_driver.c | 1 +
src/remote/remote_protocol.x | 18 ++++++-
src/remote_protocol-structs | 9 ++++
13 files changed, 190 insertions(+), 37 deletions(-)
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index 22277b0a84..83f6c1b835 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -4916,4 +4916,13 @@ int virDomainGetGuestInfo(virDomainPtr domain,
int *nparams,
unsigned int flags);
+typedef enum {
+ VIR_DOMAIN_AGENT_COMMAND_BLOCK = -2,
+ VIR_DOMAIN_AGENT_COMMAND_DEFAULT = -1,
+ VIR_DOMAIN_AGENT_COMMAND_NOWAIT = 0,
+} virDomainAgentCommandTimeoutValues;
+int virDomainAgentSetResponseTimeout(virDomainPtr domain,
+ int timeout,
+ unsigned int flags);
+
#endif /* LIBVIRT_DOMAIN_H */
diff --git a/include/libvirt/libvirt-qemu.h b/include/libvirt/libvirt-qemu.h
index 891617443f..4a541167ad 100644
--- a/include/libvirt/libvirt-qemu.h
+++ b/include/libvirt/libvirt-qemu.h
@@ -43,10 +43,10 @@ virDomainPtr virDomainQemuAttach(virConnectPtr domain,
unsigned int flags);
typedef enum {
- VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN = -2,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK = -2,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT = -1,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_NOWAIT = 0,
+ VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN = VIR_DOMAIN_AGENT_COMMAND_BLOCK,
+ VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK = VIR_DOMAIN_AGENT_COMMAND_BLOCK,
+ VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT = VIR_DOMAIN_AGENT_COMMAND_DEFAULT,
+ VIR_DOMAIN_QEMU_AGENT_COMMAND_NOWAIT = VIR_DOMAIN_AGENT_COMMAND_NOWAIT,
VIR_DOMAIN_QEMU_AGENT_COMMAND_SHUTDOWN = 60,
} virDomainQemuAgentCommandTimeoutValues;
diff --git a/src/driver-hypervisor.h b/src/driver-hypervisor.h
index 015b2cd01c..4afd8f6ec5 100644
--- a/src/driver-hypervisor.h
+++ b/src/driver-hypervisor.h
@@ -1372,6 +1372,11 @@ typedef int
int *nparams,
unsigned int flags);
+typedef int
+(*virDrvDomainAgentSetResponseTimeout)(virDomainPtr domain,
+ int timeout,
+ unsigned int flags);
+
typedef struct _virHypervisorDriver virHypervisorDriver;
typedef virHypervisorDriver *virHypervisorDriverPtr;
@@ -1632,4 +1637,5 @@ struct _virHypervisorDriver {
virDrvDomainCheckpointGetParent domainCheckpointGetParent;
virDrvDomainCheckpointDelete domainCheckpointDelete;
virDrvDomainGetGuestInfo domainGetGuestInfo;
+ virDrvDomainAgentSetResponseTimeout domainAgentSetResponseTimeout;
};
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c
index dcab179e6e..b4e1b18164 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -12497,3 +12497,48 @@ int virDomainGetLaunchSecurityInfo(virDomainPtr domain,
virDispatchError(domain->conn);
return -1;
}
+
+/**
+ * virDomainAgentSetResponseTimeout:
+ * @domain: a domain object
+ * @timeout: timeout in seconds
+ * @flags: extra flags; not used yet, so callers should always pass 0
+ *
+ * Set how long to wait for a response from qemu agent commands. By default,
+ * agent commands block forever waiting for a response.
+ *
+ * @timeout must be -2, -1, 0 or positive.
+ * VIR_DOMAIN_AGENT_COMMAND_BLOCK(-2): meaning to block forever waiting for a
+ * result.
+ * VIR_DOMAIN_AGENT_COMMAND_DEFAULT(-1): use default timeout value.
+ * VIR_DOMAIN_AGENT_COMMAND_NOWAIT(0): does not wait.
+ * positive value: wait for @timeout seconds
+ *
+ * Returns 0 on success, -1 on failure
+ */
+int
+virDomainAgentSetResponseTimeout(virDomainPtr domain,
+ int timeout,
+ unsigned int flags)
+{
+ virConnectPtr conn;
+ VIR_DOMAIN_DEBUG(domain, "timeout=%i, flags=0x%x",
+ timeout, flags);
+
+ virResetLastError();
+
+ virCheckDomainReturn(domain, -1);
+ conn = domain->conn;
+
+ if (conn->driver->domainAgentSetResponseTimeout) {
+ if (conn->driver->domainAgentSetResponseTimeout(domain, timeout, flags) < 0)
+ goto error;
+ return 0;
+ }
+
+ virReportUnsupportedError();
+
+ error:
+ virDispatchError(conn);
+ return -1;
+}
diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms
index 40655fbbf5..74d5e0b4b9 100644
--- a/src/libvirt_public.syms
+++ b/src/libvirt_public.syms
@@ -859,6 +859,7 @@ LIBVIRT_5.7.0 {
LIBVIRT_5.8.0 {
virConnectSetIdentity;
+ virDomainAgentSetResponseTimeout;
} LIBVIRT_5.7.0;
# .... define new API here using predicted next version number ....
diff --git a/src/qemu/qemu_agent.c b/src/qemu/qemu_agent.c
index 4b914e6d3b..c2f9a025ba 100644
--- a/src/qemu/qemu_agent.c
+++ b/src/qemu/qemu_agent.c
@@ -127,6 +127,7 @@ struct _qemuAgent {
* but fire up an event on qemu monitor instead.
* Take that as indication of successful completion */
qemuAgentEvent await_event;
+ int timeout;
};
static virClassPtr qemuAgentClass;
@@ -695,6 +696,8 @@ qemuAgentOpen(virDomainObjPtr vm,
if (!(mon = virObjectLockableNew(qemuAgentClass)))
return NULL;
+ /* agent commands block by default, user can choose different behavior */
+ mon->timeout = VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK;
mon->fd = -1;
if (virCondInit(&mon->notify) < 0) {
virReportSystemError(errno, "%s",
@@ -907,6 +910,12 @@ qemuAgentGuestSync(qemuAgentPtr mon)
int send_ret;
unsigned long long id;
qemuAgentMessage sync_msg;
+ int timeout = VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT;
+
+ /* if user specified a custom agent timeout that is lower than the
+ * default timeout, use the shorter timeout instead */
+ if ((mon->timeout >= 0) && (mon->timeout < timeout))
+ timeout = mon->timeout;
memset(&sync_msg, 0, sizeof(sync_msg));
/* set only on first sync */
@@ -927,8 +936,7 @@ qemuAgentGuestSync(qemuAgentPtr mon)
VIR_DEBUG("Sending guest-sync command with ID: %llu", id);
- send_ret = qemuAgentSend(mon, &sync_msg,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT);
+ send_ret = qemuAgentSend(mon, &sync_msg, timeout);
VIR_DEBUG("qemuAgentSend returned: %d", send_ret);
@@ -1304,8 +1312,7 @@ int qemuAgentFSFreeze(qemuAgentPtr mon, const char **mountpoints,
if (!cmd)
goto cleanup;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0)
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0)
goto cleanup;
if (virJSONValueObjectGetNumberInt(reply, "return", &ret) < 0) {
@@ -1342,8 +1349,7 @@ int qemuAgentFSThaw(qemuAgentPtr mon)
if (!cmd)
return -1;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0)
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0)
goto cleanup;
if (virJSONValueObjectGetNumberInt(reply, "return", &ret) < 0) {
@@ -1380,8 +1386,7 @@ qemuAgentSuspend(qemuAgentPtr mon,
return -1;
mon->await_event = QEMU_AGENT_EVENT_SUSPEND;
- ret = qemuAgentCommand(mon, cmd, &reply, false,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK);
+ ret = qemuAgentCommand(mon, cmd, &reply, false, mon->timeout);
virJSONValueFree(cmd);
virJSONValueFree(reply);
@@ -1437,8 +1442,7 @@ qemuAgentFSTrim(qemuAgentPtr mon,
if (!cmd)
return ret;
- ret = qemuAgentCommand(mon, cmd, &reply, false,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK);
+ ret = qemuAgentCommand(mon, cmd, &reply, false, mon->timeout);
virJSONValueFree(cmd);
virJSONValueFree(reply);
@@ -1459,8 +1463,7 @@ qemuAgentGetVCPUs(qemuAgentPtr mon,
if (!(cmd = qemuAgentMakeCommand("guest-get-vcpus", NULL)))
return -1;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0)
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0)
goto cleanup;
if (!(data = virJSONValueObjectGetArray(reply, "return"))) {
@@ -1575,8 +1578,7 @@ qemuAgentSetVCPUsCommand(qemuAgentPtr mon,
NULL)))
goto cleanup;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0)
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0)
goto cleanup;
/* All negative values are invalid. Return of 0 is bogus since we wouldn't
@@ -1731,8 +1733,7 @@ qemuAgentGetHostname(qemuAgentPtr mon,
if (!cmd)
return ret;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) {
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) {
if (qemuAgentErrorCommandUnsupported(reply))
ret = -2;
goto cleanup;
@@ -1776,8 +1777,7 @@ qemuAgentGetTime(qemuAgentPtr mon,
if (!cmd)
return ret;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0)
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0)
goto cleanup;
if (virJSONValueObjectGetNumberUlong(reply, "return", &json_time) < 0) {
@@ -1842,8 +1842,7 @@ qemuAgentSetTime(qemuAgentPtr mon,
if (!cmd)
return ret;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0)
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0)
goto cleanup;
ret = 0;
@@ -2046,8 +2045,7 @@ qemuAgentGetFSInfoInternal(qemuAgentPtr mon,
if (!cmd)
return ret;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) {
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) {
if (qemuAgentErrorCommandUnsupported(reply))
ret = -2;
goto cleanup;
@@ -2336,8 +2334,7 @@ qemuAgentGetInterfaces(qemuAgentPtr mon,
if (!(cmd = qemuAgentMakeCommand("guest-network-get-interfaces", NULL)))
goto cleanup;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0)
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0)
goto cleanup;
if (!(ret_array = virJSONValueObjectGet(reply, "return"))) {
@@ -2514,8 +2511,7 @@ qemuAgentSetUserPassword(qemuAgentPtr mon,
NULL)))
goto cleanup;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0)
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0)
goto cleanup;
ret = 0;
@@ -2546,8 +2542,7 @@ qemuAgentGetUsers(qemuAgentPtr mon,
if (!(cmd = qemuAgentMakeCommand("guest-get-users", NULL)))
return -1;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) {
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) {
if (qemuAgentErrorCommandUnsupported(reply))
return -2;
return -1;
@@ -2636,8 +2631,7 @@ qemuAgentGetOSInfo(qemuAgentPtr mon,
if (!(cmd = qemuAgentMakeCommand("guest-get-osinfo", NULL)))
return -1;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) {
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) {
if (qemuAgentErrorCommandUnsupported(reply))
return -2;
return -1;
@@ -2692,8 +2686,7 @@ qemuAgentGetTimezone(qemuAgentPtr mon,
if (!(cmd = qemuAgentMakeCommand("guest-get-timezone", NULL)))
return -1;
- if (qemuAgentCommand(mon, cmd, &reply, true,
- VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) {
+ if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) {
if (qemuAgentErrorCommandUnsupported(reply))
return -2;
return -1;
@@ -2722,3 +2715,30 @@ qemuAgentGetTimezone(qemuAgentPtr mon,
return 0;
}
+
+/* qemuAgentSetResponseTimeout:
+ * mon: agent monitor
+ * timeout: number of seconds to wait for agent response
+ * flags: currently unused. Must pass 0
+ *
+ * the agent object must be locked prior to calling this function
+ *
+ * Returns: 0 on success
+ * -1 otherwise
+ */
+int
+qemuAgentSetResponseTimeout(qemuAgentPtr mon,
+ int timeout,
+ unsigned int flags G_GNUC_UNUSED)
+{
+ if (timeout < VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN) {
+ virReportError(VIR_ERR_INVALID_ARG,
+ _("guest agent timeout '%d' is "
+ "less than the minimum '%d'"),
+ timeout, VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN);
+ return -1;
+ }
+
+ mon->timeout = timeout;
+ return 0;
+}
diff --git a/src/qemu/qemu_agent.h b/src/qemu/qemu_agent.h
index 78e648992a..88ead4762a 100644
--- a/src/qemu/qemu_agent.h
+++ b/src/qemu/qemu_agent.h
@@ -140,3 +140,7 @@ int qemuAgentGetTimezone(qemuAgentPtr mon,
virTypedParameterPtr *params,
int *nparams,
int *maxparams);
+
+int qemuAgentSetResponseTimeout(qemuAgentPtr mon,
+ int timeout,
+ unsigned int flags);
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 667cc89072..40d0401e79 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -15564,3 +15564,18 @@ qemuDomainSupportsCheckpointsBlockjobs(virDomainObjPtr vm)
return 0;
}
+
+int qemuDomainObjSetAgentResponseTimeout(virDomainObjPtr vm,
+ int timeout,
+ unsigned int flags)
+{
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+ qemuAgentPtr agent = priv->agent;
+ int ret;
+
+ virObjectLock(agent);
+ ret = qemuAgentSetResponseTimeout(agent, timeout, flags);
+ virObjectUnlock(agent);
+
+ return ret;
+}
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index b23912ee98..34af55ef45 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -1225,3 +1225,8 @@ qemuDomainValidateActualNetDef(const virDomainNetDef *net,
int
qemuDomainSupportsCheckpointsBlockjobs(virDomainObjPtr vm)
G_GNUC_WARN_UNUSED_RESULT;
+
+int
+qemuDomainObjSetAgentResponseTimeout(virDomainObjPtr vm,
+ int seconds,
+ unsigned int flags);
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index d17c18705b..68f7b65d81 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -22654,6 +22654,27 @@ qemuDomainGetGuestInfo(virDomainPtr dom,
return ret;
}
+static int
+qemuDomainAgentSetResponseTimeout(virDomainPtr dom,
+ int timeout,
+ unsigned int flags)
+{
+ virDomainObjPtr vm = NULL;
+ int ret = -1;
+
+ if (!(vm = qemuDomainObjFromDomain(dom)))
+ goto cleanup;
+
+ if (virDomainAgentSetResponseTimeoutEnsureACL(dom->conn, vm->def) < 0)
+ goto cleanup;
+
+ ret = qemuDomainObjSetAgentResponseTimeout(vm, timeout, flags);
+
+ cleanup:
+ virDomainObjEndAPI(&vm);
+ return ret;
+}
+
static virHypervisorDriver qemuHypervisorDriver = {
.name = QEMU_DRIVER_NAME,
.connectURIProbe = qemuConnectURIProbe,
@@ -22890,6 +22911,7 @@ static virHypervisorDriver qemuHypervisorDriver = {
.domainCheckpointGetParent = qemuDomainCheckpointGetParent, /* 5.6.0 */
.domainCheckpointDelete = qemuDomainCheckpointDelete, /* 5.6.0 */
.domainGetGuestInfo = qemuDomainGetGuestInfo, /* 5.7.0 */
+ .domainAgentSetResponseTimeout = qemuDomainAgentSetResponseTimeout, /* 5.8.0 */
};
diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c
index 83477243fc..77edf0b4c4 100644
--- a/src/remote/remote_driver.c
+++ b/src/remote/remote_driver.c
@@ -8710,6 +8710,7 @@ static virHypervisorDriver hypervisor_driver = {
.domainCheckpointGetParent = remoteDomainCheckpointGetParent, /* 5.6.0 */
.domainCheckpointDelete = remoteDomainCheckpointDelete, /* 5.6.0 */
.domainGetGuestInfo = remoteDomainGetGuestInfo, /* 5.7.0 */
+ .domainAgentSetResponseTimeout = remoteDomainAgentSetResponseTimeout, /* 5.8.0 */
};
static virNetworkDriver network_driver = {
diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x
index f4e3392212..23e42d17b1 100644
--- a/src/remote/remote_protocol.x
+++ b/src/remote/remote_protocol.x
@@ -3744,6 +3744,16 @@ struct remote_connect_set_identity_args {
unsigned int flags;
};
+struct remote_domain_agent_set_response_timeout_args {
+ remote_nonnull_domain dom;
+ int timeout;
+ unsigned int flags;
+};
+
+struct remote_domain_agent_set_response_timeout_ret {
+ int result;
+};
+
/*----- Protocol. -----*/
/* Define the program number, protocol version and procedure numbers here. */
@@ -6617,5 +6627,11 @@ enum remote_procedure {
* @generate: client
* @acl: connect:write
*/
- REMOTE_PROC_CONNECT_SET_IDENTITY = 419
+ REMOTE_PROC_CONNECT_SET_IDENTITY = 419,
+
+ /**
+ * @generate: both
+ * @acl: domain:write
+ */
+ REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420
};
diff --git a/src/remote_protocol-structs b/src/remote_protocol-structs
index 51606e7473..9ad7a857e0 100644
--- a/src/remote_protocol-structs
+++ b/src/remote_protocol-structs
@@ -3114,6 +3114,14 @@ struct remote_connect_set_identity_args {
} params;
u_int flags;
};
+struct remote_domain_agent_set_response_timeout_args {
+ remote_nonnull_domain dom;
+ int timeout;
+ u_int flags;
+};
+struct remote_domain_agent_set_response_timeout_ret {
+ int result;
+};
enum remote_procedure {
REMOTE_PROC_CONNECT_OPEN = 1,
REMOTE_PROC_CONNECT_CLOSE = 2,
@@ -3534,4 +3542,5 @@ enum remote_procedure {
REMOTE_PROC_DOMAIN_CHECKPOINT_DELETE = 417,
REMOTE_PROC_DOMAIN_GET_GUEST_INFO = 418,
REMOTE_PROC_CONNECT_SET_IDENTITY = 419,
+ REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420,
};
--
2.21.0
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Unfortunately, I sent this patch out a bit prematurely. I forgot to run syntax-check after adding the 'flags' argument, so there are a few minor changes required for that. I also forgot to update the version references (should be 5.10 now since 5.9 was just released). But I'll wait to send an updated patch until I get some more feedback on whether the approach is viable or not. Jonathon On Wed, 2019-11-06 at 15:13 -0600, Jonathon Jongsma wrote: > Some layered products such as oVirt have requested a way to avoid > being > blocked by guest agent commands when querying a loaded vm. For > example, > many guest agent commands are polled periodically to monitor changes, > and rather than blocking the calling process, they'd prefer to simply > time out when an agent query is taking too long. > > This patch adds a way for the user to specify a custom agent timeout > that is applied to all agent commands. > > One special case to note here is the 'guest-sync' command. 'guest- > sync' > is issued internally prior to calling any other command. (For > example, > when libvirt wants to call 'guest-get-fsinfo', we first call > 'guest-sync' and then call 'guest-get-fsinfo'). > > Previously, the 'guest-sync' command used a 5-second timeout > (VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT), whereas the actual command > that > followed always blocked indefinitely > (VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK). As part of this patch, if a > custom timeout is specified that is shorter than > 5 seconds, this new timeout is also used for 'guest-sync'. If there > is > no custom timeout or if the custom timeout is longer than 5 seconds, > we > will continue to use the 5-second timeout. > > See https://bugzilla.redhat.com/show_bug.cgi?id=1705426 for > additional details. > > Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> > --- > I've addressed most of the previous review comments from Peter, > Daniel, and > Michal but I'm unsure yet whether there's concensus on this feature. > Here's a > v2 for your consideration. > > I admit that the proposed API is not perfect. It would probably be > more elegant > if each agent command instead had its own 'timeout' argument so that > the caller > could specify the timeout for each invocation. But that is not the > case, and > introducing duplicate API for each agent command (with an additional > timeout > argument) would clutter the public API. In addition, we still have > the issue > Michal mentioned elsewhere in the thread where some commands like > shutdown may > or may not use the agent, so a timeout argument could be confusing. > > So: is this a viable approach, or should I rethink it? > > Changes in v2: > - Make this an official public API rather than putting it in libvirt- > qemu. > - Don't use the qemuDomainObjEnterAgent()/ExitAgent() API, which > expects you > to acquire an agent job. Instead, introduce > qemuDomainObjSetAgentResponseTimeout() which simply locks the agent > while > setting the variable. > - rename the function slightly for better descriptiveness: > virDomainQemuAgentSetTimeout() -> > virDomainAgentSetResponseTimeout() > - added 'flags' argument for future-proofing. > > include/libvirt/libvirt-domain.h | 9 ++++ > include/libvirt/libvirt-qemu.h | 8 +-- > src/driver-hypervisor.h | 6 +++ > src/libvirt-domain.c | 45 +++++++++++++++++ > src/libvirt_public.syms | 1 + > src/qemu/qemu_agent.c | 84 ++++++++++++++++++++-------- > ---- > src/qemu/qemu_agent.h | 4 ++ > src/qemu/qemu_domain.c | 15 ++++++ > src/qemu/qemu_domain.h | 5 ++ > src/qemu/qemu_driver.c | 22 +++++++++ > src/remote/remote_driver.c | 1 + > src/remote/remote_protocol.x | 18 ++++++- > src/remote_protocol-structs | 9 ++++ > 13 files changed, 190 insertions(+), 37 deletions(-) > > diff --git a/include/libvirt/libvirt-domain.h > b/include/libvirt/libvirt-domain.h > index 22277b0a84..83f6c1b835 100644 > --- a/include/libvirt/libvirt-domain.h > +++ b/include/libvirt/libvirt-domain.h > @@ -4916,4 +4916,13 @@ int virDomainGetGuestInfo(virDomainPtr domain, > int *nparams, > unsigned int flags); > > +typedef enum { > + VIR_DOMAIN_AGENT_COMMAND_BLOCK = -2, > + VIR_DOMAIN_AGENT_COMMAND_DEFAULT = -1, > + VIR_DOMAIN_AGENT_COMMAND_NOWAIT = 0, > +} virDomainAgentCommandTimeoutValues; > +int virDomainAgentSetResponseTimeout(virDomainPtr domain, > + int timeout, > + unsigned int flags); > + > #endif /* LIBVIRT_DOMAIN_H */ > diff --git a/include/libvirt/libvirt-qemu.h > b/include/libvirt/libvirt-qemu.h > index 891617443f..4a541167ad 100644 > --- a/include/libvirt/libvirt-qemu.h > +++ b/include/libvirt/libvirt-qemu.h > @@ -43,10 +43,10 @@ virDomainPtr virDomainQemuAttach(virConnectPtr > domain, > unsigned int flags); > > typedef enum { > - VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN = -2, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK = -2, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT = -1, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_NOWAIT = 0, > + VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN = > VIR_DOMAIN_AGENT_COMMAND_BLOCK, > + VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK = > VIR_DOMAIN_AGENT_COMMAND_BLOCK, > + VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT = > VIR_DOMAIN_AGENT_COMMAND_DEFAULT, > + VIR_DOMAIN_QEMU_AGENT_COMMAND_NOWAIT = > VIR_DOMAIN_AGENT_COMMAND_NOWAIT, > VIR_DOMAIN_QEMU_AGENT_COMMAND_SHUTDOWN = 60, > } virDomainQemuAgentCommandTimeoutValues; > > diff --git a/src/driver-hypervisor.h b/src/driver-hypervisor.h > index 015b2cd01c..4afd8f6ec5 100644 > --- a/src/driver-hypervisor.h > +++ b/src/driver-hypervisor.h > @@ -1372,6 +1372,11 @@ typedef int > int *nparams, > unsigned int flags); > > +typedef int > +(*virDrvDomainAgentSetResponseTimeout)(virDomainPtr domain, > + int timeout, > + unsigned int flags); > + > typedef struct _virHypervisorDriver virHypervisorDriver; > typedef virHypervisorDriver *virHypervisorDriverPtr; > > @@ -1632,4 +1637,5 @@ struct _virHypervisorDriver { > virDrvDomainCheckpointGetParent domainCheckpointGetParent; > virDrvDomainCheckpointDelete domainCheckpointDelete; > virDrvDomainGetGuestInfo domainGetGuestInfo; > + virDrvDomainAgentSetResponseTimeout > domainAgentSetResponseTimeout; > }; > diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c > index dcab179e6e..b4e1b18164 100644 > --- a/src/libvirt-domain.c > +++ b/src/libvirt-domain.c > @@ -12497,3 +12497,48 @@ int > virDomainGetLaunchSecurityInfo(virDomainPtr domain, > virDispatchError(domain->conn); > return -1; > } > + > +/** > + * virDomainAgentSetResponseTimeout: > + * @domain: a domain object > + * @timeout: timeout in seconds > + * @flags: extra flags; not used yet, so callers should always pass > 0 > + * > + * Set how long to wait for a response from qemu agent commands. By > default, > + * agent commands block forever waiting for a response. > + * > + * @timeout must be -2, -1, 0 or positive. > + * VIR_DOMAIN_AGENT_COMMAND_BLOCK(-2): meaning to block forever > waiting for a > + * result. > + * VIR_DOMAIN_AGENT_COMMAND_DEFAULT(-1): use default timeout value. > + * VIR_DOMAIN_AGENT_COMMAND_NOWAIT(0): does not wait. > + * positive value: wait for @timeout seconds > + * > + * Returns 0 on success, -1 on failure > + */ > +int > +virDomainAgentSetResponseTimeout(virDomainPtr domain, > + int timeout, > + unsigned int flags) > +{ > + virConnectPtr conn; > + VIR_DOMAIN_DEBUG(domain, "timeout=%i, flags=0x%x", > + timeout, flags); > + > + virResetLastError(); > + > + virCheckDomainReturn(domain, -1); > + conn = domain->conn; > + > + if (conn->driver->domainAgentSetResponseTimeout) { > + if (conn->driver->domainAgentSetResponseTimeout(domain, > timeout, flags) < 0) > + goto error; > + return 0; > + } > + > + virReportUnsupportedError(); > + > + error: > + virDispatchError(conn); > + return -1; > +} > diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms > index 40655fbbf5..74d5e0b4b9 100644 > --- a/src/libvirt_public.syms > +++ b/src/libvirt_public.syms > @@ -859,6 +859,7 @@ LIBVIRT_5.7.0 { > > LIBVIRT_5.8.0 { > virConnectSetIdentity; > + virDomainAgentSetResponseTimeout; > } LIBVIRT_5.7.0; > > # .... define new API here using predicted next version number .... > diff --git a/src/qemu/qemu_agent.c b/src/qemu/qemu_agent.c > index 4b914e6d3b..c2f9a025ba 100644 > --- a/src/qemu/qemu_agent.c > +++ b/src/qemu/qemu_agent.c > @@ -127,6 +127,7 @@ struct _qemuAgent { > * but fire up an event on qemu monitor instead. > * Take that as indication of successful completion */ > qemuAgentEvent await_event; > + int timeout; > }; > > static virClassPtr qemuAgentClass; > @@ -695,6 +696,8 @@ qemuAgentOpen(virDomainObjPtr vm, > if (!(mon = virObjectLockableNew(qemuAgentClass))) > return NULL; > > + /* agent commands block by default, user can choose different > behavior */ > + mon->timeout = VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK; > mon->fd = -1; > if (virCondInit(&mon->notify) < 0) { > virReportSystemError(errno, "%s", > @@ -907,6 +910,12 @@ qemuAgentGuestSync(qemuAgentPtr mon) > int send_ret; > unsigned long long id; > qemuAgentMessage sync_msg; > + int timeout = VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT; > + > + /* if user specified a custom agent timeout that is lower than > the > + * default timeout, use the shorter timeout instead */ > + if ((mon->timeout >= 0) && (mon->timeout < timeout)) > + timeout = mon->timeout; > > memset(&sync_msg, 0, sizeof(sync_msg)); > /* set only on first sync */ > @@ -927,8 +936,7 @@ qemuAgentGuestSync(qemuAgentPtr mon) > > VIR_DEBUG("Sending guest-sync command with ID: %llu", id); > > - send_ret = qemuAgentSend(mon, &sync_msg, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT); > + send_ret = qemuAgentSend(mon, &sync_msg, timeout); > > VIR_DEBUG("qemuAgentSend returned: %d", send_ret); > > @@ -1304,8 +1312,7 @@ int qemuAgentFSFreeze(qemuAgentPtr mon, const > char **mountpoints, > if (!cmd) > goto cleanup; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (virJSONValueObjectGetNumberInt(reply, "return", &ret) < 0) { > @@ -1342,8 +1349,7 @@ int qemuAgentFSThaw(qemuAgentPtr mon) > if (!cmd) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (virJSONValueObjectGetNumberInt(reply, "return", &ret) < 0) { > @@ -1380,8 +1386,7 @@ qemuAgentSuspend(qemuAgentPtr mon, > return -1; > > mon->await_event = QEMU_AGENT_EVENT_SUSPEND; > - ret = qemuAgentCommand(mon, cmd, &reply, false, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK); > + ret = qemuAgentCommand(mon, cmd, &reply, false, mon->timeout); > > virJSONValueFree(cmd); > virJSONValueFree(reply); > @@ -1437,8 +1442,7 @@ qemuAgentFSTrim(qemuAgentPtr mon, > if (!cmd) > return ret; > > - ret = qemuAgentCommand(mon, cmd, &reply, false, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK); > + ret = qemuAgentCommand(mon, cmd, &reply, false, mon->timeout); > > virJSONValueFree(cmd); > virJSONValueFree(reply); > @@ -1459,8 +1463,7 @@ qemuAgentGetVCPUs(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-get-vcpus", NULL))) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (!(data = virJSONValueObjectGetArray(reply, "return"))) { > @@ -1575,8 +1578,7 @@ qemuAgentSetVCPUsCommand(qemuAgentPtr mon, > NULL))) > goto cleanup; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > /* All negative values are invalid. Return of 0 is bogus since > we wouldn't > @@ -1731,8 +1733,7 @@ qemuAgentGetHostname(qemuAgentPtr mon, > if (!cmd) > return ret; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > { > if (qemuAgentErrorCommandUnsupported(reply)) > ret = -2; > goto cleanup; > @@ -1776,8 +1777,7 @@ qemuAgentGetTime(qemuAgentPtr mon, > if (!cmd) > return ret; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (virJSONValueObjectGetNumberUlong(reply, "return", > &json_time) < 0) { > @@ -1842,8 +1842,7 @@ qemuAgentSetTime(qemuAgentPtr mon, > if (!cmd) > return ret; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > ret = 0; > @@ -2046,8 +2045,7 @@ qemuAgentGetFSInfoInternal(qemuAgentPtr mon, > if (!cmd) > return ret; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > { > if (qemuAgentErrorCommandUnsupported(reply)) > ret = -2; > goto cleanup; > @@ -2336,8 +2334,7 @@ qemuAgentGetInterfaces(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-network-get-interfaces", > NULL))) > goto cleanup; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (!(ret_array = virJSONValueObjectGet(reply, "return"))) { > @@ -2514,8 +2511,7 @@ qemuAgentSetUserPassword(qemuAgentPtr mon, > NULL))) > goto cleanup; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > ret = 0; > @@ -2546,8 +2542,7 @@ qemuAgentGetUsers(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-get-users", NULL))) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > { > if (qemuAgentErrorCommandUnsupported(reply)) > return -2; > return -1; > @@ -2636,8 +2631,7 @@ qemuAgentGetOSInfo(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-get-osinfo", NULL))) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > { > if (qemuAgentErrorCommandUnsupported(reply)) > return -2; > return -1; > @@ -2692,8 +2686,7 @@ qemuAgentGetTimezone(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-get-timezone", NULL))) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > { > if (qemuAgentErrorCommandUnsupported(reply)) > return -2; > return -1; > @@ -2722,3 +2715,30 @@ qemuAgentGetTimezone(qemuAgentPtr mon, > > return 0; > } > + > +/* qemuAgentSetResponseTimeout: > + * mon: agent monitor > + * timeout: number of seconds to wait for agent response > + * flags: currently unused. Must pass 0 > + * > + * the agent object must be locked prior to calling this function > + * > + * Returns: 0 on success > + * -1 otherwise > + */ > +int > +qemuAgentSetResponseTimeout(qemuAgentPtr mon, > + int timeout, > + unsigned int flags G_GNUC_UNUSED) > +{ > + if (timeout < VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN) { > + virReportError(VIR_ERR_INVALID_ARG, > + _("guest agent timeout '%d' is " > + "less than the minimum '%d'"), > + timeout, VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN); > + return -1; > + } > + > + mon->timeout = timeout; > + return 0; > +} > diff --git a/src/qemu/qemu_agent.h b/src/qemu/qemu_agent.h > index 78e648992a..88ead4762a 100644 > --- a/src/qemu/qemu_agent.h > +++ b/src/qemu/qemu_agent.h > @@ -140,3 +140,7 @@ int qemuAgentGetTimezone(qemuAgentPtr mon, > virTypedParameterPtr *params, > int *nparams, > int *maxparams); > + > +int qemuAgentSetResponseTimeout(qemuAgentPtr mon, > + int timeout, > + unsigned int flags); > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index 667cc89072..40d0401e79 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c > @@ -15564,3 +15564,18 @@ > qemuDomainSupportsCheckpointsBlockjobs(virDomainObjPtr vm) > > return 0; > } > + > +int qemuDomainObjSetAgentResponseTimeout(virDomainObjPtr vm, > + int timeout, > + unsigned int flags) > +{ > + qemuDomainObjPrivatePtr priv = vm->privateData; > + qemuAgentPtr agent = priv->agent; > + int ret; > + > + virObjectLock(agent); > + ret = qemuAgentSetResponseTimeout(agent, timeout, flags); > + virObjectUnlock(agent); > + > + return ret; > +} > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h > index b23912ee98..34af55ef45 100644 > --- a/src/qemu/qemu_domain.h > +++ b/src/qemu/qemu_domain.h > @@ -1225,3 +1225,8 @@ qemuDomainValidateActualNetDef(const > virDomainNetDef *net, > int > qemuDomainSupportsCheckpointsBlockjobs(virDomainObjPtr vm) > G_GNUC_WARN_UNUSED_RESULT; > + > +int > +qemuDomainObjSetAgentResponseTimeout(virDomainObjPtr vm, > + int seconds, > + unsigned int flags); > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index d17c18705b..68f7b65d81 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -22654,6 +22654,27 @@ qemuDomainGetGuestInfo(virDomainPtr dom, > return ret; > } > > +static int > +qemuDomainAgentSetResponseTimeout(virDomainPtr dom, > + int timeout, > + unsigned int flags) > +{ > + virDomainObjPtr vm = NULL; > + int ret = -1; > + > + if (!(vm = qemuDomainObjFromDomain(dom))) > + goto cleanup; > + > + if (virDomainAgentSetResponseTimeoutEnsureACL(dom->conn, vm- > >def) < 0) > + goto cleanup; > + > + ret = qemuDomainObjSetAgentResponseTimeout(vm, timeout, flags); > + > + cleanup: > + virDomainObjEndAPI(&vm); > + return ret; > +} > + > static virHypervisorDriver qemuHypervisorDriver = { > .name = QEMU_DRIVER_NAME, > .connectURIProbe = qemuConnectURIProbe, > @@ -22890,6 +22911,7 @@ static virHypervisorDriver > qemuHypervisorDriver = { > .domainCheckpointGetParent = qemuDomainCheckpointGetParent, /* > 5.6.0 */ > .domainCheckpointDelete = qemuDomainCheckpointDelete, /* 5.6.0 > */ > .domainGetGuestInfo = qemuDomainGetGuestInfo, /* 5.7.0 */ > + .domainAgentSetResponseTimeout = > qemuDomainAgentSetResponseTimeout, /* 5.8.0 */ > }; > > > diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c > index 83477243fc..77edf0b4c4 100644 > --- a/src/remote/remote_driver.c > +++ b/src/remote/remote_driver.c > @@ -8710,6 +8710,7 @@ static virHypervisorDriver hypervisor_driver = > { > .domainCheckpointGetParent = remoteDomainCheckpointGetParent, /* > 5.6.0 */ > .domainCheckpointDelete = remoteDomainCheckpointDelete, /* 5.6.0 > */ > .domainGetGuestInfo = remoteDomainGetGuestInfo, /* 5.7.0 */ > + .domainAgentSetResponseTimeout = > remoteDomainAgentSetResponseTimeout, /* 5.8.0 */ > }; > > static virNetworkDriver network_driver = { > diff --git a/src/remote/remote_protocol.x > b/src/remote/remote_protocol.x > index f4e3392212..23e42d17b1 100644 > --- a/src/remote/remote_protocol.x > +++ b/src/remote/remote_protocol.x > @@ -3744,6 +3744,16 @@ struct remote_connect_set_identity_args { > unsigned int flags; > }; > > +struct remote_domain_agent_set_response_timeout_args { > + remote_nonnull_domain dom; > + int timeout; > + unsigned int flags; > +}; > + > +struct remote_domain_agent_set_response_timeout_ret { > + int result; > +}; > + > /*----- Protocol. -----*/ > > /* Define the program number, protocol version and procedure numbers > here. */ > @@ -6617,5 +6627,11 @@ enum remote_procedure { > * @generate: client > * @acl: connect:write > */ > - REMOTE_PROC_CONNECT_SET_IDENTITY = 419 > + REMOTE_PROC_CONNECT_SET_IDENTITY = 419, > + > + /** > + * @generate: both > + * @acl: domain:write > + */ > + REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420 > }; > diff --git a/src/remote_protocol-structs b/src/remote_protocol- > structs > index 51606e7473..9ad7a857e0 100644 > --- a/src/remote_protocol-structs > +++ b/src/remote_protocol-structs > @@ -3114,6 +3114,14 @@ struct remote_connect_set_identity_args { > } params; > u_int flags; > }; > +struct remote_domain_agent_set_response_timeout_args { > + remote_nonnull_domain dom; > + int timeout; > + u_int flags; > +}; > +struct remote_domain_agent_set_response_timeout_ret { > + int result; > +}; > enum remote_procedure { > REMOTE_PROC_CONNECT_OPEN = 1, > REMOTE_PROC_CONNECT_CLOSE = 2, > @@ -3534,4 +3542,5 @@ enum remote_procedure { > REMOTE_PROC_DOMAIN_CHECKPOINT_DELETE = 417, > REMOTE_PROC_DOMAIN_GET_GUEST_INFO = 418, > REMOTE_PROC_CONNECT_SET_IDENTITY = 419, > + REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420, > }; -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
On 11/6/19 10:13 PM, Jonathon Jongsma wrote: > Some layered products such as oVirt have requested a way to avoid being > blocked by guest agent commands when querying a loaded vm. For example, > many guest agent commands are polled periodically to monitor changes, > and rather than blocking the calling process, they'd prefer to simply > time out when an agent query is taking too long. > > This patch adds a way for the user to specify a custom agent timeout > that is applied to all agent commands. > > One special case to note here is the 'guest-sync' command. 'guest-sync' > is issued internally prior to calling any other command. (For example, > when libvirt wants to call 'guest-get-fsinfo', we first call > 'guest-sync' and then call 'guest-get-fsinfo'). > > Previously, the 'guest-sync' command used a 5-second timeout > (VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT), whereas the actual command that > followed always blocked indefinitely > (VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK). As part of this patch, if a > custom timeout is specified that is shorter than > 5 seconds, this new timeout is also used for 'guest-sync'. If there is > no custom timeout or if the custom timeout is longer than 5 seconds, we > will continue to use the 5-second timeout. > > See https://bugzilla.redhat.com/show_bug.cgi?id=1705426 for additional details. > > Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> > --- > I've addressed most of the previous review comments from Peter, Daniel, and > Michal but I'm unsure yet whether there's concensus on this feature. Here's a > v2 for your consideration. > > I admit that the proposed API is not perfect. It would probably be more elegant > if each agent command instead had its own 'timeout' argument so that the caller > could specify the timeout for each invocation. But that is not the case, and > introducing duplicate API for each agent command (with an additional timeout > argument) would clutter the public API. In addition, we still have the issue > Michal mentioned elsewhere in the thread where some commands like shutdown may > or may not use the agent, so a timeout argument could be confusing. > > So: is this a viable approach, or should I rethink it? It's on the right path, but there are some issues. See below. It's not this simple. > > Changes in v2: > - Make this an official public API rather than putting it in libvirt-qemu. > - Don't use the qemuDomainObjEnterAgent()/ExitAgent() API, which expects you > to acquire an agent job. Instead, introduce > qemuDomainObjSetAgentResponseTimeout() which simply locks the agent while > setting the variable. > - rename the function slightly for better descriptiveness: > virDomainQemuAgentSetTimeout() -> virDomainAgentSetResponseTimeout() > - added 'flags' argument for future-proofing. > > include/libvirt/libvirt-domain.h | 9 ++++ > include/libvirt/libvirt-qemu.h | 8 +-- > src/driver-hypervisor.h | 6 +++ > src/libvirt-domain.c | 45 +++++++++++++++++ > src/libvirt_public.syms | 1 + > src/qemu/qemu_agent.c | 84 ++++++++++++++++++++------------ > src/qemu/qemu_agent.h | 4 ++ > src/qemu/qemu_domain.c | 15 ++++++ > src/qemu/qemu_domain.h | 5 ++ > src/qemu/qemu_driver.c | 22 +++++++++ > src/remote/remote_driver.c | 1 + > src/remote/remote_protocol.x | 18 ++++++- > src/remote_protocol-structs | 9 ++++ > 13 files changed, 190 insertions(+), 37 deletions(-) > > diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h > index 22277b0a84..83f6c1b835 100644 > --- a/include/libvirt/libvirt-domain.h > +++ b/include/libvirt/libvirt-domain.h > @@ -4916,4 +4916,13 @@ int virDomainGetGuestInfo(virDomainPtr domain, > int *nparams, > unsigned int flags); > > +typedef enum { > + VIR_DOMAIN_AGENT_COMMAND_BLOCK = -2, > + VIR_DOMAIN_AGENT_COMMAND_DEFAULT = -1, > + VIR_DOMAIN_AGENT_COMMAND_NOWAIT = 0, > +} virDomainAgentCommandTimeoutValues; > +int virDomainAgentSetResponseTimeout(virDomainPtr domain, > + int timeout, > + unsigned int flags); > + > #endif /* LIBVIRT_DOMAIN_H */ > diff --git a/include/libvirt/libvirt-qemu.h b/include/libvirt/libvirt-qemu.h > index 891617443f..4a541167ad 100644 > --- a/include/libvirt/libvirt-qemu.h > +++ b/include/libvirt/libvirt-qemu.h > @@ -43,10 +43,10 @@ virDomainPtr virDomainQemuAttach(virConnectPtr domain, > unsigned int flags); > > typedef enum { > - VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN = -2, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK = -2, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT = -1, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_NOWAIT = 0, > + VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN = VIR_DOMAIN_AGENT_COMMAND_BLOCK, > + VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK = VIR_DOMAIN_AGENT_COMMAND_BLOCK, > + VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT = VIR_DOMAIN_AGENT_COMMAND_DEFAULT, > + VIR_DOMAIN_QEMU_AGENT_COMMAND_NOWAIT = VIR_DOMAIN_AGENT_COMMAND_NOWAIT, > VIR_DOMAIN_QEMU_AGENT_COMMAND_SHUTDOWN = 60, > } virDomainQemuAgentCommandTimeoutValues; > > diff --git a/src/driver-hypervisor.h b/src/driver-hypervisor.h > index 015b2cd01c..4afd8f6ec5 100644 > --- a/src/driver-hypervisor.h > +++ b/src/driver-hypervisor.h > @@ -1372,6 +1372,11 @@ typedef int > int *nparams, > unsigned int flags); > > +typedef int > +(*virDrvDomainAgentSetResponseTimeout)(virDomainPtr domain, > + int timeout, > + unsigned int flags); > + > typedef struct _virHypervisorDriver virHypervisorDriver; > typedef virHypervisorDriver *virHypervisorDriverPtr; > > @@ -1632,4 +1637,5 @@ struct _virHypervisorDriver { > virDrvDomainCheckpointGetParent domainCheckpointGetParent; > virDrvDomainCheckpointDelete domainCheckpointDelete; > virDrvDomainGetGuestInfo domainGetGuestInfo; > + virDrvDomainAgentSetResponseTimeout domainAgentSetResponseTimeout; > }; > diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c > index dcab179e6e..b4e1b18164 100644 > --- a/src/libvirt-domain.c > +++ b/src/libvirt-domain.c > @@ -12497,3 +12497,48 @@ int virDomainGetLaunchSecurityInfo(virDomainPtr domain, > virDispatchError(domain->conn); > return -1; > } > + > +/** > + * virDomainAgentSetResponseTimeout: > + * @domain: a domain object > + * @timeout: timeout in seconds > + * @flags: extra flags; not used yet, so callers should always pass 0 > + * > + * Set how long to wait for a response from qemu agent commands. By default, > + * agent commands block forever waiting for a response. > + * > + * @timeout must be -2, -1, 0 or positive. > + * VIR_DOMAIN_AGENT_COMMAND_BLOCK(-2): meaning to block forever waiting for a > + * result. > + * VIR_DOMAIN_AGENT_COMMAND_DEFAULT(-1): use default timeout value. > + * VIR_DOMAIN_AGENT_COMMAND_NOWAIT(0): does not wait. > + * positive value: wait for @timeout seconds > + * > + * Returns 0 on success, -1 on failure > + */ > +int > +virDomainAgentSetResponseTimeout(virDomainPtr domain, > + int timeout, > + unsigned int flags) > +{ > + virConnectPtr conn; > + VIR_DOMAIN_DEBUG(domain, "timeout=%i, flags=0x%x", > + timeout, flags); > + > + virResetLastError(); > + > + virCheckDomainReturn(domain, -1); > + conn = domain->conn; > + > + if (conn->driver->domainAgentSetResponseTimeout) { > + if (conn->driver->domainAgentSetResponseTimeout(domain, timeout, flags) < 0) > + goto error; > + return 0; > + } > + > + virReportUnsupportedError(); > + > + error: > + virDispatchError(conn); > + return -1; > +} > diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms > index 40655fbbf5..74d5e0b4b9 100644 > --- a/src/libvirt_public.syms > +++ b/src/libvirt_public.syms > @@ -859,6 +859,7 @@ LIBVIRT_5.7.0 { > > LIBVIRT_5.8.0 { > virConnectSetIdentity; > + virDomainAgentSetResponseTimeout; > } LIBVIRT_5.7.0; This needs to be updated. > > # .... define new API here using predicted next version number .... > diff --git a/src/qemu/qemu_agent.c b/src/qemu/qemu_agent.c > index 4b914e6d3b..c2f9a025ba 100644 > --- a/src/qemu/qemu_agent.c > +++ b/src/qemu/qemu_agent.c > @@ -127,6 +127,7 @@ struct _qemuAgent { > * but fire up an event on qemu monitor instead. > * Take that as indication of successful completion */ > qemuAgentEvent await_event; > + int timeout; > }; > > static virClassPtr qemuAgentClass; > @@ -695,6 +696,8 @@ qemuAgentOpen(virDomainObjPtr vm, > if (!(mon = virObjectLockableNew(qemuAgentClass))) > return NULL; > > + /* agent commands block by default, user can choose different behavior */ > + mon->timeout = VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK; This needs to be changed, see below [1]. > mon->fd = -1; > if (virCondInit(&mon->notify) < 0) { > virReportSystemError(errno, "%s", > @@ -907,6 +910,12 @@ qemuAgentGuestSync(qemuAgentPtr mon) > int send_ret; > unsigned long long id; > qemuAgentMessage sync_msg; > + int timeout = VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT; > + > + /* if user specified a custom agent timeout that is lower than the > + * default timeout, use the shorter timeout instead */ > + if ((mon->timeout >= 0) && (mon->timeout < timeout)) > + timeout = mon->timeout; > > memset(&sync_msg, 0, sizeof(sync_msg)); > /* set only on first sync */ > @@ -927,8 +936,7 @@ qemuAgentGuestSync(qemuAgentPtr mon) > > VIR_DEBUG("Sending guest-sync command with ID: %llu", id); > > - send_ret = qemuAgentSend(mon, &sync_msg, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT); > + send_ret = qemuAgentSend(mon, &sync_msg, timeout); > > VIR_DEBUG("qemuAgentSend returned: %d", send_ret); > > @@ -1304,8 +1312,7 @@ int qemuAgentFSFreeze(qemuAgentPtr mon, const char **mountpoints, > if (!cmd) > goto cleanup; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (virJSONValueObjectGetNumberInt(reply, "return", &ret) < 0) { > @@ -1342,8 +1349,7 @@ int qemuAgentFSThaw(qemuAgentPtr mon) > if (!cmd) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (virJSONValueObjectGetNumberInt(reply, "return", &ret) < 0) { > @@ -1380,8 +1386,7 @@ qemuAgentSuspend(qemuAgentPtr mon, > return -1; > > mon->await_event = QEMU_AGENT_EVENT_SUSPEND; > - ret = qemuAgentCommand(mon, cmd, &reply, false, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK); > + ret = qemuAgentCommand(mon, cmd, &reply, false, mon->timeout); > > virJSONValueFree(cmd); > virJSONValueFree(reply); > @@ -1437,8 +1442,7 @@ qemuAgentFSTrim(qemuAgentPtr mon, > if (!cmd) > return ret; > > - ret = qemuAgentCommand(mon, cmd, &reply, false, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK); > + ret = qemuAgentCommand(mon, cmd, &reply, false, mon->timeout); > > virJSONValueFree(cmd); > virJSONValueFree(reply); > @@ -1459,8 +1463,7 @@ qemuAgentGetVCPUs(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-get-vcpus", NULL))) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (!(data = virJSONValueObjectGetArray(reply, "return"))) { > @@ -1575,8 +1578,7 @@ qemuAgentSetVCPUsCommand(qemuAgentPtr mon, > NULL))) > goto cleanup; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > /* All negative values are invalid. Return of 0 is bogus since we wouldn't > @@ -1731,8 +1733,7 @@ qemuAgentGetHostname(qemuAgentPtr mon, > if (!cmd) > return ret; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) { > if (qemuAgentErrorCommandUnsupported(reply)) > ret = -2; > goto cleanup; > @@ -1776,8 +1777,7 @@ qemuAgentGetTime(qemuAgentPtr mon, > if (!cmd) > return ret; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (virJSONValueObjectGetNumberUlong(reply, "return", &json_time) < 0) { > @@ -1842,8 +1842,7 @@ qemuAgentSetTime(qemuAgentPtr mon, > if (!cmd) > return ret; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > ret = 0; > @@ -2046,8 +2045,7 @@ qemuAgentGetFSInfoInternal(qemuAgentPtr mon, > if (!cmd) > return ret; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) { > if (qemuAgentErrorCommandUnsupported(reply)) > ret = -2; > goto cleanup; > @@ -2336,8 +2334,7 @@ qemuAgentGetInterfaces(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-network-get-interfaces", NULL))) > goto cleanup; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > if (!(ret_array = virJSONValueObjectGet(reply, "return"))) { > @@ -2514,8 +2511,7 @@ qemuAgentSetUserPassword(qemuAgentPtr mon, > NULL))) > goto cleanup; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) > goto cleanup; > > ret = 0; > @@ -2546,8 +2542,7 @@ qemuAgentGetUsers(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-get-users", NULL))) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) { > if (qemuAgentErrorCommandUnsupported(reply)) > return -2; > return -1; > @@ -2636,8 +2631,7 @@ qemuAgentGetOSInfo(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-get-osinfo", NULL))) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) { > if (qemuAgentErrorCommandUnsupported(reply)) > return -2; > return -1; > @@ -2692,8 +2686,7 @@ qemuAgentGetTimezone(qemuAgentPtr mon, > if (!(cmd = qemuAgentMakeCommand("guest-get-timezone", NULL))) > return -1; > > - if (qemuAgentCommand(mon, cmd, &reply, true, > - VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0) { > + if (qemuAgentCommand(mon, cmd, &reply, true, mon->timeout) < 0) { > if (qemuAgentErrorCommandUnsupported(reply)) > return -2; > return -1; > @@ -2722,3 +2715,30 @@ qemuAgentGetTimezone(qemuAgentPtr mon, > > return 0; > } > + > +/* qemuAgentSetResponseTimeout: > + * mon: agent monitor > + * timeout: number of seconds to wait for agent response > + * flags: currently unused. Must pass 0 > + * > + * the agent object must be locked prior to calling this function > + * > + * Returns: 0 on success > + * -1 otherwise > + */ > +int > +qemuAgentSetResponseTimeout(qemuAgentPtr mon, > + int timeout, > + unsigned int flags G_GNUC_UNUSED) @flags can be dropped. They should be checked for in qemu driver. If we ever need to populate them here, we can modify this function to have the argument. > +{ > + if (timeout < VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN) { > + virReportError(VIR_ERR_INVALID_ARG, > + _("guest agent timeout '%d' is " > + "less than the minimum '%d'"), > + timeout, VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN); > + return -1; > + } > + > + mon->timeout = timeout; > + return 0; > +} > diff --git a/src/qemu/qemu_agent.h b/src/qemu/qemu_agent.h > index 78e648992a..88ead4762a 100644 > --- a/src/qemu/qemu_agent.h > +++ b/src/qemu/qemu_agent.h > @@ -140,3 +140,7 @@ int qemuAgentGetTimezone(qemuAgentPtr mon, > virTypedParameterPtr *params, > int *nparams, > int *maxparams); > + > +int qemuAgentSetResponseTimeout(qemuAgentPtr mon, > + int timeout, > + unsigned int flags); > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index 667cc89072..40d0401e79 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c > @@ -15564,3 +15564,18 @@ qemuDomainSupportsCheckpointsBlockjobs(virDomainObjPtr vm) > > return 0; > } > + > +int qemuDomainObjSetAgentResponseTimeout(virDomainObjPtr vm, > + int timeout, > + unsigned int flags) > +{ > + qemuDomainObjPrivatePtr priv = vm->privateData; > + qemuAgentPtr agent = priv->agent; > + int ret; > + > + virObjectLock(agent); > + ret = qemuAgentSetResponseTimeout(agent, timeout, flags); > + virObjectUnlock(agent); > + > + return ret; Nope, priv->agent can be NULL. This function is needless, you can call qemuAgentSetResponseTimeout() right from qemu_driver.c. > +} > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h > index b23912ee98..34af55ef45 100644 > --- a/src/qemu/qemu_domain.h > +++ b/src/qemu/qemu_domain.h > @@ -1225,3 +1225,8 @@ qemuDomainValidateActualNetDef(const virDomainNetDef *net, > int > qemuDomainSupportsCheckpointsBlockjobs(virDomainObjPtr vm) > G_GNUC_WARN_UNUSED_RESULT; > + > +int > +qemuDomainObjSetAgentResponseTimeout(virDomainObjPtr vm, > + int seconds, > + unsigned int flags); > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index d17c18705b..68f7b65d81 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -22654,6 +22654,27 @@ qemuDomainGetGuestInfo(virDomainPtr dom, > return ret; > } > > +static int > +qemuDomainAgentSetResponseTimeout(virDomainPtr dom, > + int timeout, > + unsigned int flags) > +{ > + virDomainObjPtr vm = NULL; > + int ret = -1; > + > + if (!(vm = qemuDomainObjFromDomain(dom))) > + goto cleanup; > + > + if (virDomainAgentSetResponseTimeoutEnsureACL(dom->conn, vm->def) < 0) > + goto cleanup; > + > + ret = qemuDomainObjSetAgentResponseTimeout(vm, timeout, flags); 1: It's not that simple. Firstly, you need to grab an agent job (QEMU_AGENT_JOB_MODIFY ideally) to make sure no other thread is in the middle of talking to the agent. Secondly, some domains may not have an agent configured at all (in which case, priv->agent is going to be NULL). And lastly, we need to track the timeout set in domain private data so that if libvirtd restarts the same timeout is preserved. But changing private data means you have to acquire domain job too. In the end, I think we want something like this: + /* If domain has an agent, change it's timeout too. Otherwise + * just note down the request so that if agent appears it + * gets the requested timeout from the beginning. */ + if (qemuDomainAgentAvailable(vm, false)) + agentJob = QEMU_AGENT_JOB_MODIFY; + + if (qemuDomainObjBeginJobWithAgent(driver, vm, + QEMU_JOB_MODIFY, + agentJob) < 0) + goto cleanup; + + if (agentJob != QEMU_AGENT_JOB_NONE) { + qemuAgentPtr agent; + int rc; + + if (virDomainObjCheckActive(vm) < 0) + goto endjob; + + agent = qemuDomainObjEnterAgent(vm); + rc = qemuAgentSetResponseTimeout(agent, timeout); + qemuDomainObjExitAgent(vm, agent); + + if (rc < 0) + goto endjob; + } + + QEMU_DOMAIN_PRIVATE(vm)->agentTimeout = timeout; + ret = 0; + + endjob: + if (agentJob != QEMU_AGENT_JOB_NONE) + qemuDomainObjEndJobWithAgent(driver, vm); + else + qemuDomainObjEndJob(driver, vm); I've created a fixup commit and pushed it to my github. Feel free to incorporate it into v3 (you will need to implement saving/restoring of priv->agentTimeout to/from status XML - see qemuDomainObjPrivateXMLFormat() and qemuDomainObjPrivateXMLParse()): https://github.com/zippy2/libvirt/commit/b0f38070e09366d9a09428b19ecafee3f1aab2b4 The rest looks okay. But it would be also nice if you implement virsh command and expose this API through it. In my patch I've named it "qemu-agent-timeout" but that is probably wrong since the API name is generic and not tied to qemu. Michal -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Thanks for the thorough review Michal, clearly it needs more work. In particular, I was not yet aware of the private xml save/restore stuff. Thanks for the pointers in that regard. But I have one particular question below: On Fri, 2019-11-08 at 11:46 +0100, Michal Privoznik wrote: > 1: It's not that simple. Firstly, you need to grab an agent job > (QEMU_AGENT_JOB_MODIFY ideally) to make sure no other thread is in > the > middle of talking to the agent. Secondly, some domains may not have > an > agent configured at all (in which case, priv->agent is going to be > NULL). And lastly, we need to track the timeout set in domain > private > data so that if libvirtd restarts the same timeout is preserved. But > changing private data means you have to acquire domain job too. Is this really true? I'm obviously still not intimately familiar with the libvirt 'job' stuff, but my understanding of jobs is that it is a method to avoid multiple threads talking to the qemu monitor (or agent, for agent jobs) at the same time. The data integrity of the structs themselves are protected by mutex. But a mutex is insufficient to mediate access to the monitor/agent because waiting for a response may involve a sleep, and the mutex will be released while the thread sleeps. So this 'job' system was added on top to indicate that the monitor/agent was in use and other threads need to wait until the job is complete before they can interact with the monitor or agent. Is my understanding correct so far? In this situation, we're not talking to the qemu monitor or the agent, we're merely changing some struct data. So from a data-integrity point of view, we simply need to hold the mutex lock, right? (We do hold the mutex lock for the domain from calling qemuDomainObjFromDomain()). So it doesn't seem strictly necessary to acquire a job here (either for the agent, or for the domain/monitor). One argument I can see for acquiring a job here is that it ensures that any private data is not changed while another thread is in the middle of a monitor query. And that's potentially a compelling justification for acquiring a job. But in this circumstance, I don't see that it gains us much. For example, let's say Thread A has already acquired an agent job for getHostname(). This thread will call qemuAgentCommand() and pass the value of mon->timeout for the 'seconds' argument. Inside this function, it will further call qemuAgentGuestSync(), which again uses the mon- >timeout value to determine how long it should wait for a response. It sends the guest-sync command to the guest agent, and then waits for a response (releasing the mutex). While it is waiting, let's say that Thread B calls SetResponseTimeout(). Let's further imagine that SetResponseTimeout() doesn't acquire a job (as in my patch). It can acquire the agent mutex (since the other thread released it while sleeping) and safely change the mon->timeout value immediately. At some point, Thread A wakes up, handles the guest-sync response and issues the 'guest-get-host-name' command to the agent. It uses the value passed as an argument to qemuAgentCommand() to determine how long to wait for a response from the agent. So it will not use the new value set by thread B, but will use the old value that was set when it was first called. (But I think an argument could be made that it would be fine to use this new timeout value as well.) This behavior all seems fine to me. Am I misunderstanding something fundamentally here? Or is there just a general policy that all data modification should be protected by a job, regardless of whether we're using the qemu monitor or agent? If so, perhaps that could be better documented. One final concern: in your fixup commit, you acquire both a normal domain job and an agent job (in the case where the agent is active). But this is something I've been working to remove from libvirt after a discussion with Daniel Berrange in https://bugzilla.redhat.com/show_bug.cgi?id=1759566 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
On 11/9/19 1:12 AM, Jonathon Jongsma wrote: > Thanks for the thorough review Michal, clearly it needs more work. In > particular, I was not yet aware of the private xml save/restore stuff. > Thanks for the pointers in that regard. But I have one particular > question below: > > On Fri, 2019-11-08 at 11:46 +0100, Michal Privoznik wrote: >> 1: It's not that simple. Firstly, you need to grab an agent job >> (QEMU_AGENT_JOB_MODIFY ideally) to make sure no other thread is in >> the >> middle of talking to the agent. Secondly, some domains may not have >> an >> agent configured at all (in which case, priv->agent is going to be >> NULL). And lastly, we need to track the timeout set in domain >> private >> data so that if libvirtd restarts the same timeout is preserved. But >> changing private data means you have to acquire domain job too. > > > Is this really true? I'm obviously still not intimately familiar with > the libvirt 'job' stuff, but my understanding of jobs is that it is a > method to avoid multiple threads talking to the qemu monitor (or agent, > for agent jobs) at the same time. The data integrity of the structs > themselves are protected by mutex. But a mutex is insufficient to > mediate access to the monitor/agent because waiting for a response may > involve a sleep, and the mutex will be released while the thread > sleeps. So this 'job' system was added on top to indicate that the > monitor/agent was in use and other threads need to wait until the job > is complete before they can interact with the monitor or agent. Is my > understanding correct so far? Yes, more or less. You're right in assuming that mutex protects strucutres, and job is like a flag: lock(struct); struct->doNotModify = true; unlock(struct); ... /* now any other thread sees doNotModify set => should avoid modifications */ lock(struct); struct->doNotModify = false; unlock(struct); But one can also argue that every API should grab a job (we have both query and modify jobs available), because resulting code is future proof. I mean, if anybody ever adds a monitor/agent call somewhere, we won't enter monitor without job set (i.e. unlock domain object without doNotModify flag set in my example). To put this into a perspective, imagine there is a thread that does two agent calls (e.g. creating a snapshot with --quiesce calls 'fsfreeze' before creating snapshot followed by 'fsthaw' once snapshot is created). Since talking to an agent means releasing its lock, this thread might execute freeze and thaw with different timeouts (if your API is issued meanwhile). Perhaps in this simple case it's no problem and I just want to apply defensive programming with no real trade. > > In this situation, we're not talking to the qemu monitor or the agent, > we're merely changing some struct data. So from a data-integrity point > of view, we simply need to hold the mutex lock, right? (We do hold the > mutex lock for the domain from calling qemuDomainObjFromDomain()). So > it doesn't seem strictly necessary to acquire a job here (either for > the agent, or for the domain/monitor). > > One argument I can see for acquiring a job here is that it ensures that > any private data is not changed while another thread is in the middle > of a monitor query. And that's potentially a compelling justification > for acquiring a job. But in this circumstance, I don't see that it > gains us much. > > For example, let's say Thread A has already acquired an agent job for > getHostname(). This thread will call qemuAgentCommand() and pass the > value of mon->timeout for the 'seconds' argument. Inside this function, > it will further call qemuAgentGuestSync(), which again uses the mon- >> timeout value to determine how long it should wait for a response. It > sends the guest-sync command to the guest agent, and then waits for a > response (releasing the mutex). While it is waiting, let's say that > Thread B calls SetResponseTimeout(). Let's further imagine that > SetResponseTimeout() doesn't acquire a job (as in my patch). It can > acquire the agent mutex (since the other thread released it while > sleeping) and safely change the mon->timeout value immediately. At some > point, Thread A wakes up, handles the guest-sync response and issues > the 'guest-get-host-name' command to the agent. It uses the value > passed as an argument to qemuAgentCommand() to determine how long to > wait for a response from the agent. So it will not use the new value > set by thread B, but will use the old value that was set when it was > first called. (But I think an argument could be made that it would be > fine to use this new timeout value as well.) > > This behavior all seems fine to me. Am I misunderstanding something > fundamentally here? Or is there just a general policy that all data > modification should be protected by a job, regardless of whether we're > using the qemu monitor or agent? If so, perhaps that could be better > documented. > Alright, let's use only locks. However, one hidden advantage of grabbing a job here is that EndJob() APIs automatically save domain status XML onto disk => if you update priv->agentTimeout in the API, the status XML is automatically written thus new value is safely stored on disk in case libvirtd restarts. > One final concern: in your fixup commit, you acquire both a normal > domain job and an agent job (in the case where the agent is active). > But this is something I've been working to remove from libvirt after a > discussion with Daniel Berrange in > https://bugzilla.redhat.com/show_bug.cgi?id=1759566 > Yeah, well in this case we won't be really executing any guest agent API, merely mutually excluding with other threads that are talking to the agent. But let's use mutexes only. Michal -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
On Wed, Nov 06, 2019 at 03:13:21PM -0600, Jonathon Jongsma wrote: > Some layered products such as oVirt have requested a way to avoid being > blocked by guest agent commands when querying a loaded vm. For example, > many guest agent commands are polled periodically to monitor changes, > and rather than blocking the calling process, they'd prefer to simply > time out when an agent query is taking too long. > > This patch adds a way for the user to specify a custom agent timeout > that is applied to all agent commands. > > One special case to note here is the 'guest-sync' command. 'guest-sync' > is issued internally prior to calling any other command. (For example, > when libvirt wants to call 'guest-get-fsinfo', we first call > 'guest-sync' and then call 'guest-get-fsinfo'). > > Previously, the 'guest-sync' command used a 5-second timeout > (VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT), whereas the actual command that > followed always blocked indefinitely > (VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK). As part of this patch, if a > custom timeout is specified that is shorter than > 5 seconds, this new timeout is also used for 'guest-sync'. If there is > no custom timeout or if the custom timeout is longer than 5 seconds, we > will continue to use the 5-second timeout. > > See https://bugzilla.redhat.com/show_bug.cgi?id=1705426 for additional details. > > Signed-off-by: Jonathon Jongsma <jjongsma@redhat.com> > --- > I've addressed most of the previous review comments from Peter, Daniel, and > Michal but I'm unsure yet whether there's concensus on this feature. Here's a > v2 for your consideration. > > I admit that the proposed API is not perfect. It would probably be more elegant > if each agent command instead had its own 'timeout' argument so that the caller > could specify the timeout for each invocation. But that is not the case, and > introducing duplicate API for each agent command (with an additional timeout > argument) would clutter the public API. In addition, we still have the issue > Michal mentioned elsewhere in the thread where some commands like shutdown may > or may not use the agent, so a timeout argument could be confusing. > > So: is this a viable approach, or should I rethink it? > > Changes in v2: > - Make this an official public API rather than putting it in libvirt-qemu. > - Don't use the qemuDomainObjEnterAgent()/ExitAgent() API, which expects you > to acquire an agent job. Instead, introduce > qemuDomainObjSetAgentResponseTimeout() which simply locks the agent while > setting the variable. > - rename the function slightly for better descriptiveness: > virDomainQemuAgentSetTimeout() -> virDomainAgentSetResponseTimeout() > - added 'flags' argument for future-proofing. > > include/libvirt/libvirt-domain.h | 9 ++++ > include/libvirt/libvirt-qemu.h | 8 +-- > src/driver-hypervisor.h | 6 +++ > src/libvirt-domain.c | 45 +++++++++++++++++ > src/libvirt_public.syms | 1 + > src/qemu/qemu_agent.c | 84 ++++++++++++++++++++------------ > src/qemu/qemu_agent.h | 4 ++ > src/qemu/qemu_domain.c | 15 ++++++ > src/qemu/qemu_domain.h | 5 ++ > src/qemu/qemu_driver.c | 22 +++++++++ > src/remote/remote_driver.c | 1 + > src/remote/remote_protocol.x | 18 ++++++- > src/remote_protocol-structs | 9 ++++ > 13 files changed, 190 insertions(+), 37 deletions(-) > > diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h > index 22277b0a84..83f6c1b835 100644 > --- a/include/libvirt/libvirt-domain.h > +++ b/include/libvirt/libvirt-domain.h > @@ -4916,4 +4916,13 @@ int virDomainGetGuestInfo(virDomainPtr domain, > int *nparams, > unsigned int flags); > > +typedef enum { > + VIR_DOMAIN_AGENT_COMMAND_BLOCK = -2, > + VIR_DOMAIN_AGENT_COMMAND_DEFAULT = -1, > + VIR_DOMAIN_AGENT_COMMAND_NOWAIT = 0, > +} virDomainAgentCommandTimeoutValues; These constants should all have the word "TIMEOUT_" in their name eg VIR_DOMAIN_AGENT_COMMAND_TIMEOUT_NOWAIT > +int virDomainAgentSetResponseTimeout(virDomainPtr domain, > + int timeout, > + unsigned int flags); Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :| -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
© 2016 - 2024 Red Hat, Inc.