Related: https://gitlab.com/libvirt/libvirt/-/issues/276
This patch uses qemuMonitorQueryStats to query "halt_poll_success_ns"
and "halt_poll_fail_ns" for every vCPU. The respective values for each
vCPU are then added together.
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
---
src/qemu/qemu_driver.c | 69 +++++++++++++++++++++++++++++++++++++-----
1 file changed, 61 insertions(+), 8 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index da95f947..e1c595e5 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -18066,15 +18066,68 @@ qemuDomainGetStatsCpuCgroup(virDomainObj *dom,
}
static int
-qemuDomainGetStatsCpuHaltPollTime(virDomainObj *dom,
- virTypedParamList *params)
+qemuDomainGetStatsCpuHaltPollTime(virQEMUDriver *driver,
+ virDomainObj *dom,
+ virTypedParamList *params,
+ unsigned int privflags)
{
unsigned long long haltPollSuccess = 0;
unsigned long long haltPollFail = 0;
- pid_t pid = dom->pid;
+ qemuDomainObjPrivate *priv = dom->privateData;
+ bool queryStatsCap = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_STATS);
- if (virHostCPUGetHaltPollTime(pid, &haltPollSuccess, &haltPollFail) < 0)
- return 0;
+ if (queryStatsCap && HAVE_JOB(privflags) && virDomainObjIsActive(dom)) {
+ size_t i;
+ qemuMonitorQueryStatsTargetType target = QEMU_MONITOR_QUERY_STATS_TARGET_VCPU;
+ qemuMonitorQueryStatsProvider *provider = NULL;
+ g_autoptr(GPtrArray) providers = NULL;
+ g_autoptr(GPtrArray) queried_stats = NULL;
+ provider = qemuMonitorQueryStatsProviderNew(
+ QEMU_MONITOR_QUERY_STATS_PROVIDER_KVM,
+ QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_SUCCESS_NS,
+ QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_FAIL_NS,
+ QEMU_MONITOR_QUERY_STATS_NAME_LAST);
+
+ providers = g_ptr_array_new_full(1, (GDestroyNotify) qemuMonitorQueryStatsProviderFree);
+ g_ptr_array_add(providers, provider);
+
+ qemuDomainObjEnterMonitor(driver, dom);
+ queried_stats = qemuMonitorQueryStats(priv->mon, target, NULL, providers);
+ qemuDomainObjExitMonitor(dom);
+
+ if (!queried_stats)
+ return 0;
+
+ for (i = 0; i < queried_stats->len; i++) {
+ unsigned long long curHaltPollSuccess, curHaltPollFail;
+ GHashTable *cur_table = queried_stats->pdata[i];
+ virJSONValue *success_obj, *fail_obj;
+ const char *success_str = qemuMonitorQueryStatsNameTypeToString(
+ QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_SUCCESS_NS);
+ const char *fail_str = qemuMonitorQueryStatsNameTypeToString(
+ QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_FAIL_NS);
+
+ success_obj = g_hash_table_lookup(cur_table, success_str);
+ fail_obj = g_hash_table_lookup(cur_table, fail_str);
+
+ if (!success_obj || !fail_obj)
+ return 0;
+
+ if (virJSONValueGetNumberUlong(success_obj, &curHaltPollSuccess) < 0)
+ return 0;
+
+ if (virJSONValueGetNumberUlong(fail_obj, &curHaltPollFail) < 0)
+ return 0;
+
+ haltPollSuccess += curHaltPollSuccess;
+ haltPollFail += curHaltPollFail;
+ }
+ } else {
+ pid_t pid = dom->pid;
+
+ if (virHostCPUGetHaltPollTime(pid, &haltPollSuccess, &haltPollFail) < 0)
+ return 0;
+ }
if (virTypedParamListAddULLong(params, haltPollSuccess, "cpu.haltpoll.success.time") < 0 ||
virTypedParamListAddULLong(params, haltPollFail, "cpu.haltpoll.fail.time") < 0)
@@ -18087,7 +18140,7 @@ static int
qemuDomainGetStatsCpu(virQEMUDriver *driver,
virDomainObj *dom,
virTypedParamList *params,
- unsigned int privflags G_GNUC_UNUSED)
+ unsigned int privflags)
{
if (qemuDomainGetStatsCpuCgroup(dom, params) < 0)
return -1;
@@ -18095,7 +18148,7 @@ qemuDomainGetStatsCpu(virQEMUDriver *driver,
if (qemuDomainGetStatsCpuCache(driver, dom, params) < 0)
return -1;
- if (qemuDomainGetStatsCpuHaltPollTime(dom, params) < 0)
+ if (qemuDomainGetStatsCpuHaltPollTime(driver, dom, params, privflags) < 0)
return -1;
return 0;
@@ -18929,7 +18982,7 @@ static virQEMUCapsFlags queryDirtyRateRequired[] = {
static struct qemuDomainGetStatsWorker qemuDomainGetStatsWorkers[] = {
{ qemuDomainGetStatsState, VIR_DOMAIN_STATS_STATE, false, NULL },
- { qemuDomainGetStatsCpu, VIR_DOMAIN_STATS_CPU_TOTAL, false, NULL },
+ { qemuDomainGetStatsCpu, VIR_DOMAIN_STATS_CPU_TOTAL, true, NULL },
{ qemuDomainGetStatsBalloon, VIR_DOMAIN_STATS_BALLOON, true, NULL },
{ qemuDomainGetStatsVcpu, VIR_DOMAIN_STATS_VCPU, true, NULL },
{ qemuDomainGetStatsInterface, VIR_DOMAIN_STATS_INTERFACE, false, NULL },
--
2.36.1
On Thu, Jul 14, 2022 at 11:22:04AM +0530, Amneesh Singh wrote:
>Related: https://gitlab.com/libvirt/libvirt/-/issues/276
>
>This patch uses qemuMonitorQueryStats to query "halt_poll_success_ns"
>and "halt_poll_fail_ns" for every vCPU. The respective values for each
>vCPU are then added together.
>
>Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
>---
> src/qemu/qemu_driver.c | 69 +++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 61 insertions(+), 8 deletions(-)
>
>diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>index da95f947..e1c595e5 100644
>--- a/src/qemu/qemu_driver.c
>+++ b/src/qemu/qemu_driver.c
>@@ -18066,15 +18066,68 @@ qemuDomainGetStatsCpuCgroup(virDomainObj *dom,
> }
>
> static int
>-qemuDomainGetStatsCpuHaltPollTime(virDomainObj *dom,
>- virTypedParamList *params)
>+qemuDomainGetStatsCpuHaltPollTime(virQEMUDriver *driver,
>+ virDomainObj *dom,
>+ virTypedParamList *params,
>+ unsigned int privflags)
> {
> unsigned long long haltPollSuccess = 0;
> unsigned long long haltPollFail = 0;
>- pid_t pid = dom->pid;
>+ qemuDomainObjPrivate *priv = dom->privateData;
>+ bool queryStatsCap = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_STATS);
>
>- if (virHostCPUGetHaltPollTime(pid, &haltPollSuccess, &haltPollFail) < 0)
>- return 0;
>+ if (queryStatsCap && HAVE_JOB(privflags) && virDomainObjIsActive(dom)) {
>+ size_t i;
>+ qemuMonitorQueryStatsTargetType target = QEMU_MONITOR_QUERY_STATS_TARGET_VCPU;
>+ qemuMonitorQueryStatsProvider *provider = NULL;
>+ g_autoptr(GPtrArray) providers = NULL;
>+ g_autoptr(GPtrArray) queried_stats = NULL;
>+ provider = qemuMonitorQueryStatsProviderNew(
>+ QEMU_MONITOR_QUERY_STATS_PROVIDER_KVM,
>+ QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_SUCCESS_NS,
>+ QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_FAIL_NS,
>+ QEMU_MONITOR_QUERY_STATS_NAME_LAST);
>+
>+ providers = g_ptr_array_new_full(1, (GDestroyNotify) qemuMonitorQueryStatsProviderFree);
>+ g_ptr_array_add(providers, provider);
>+
>+ qemuDomainObjEnterMonitor(driver, dom);
>+ queried_stats = qemuMonitorQueryStats(priv->mon, target, NULL, providers);
>+ qemuDomainObjExitMonitor(dom);
>+
>+ if (!queried_stats)
>+ return 0;
>+
>+ for (i = 0; i < queried_stats->len; i++) {
>+ unsigned long long curHaltPollSuccess, curHaltPollFail;
>+ GHashTable *cur_table = queried_stats->pdata[i];
>+ virJSONValue *success_obj, *fail_obj;
>+ const char *success_str = qemuMonitorQueryStatsNameTypeToString(
>+ QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_SUCCESS_NS);
>+ const char *fail_str = qemuMonitorQueryStatsNameTypeToString(
>+ QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_FAIL_NS);
>+
>+ success_obj = g_hash_table_lookup(cur_table, success_str);
>+ fail_obj = g_hash_table_lookup(cur_table, fail_str);
>+
>+ if (!success_obj || !fail_obj)
>+ return 0;
>+
>+ if (virJSONValueGetNumberUlong(success_obj, &curHaltPollSuccess) < 0)
>+ return 0;
>+
>+ if (virJSONValueGetNumberUlong(fail_obj, &curHaltPollFail) < 0)
>+ return 0;
>+
As mentioned before, all these failures do not have to exit the function, but
rather fall back to the old way. You can even create two new functions for the
new and old implementations and then call them from here to make the fallback
easier to spot (and code).
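
For illustration only (the helper name below is made up; it would just wrap
the qemuMonitorQueryStats() code from this patch and return a negative value
whenever the stats cannot be fetched through the monitor), the split could end
up looking something like:

static int
qemuDomainGetStatsCpuHaltPollTime(virQEMUDriver *driver,
                                  virDomainObj *dom,
                                  virTypedParamList *params,
                                  unsigned int privflags)
{
    unsigned long long haltPollSuccess = 0;
    unsigned long long haltPollFail = 0;
    qemuDomainObjPrivate *priv = dom->privateData;

    /* hypothetical helper wrapping the qemuMonitorQueryStats() code from
     * this patch; returns < 0 whenever the stats are not available that way */
    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QUERY_STATS) ||
        !HAVE_JOB(privflags) ||
        !virDomainObjIsActive(dom) ||
        qemuDomainGetStatsCpuHaltPollTimeMon(driver, dom,
                                             &haltPollSuccess,
                                             &haltPollFail) < 0) {
        /* old implementation kept as the fallback */
        if (virHostCPUGetHaltPollTime(dom->pid,
                                      &haltPollSuccess, &haltPollFail) < 0)
            return 0;
    }

    if (virTypedParamListAddULLong(params, haltPollSuccess, "cpu.haltpoll.success.time") < 0 ||
        virTypedParamListAddULLong(params, haltPollFail, "cpu.haltpoll.fail.time") < 0)
        return -1;

    return 0;
}

That way any failure in the monitor path simply falls through to the old
virHostCPUGetHaltPollTime() path instead of bailing out.
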
I wanted to change this before pushing as well, but I feel like I'm changing too
much of your code. And I also had to rebase this (although trivially). Would
you mind just changing these few last things so that we can get it in before the
rc0 freeze?
Thanks and have a nice weekend,
Martin
>+ haltPollSuccess += curHaltPollSuccess;
>+ haltPollFail += curHaltPollFail;
>+ }
>+ } else {
>+ pid_t pid = dom->pid;
>+
>+ if (virHostCPUGetHaltPollTime(pid, &haltPollSuccess, &haltPollFail) < 0)
>+ return 0;
>+ }
>
> if (virTypedParamListAddULLong(params, haltPollSuccess, "cpu.haltpoll.success.time") < 0 ||
> virTypedParamListAddULLong(params, haltPollFail, "cpu.haltpoll.fail.time") < 0)
>@@ -18087,7 +18140,7 @@ static int
> qemuDomainGetStatsCpu(virQEMUDriver *driver,
> virDomainObj *dom,
> virTypedParamList *params,
>- unsigned int privflags G_GNUC_UNUSED)
>+ unsigned int privflags)
> {
> if (qemuDomainGetStatsCpuCgroup(dom, params) < 0)
> return -1;
>@@ -18095,7 +18148,7 @@ qemuDomainGetStatsCpu(virQEMUDriver *driver,
> if (qemuDomainGetStatsCpuCache(driver, dom, params) < 0)
> return -1;
>
>- if (qemuDomainGetStatsCpuHaltPollTime(dom, params) < 0)
>+ if (qemuDomainGetStatsCpuHaltPollTime(driver, dom, params, privflags) < 0)
> return -1;
>
> return 0;
>@@ -18929,7 +18982,7 @@ static virQEMUCapsFlags queryDirtyRateRequired[] = {
>
> static struct qemuDomainGetStatsWorker qemuDomainGetStatsWorkers[] = {
> { qemuDomainGetStatsState, VIR_DOMAIN_STATS_STATE, false, NULL },
>- { qemuDomainGetStatsCpu, VIR_DOMAIN_STATS_CPU_TOTAL, false, NULL },
>+ { qemuDomainGetStatsCpu, VIR_DOMAIN_STATS_CPU_TOTAL, true, NULL },
> { qemuDomainGetStatsBalloon, VIR_DOMAIN_STATS_BALLOON, true, NULL },
> { qemuDomainGetStatsVcpu, VIR_DOMAIN_STATS_VCPU, true, NULL },
> { qemuDomainGetStatsInterface, VIR_DOMAIN_STATS_INTERFACE, false, NULL },
>--
>2.36.1
>
On 7/22/22 17:43, Martin Kletzander wrote:
> As mentioned before, all these failures do not have to exit the
> function, but rather fall back to the old way. You can even create
> two new functions for the new and old implementations and then call
> them from here to make the fallback easier to spot (and code).
More precisely, they should just "continue;" to the next iteration of
the for loop, like

    if (!success_obj || !fail_obj)
        continue;

    found = true;

and then go fall back if found is false at the end of the loop.
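
Put together, just as a sketch reusing the variable names from the patch plus
the new "found" flag, the loop and the fallback could read:

    bool found = false;

    for (i = 0; i < queried_stats->len; i++) {
        unsigned long long curHaltPollSuccess, curHaltPollFail;
        GHashTable *cur_table = queried_stats->pdata[i];
        virJSONValue *success_obj, *fail_obj;
        const char *success_str = qemuMonitorQueryStatsNameTypeToString(
            QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_SUCCESS_NS);
        const char *fail_str = qemuMonitorQueryStatsNameTypeToString(
            QEMU_MONITOR_QUERY_STATS_NAME_HALT_POLL_FAIL_NS);

        success_obj = g_hash_table_lookup(cur_table, success_str);
        fail_obj = g_hash_table_lookup(cur_table, fail_str);

        /* this vCPU does not report the halt poll stats, try the next one */
        if (!success_obj || !fail_obj)
            continue;

        found = true;

        if (virJSONValueGetNumberUlong(success_obj, &curHaltPollSuccess) < 0 ||
            virJSONValueGetNumberUlong(fail_obj, &curHaltPollFail) < 0)
            return 0;

        haltPollSuccess += curHaltPollSuccess;
        haltPollFail += curHaltPollFail;
    }

    /* nothing usable from the monitor, fall back to the old procfs path */
    if (!found &&
        virHostCPUGetHaltPollTime(dom->pid, &haltPollSuccess, &haltPollFail) < 0)
        return 0;
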
On the other hand, here:

    if (virJSONValueGetNumberUlong(success_obj, &curHaltPollSuccess) < 0)
        return 0;

    if (virJSONValueGetNumberUlong(fail_obj, &curHaltPollFail) < 0)
        return 0;

I am not sure about falling back, because it is really an unexpected
situation where the statistic exists but somehow the value cannot be
parsed.
Paolo
> I wanted to change this before pushing as well, but I feel like I'm
> changing too much of your code. And I also had to rebase this
> (although trivially). Would you mind just changing these few last
> things so that we can get it in before the rc0 freeze?
On Fri, Jul 22, 2022 at 07:09:57PM +0200, Paolo Bonzini wrote:
> On 7/22/22 17:43, Martin Kletzander wrote:
> > As mentioned before, all these failures do not have to exit the
> > function, but rather fall back to the old way. You can even create
> > two new functions for the new and old implementations and then call
> > them from here to make the fallback easier to spot (and code).
>
> More precisely, they should just "continue;" to the next iteration of
> the for loop, like
>
>     if (!success_obj || !fail_obj)
>         continue;
>
>     found = true;
>
> and then go fall back if found is false at the end of the loop.
>
> On the other hand, here:
>
>     if (virJSONValueGetNumberUlong(success_obj, &curHaltPollSuccess) < 0)
>         return 0;
>
>     if (virJSONValueGetNumberUlong(fail_obj, &curHaltPollFail) < 0)
>         return 0;
>
> I am not sure about falling back, because it is really an unexpected
> situation where the statistic exists but somehow the value cannot be
> parsed.

Then can we just "continue;" in case the value fails to parse as well?

> Paolo
>
> > I wanted to change this before pushing as well, but I feel like I'm
> > changing too much of your code. And I also had to rebase this
> > (although trivially). Would you mind just changing these few last
> > things so that we can get it in before the rc0 freeze?

Alright, as soon as there is a viable check decided for the virJSONValueGet*
statements above, I will push a v4 with the changes you mentioned in your
reviews. Thank you both for taking the time to review.
On Sun, Jul 24, 2022 at 10:50:36AM +0530, Amneesh Singh wrote:
> On Fri, Jul 22, 2022 at 07:09:57PM +0200, Paolo Bonzini wrote:
> > On 7/22/22 17:43, Martin Kletzander wrote:
> > > As mentioned before, all these failures do not have to exit the
> > > function, but rather fall back to the old way. You can even create
> > > two new functions for the new and old implementations and then call
> > > them from here to make the fallback easier to spot (and code).
> >
> > More precisely, they should just "continue;" to the next iteration of
> > the for loop, like
> >
> >     if (!success_obj || !fail_obj)
> >         continue;
> >
> >     found = true;
> >
> > and then go fall back if found is false at the end of the loop.
> >
> > On the other hand, here:
> >
> >     if (virJSONValueGetNumberUlong(success_obj, &curHaltPollSuccess) < 0)
> >         return 0;
> >
> >     if (virJSONValueGetNumberUlong(fail_obj, &curHaltPollFail) < 0)
> >         return 0;
> >
> > I am not sure about falling back, because it is really an unexpected
> > situation where the statistic exists but somehow the value cannot be
> > parsed.

The main thing I was worried about was libvirtd crashing on a possibly
exploited qemu process. Whether we fall back or report nothing in this
case is not that big of a deal, since there's something wrong already
anyway.

> Then can we just "continue;" in case the value fails to parse as well?

That's another option as well.

> > Paolo
> >
> > > I wanted to change this before pushing as well, but I feel like I'm
> > > changing too much of your code. And I also had to rebase this
> > > (although trivially). Would you mind just changing these few last
> > > things so that we can get it in before the rc0 freeze?
>
> Alright, as soon as there is a viable check decided for the virJSONValueGet*
> statements above, I will push a v4 with the changes you mentioned in your
> reviews. Thank you both for taking the time to review.