[PATCH v2 0/1] qemu: add per-vcpu delay stats

Aleksei Zakharov posted 1 patch 3 years, 2 months ago
Test syntax-check failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20210204110010.23828-1-zaharov@selectel.ru
There is a newer version of this series
src/qemu/qemu_driver.c | 39 ++++++++++++++++++++++++++++++++++++---
1 file changed, 36 insertions(+), 3 deletions(-)
[PATCH v2 0/1] qemu: add per-vcpu delay stats
Posted by Aleksei Zakharov 3 years, 2 months ago
The previous patch introduced summarized delay time of the vm.
It's worth to collect per-vcpu, so I dropped that code and
added per-vcpu stats.

Also, fixed coding style errors and used glib functions.

Aleksei Zakharov (1):
  qemu: add per-vcpu delay stats

 src/qemu/qemu_driver.c | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

-- 
2.17.1

[PATCH v3 0/1] qemu: add per-vcpu delay stats
Posted by Aleksei Zakharov 3 years, 2 months ago
Fixed an issue in v2:
Close schedstat file after use.

Fixes since v1:
Collect per-vcpu stats.
Coding style errors
Use glib functions.

Aleksei Zakharov (1):
  qemu: add per-vcpu delay stats

 src/qemu/qemu_driver.c | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

-- 
2.17.1

[PATCH v3 1/1] qemu: add per-vcpu delay stats
Posted by Aleksei Zakharov 3 years, 2 months ago
This patch adds delay time (steal time inside guest) to libvirt
domain per-vcpu stats. Delay time is a consequence of the overloaded
CPU. Knowledge of the delay time of a virtual machine helps to react
exactly when needed and rebalance the load between hosts.
This is used by cloud providers to provide quality of service,
especially when the CPU is oversubscripted.

It's more convenient to work with this metric in a context of a
libvirt domain. Any monitoring software may use this information.

Signed-off-by: Aleksei Zakharov <zaharov@selectel.ru>
---
 src/qemu/qemu_driver.c | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 3d54653217..319bc60632 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -1332,6 +1332,29 @@ static char *qemuConnectGetCapabilities(virConnectPtr conn) {
     return virCapabilitiesFormatXML(caps);
 }
 
+static int
+qemuGetSchedstatDelay(unsigned long long *cpudelay,
+                 pid_t pid, pid_t tid)
+{
+    g_autofree char *proc = NULL;
+    unsigned long long oncpu = 0;
+    g_autofree FILE *schedstat = NULL;
+
+    if (tid)
+        proc = g_strdup_printf("/proc/%d/task/%d/schedstat", (int)pid, (int)tid);
+    else
+        proc = g_strdup_printf("/proc/%d/schedstat", (int)pid);
+    if (!proc)
+        return -1;
+
+    schedstat = fopen(proc, "r");
+    if (!schedstat || fscanf(schedstat, "%llu %llu", &oncpu, cpudelay) < 2) {
+            return -1;
+    }
+
+    VIR_FORCE_FCLOSE(schedstat);
+    return 0;
+}
 
 static int
 qemuGetSchedInfo(unsigned long long *cpuWait,
@@ -1470,6 +1493,7 @@ static int
 qemuDomainHelperGetVcpus(virDomainObjPtr vm,
                          virVcpuInfoPtr info,
                          unsigned long long *cpuwait,
+                         unsigned long long *cpudelay,
                          int maxinfo,
                          unsigned char *cpumaps,
                          int maplen)
@@ -1529,6 +1553,11 @@ qemuDomainHelperGetVcpus(virDomainObjPtr vm,
                 return -1;
         }
 
+        if (cpudelay) {
+            if (qemuGetSchedstatDelay(&(cpudelay[ncpuinfo]), vm->pid, vcpupid) < 0)
+                return -1;
+        }
+
         ncpuinfo++;
     }
 
@@ -4873,7 +4902,7 @@ qemuDomainGetVcpus(virDomainPtr dom,
         goto cleanup;
     }
 
-    ret = qemuDomainHelperGetVcpus(vm, info, NULL, maxinfo, cpumaps, maplen);
+    ret = qemuDomainHelperGetVcpus(vm, info, NULL, NULL, maxinfo, cpumaps, maplen);
 
  cleanup:
     virDomainObjEndAPI(&vm);
@@ -17870,7 +17899,6 @@ qemuDomainGetStatsMemoryBandwidth(virQEMUDriverPtr driver,
     return ret;
 }
 
-
 static int
 qemuDomainGetStatsCpuCache(virQEMUDriverPtr driver,
                            virDomainObjPtr dom,
@@ -18060,6 +18088,7 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver,
     int ret = -1;
     virVcpuInfoPtr cpuinfo = NULL;
     g_autofree unsigned long long *cpuwait = NULL;
+    g_autofree unsigned long long *cpudelay = NULL;
 
     if (virTypedParamListAddUInt(params, virDomainDefGetVcpus(dom->def),
                                  "vcpu.current") < 0)
@@ -18071,6 +18100,7 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver,
 
     cpuinfo = g_new0(virVcpuInfo, virDomainDefGetVcpus(dom->def));
     cpuwait = g_new0(unsigned long long, virDomainDefGetVcpus(dom->def));
+    cpudelay = g_new0(unsigned long long, virDomainDefGetVcpus(dom->def));
 
     if (HAVE_JOB(privflags) && virDomainObjIsActive(dom) &&
         qemuDomainRefreshVcpuHalted(driver, dom, QEMU_ASYNC_JOB_NONE) < 0) {
@@ -18079,7 +18109,7 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver,
             virResetLastError();
     }
 
-    if (qemuDomainHelperGetVcpus(dom, cpuinfo, cpuwait,
+    if (qemuDomainHelperGetVcpus(dom, cpuinfo, cpuwait, cpudelay,
                                  virDomainDefGetVcpus(dom->def),
                                  NULL, 0) < 0) {
         virResetLastError();
@@ -18104,6 +18134,10 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver,
                                        "vcpu.%u.wait", cpuinfo[i].number) < 0)
             goto cleanup;
 
+        if (virTypedParamListAddULLong(params, cpudelay[i],
+                                        "vcpu.%u.delay", cpuinfo[i].number) < 0)
+            goto cleanup;
+
         /* state below is extracted from the individual vcpu structs */
         if (!(vcpu = virDomainDefGetVcpu(dom->def, cpuinfo[i].number)))
             continue;
-- 
2.17.1

Re: [PATCH v3 1/1] qemu: add per-vcpu delay stats
Posted by Peter Krempa 3 years, 2 months ago
On Mon, Feb 15, 2021 at 14:21:45 +0300, Aleksei Zakharov wrote:
> This patch adds delay time (steal time inside guest) to libvirt
> domain per-vcpu stats. Delay time is a consequence of the overloaded
> CPU. Knowledge of the delay time of a virtual machine helps to react
> exactly when needed and rebalance the load between hosts.
> This is used by cloud providers to provide quality of service,
> especially when the CPU is oversubscripted.
> 
> It's more convenient to work with this metric in a context of a
> libvirt domain. Any monitoring software may use this information.

This is a code review, I didn't have the chance to look up more on
whether it makes actually sense to expose this.

> 
> Signed-off-by: Aleksei Zakharov <zaharov@selectel.ru>
> ---
>  src/qemu/qemu_driver.c | 40 +++++++++++++++++++++++++++++++++++++---
>  1 file changed, 37 insertions(+), 3 deletions(-)

Also always post new versions of the patches as a new thread, don't
reply to existing threads.

> 
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 3d54653217..319bc60632 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -1332,6 +1332,29 @@ static char *qemuConnectGetCapabilities(virConnectPtr conn) {
>      return virCapabilitiesFormatXML(caps);
>  }
>  
> +static int
> +qemuGetSchedstatDelay(unsigned long long *cpudelay,
> +                 pid_t pid, pid_t tid)
> +{
> +    g_autofree char *proc = NULL;
> +    unsigned long long oncpu = 0;
> +    g_autofree FILE *schedstat = NULL;

This will just g_free() the pointer [1]...

> +
> +    if (tid)
> +        proc = g_strdup_printf("/proc/%d/task/%d/schedstat", (int)pid, (int)tid);
> +    else
> +        proc = g_strdup_printf("/proc/%d/schedstat", (int)pid);
> +    if (!proc)
> +        return -1;

proc can't be NULL here g_strdup_printf either returns a pointer or
aborts.

> +
> +    schedstat = fopen(proc, "r");
> +    if (!schedstat || fscanf(schedstat, "%llu %llu", &oncpu, cpudelay) < 2) {
> +            return -1;

Alignment is off.


...[1] so if fscanf fails the file will not be closed.

The caller also expects that a libvirt error is reported on failure
since it doesn't report own errors, so please make sure you do so.

Additionally similarly to qemuGetSchedInfo I don't think we should make
the failure to open the file fatal since it's just stats.


> +    }
> +
> +    VIR_FORCE_FCLOSE(schedstat);
> +    return 0;
> +}
>  
>  static int
>  qemuGetSchedInfo(unsigned long long *cpuWait,

[...]

> @@ -17870,7 +17899,6 @@ qemuDomainGetStatsMemoryBandwidth(virQEMUDriverPtr driver,
>      return ret;
>  }
>  
> -

Don't delete unrelated whitespace.

>  static int
>  qemuDomainGetStatsCpuCache(virQEMUDriverPtr driver,
>                             virDomainObjPtr dom,

[...]

> @@ -18104,6 +18134,10 @@ qemuDomainGetStatsVcpu(virQEMUDriverPtr driver,
>                                         "vcpu.%u.wait", cpuinfo[i].number) < 0)
>              goto cleanup;
>  
> +        if (virTypedParamListAddULLong(params, cpudelay[i],
> +                                        "vcpu.%u.delay", cpuinfo[i].number) < 0)
> +            goto cleanup;

Addition of a new stats field must be documented both in the public API
docs and the virsh docs. Just look for any of the existing docs in:

src/libvirt-domain.c and docs/manpages/virsh.rst

> +
>          /* state below is extracted from the individual vcpu structs */
>          if (!(vcpu = virDomainDefGetVcpu(dom->def, cpuinfo[i].number)))
>              continue;
> -- 
> 2.17.1
>