[v1] qmp, hmp: statistics subsystem and KVM support.

[PATCH 1/8] qmp: Support for querying stats

Posted by Paolo Bonzini 3 years, 9 months ago

From: Mark Kanda <mark.kanda@oracle.com>

Introduce QMP support for querying stats. Provide a framework for adding new
stats and support for the following commands:

- query-stats
Returns a list of all stats per target type (only VM and vCPU to start), with
additional options for specifying stat names, vCPU qom paths, and providers.

- query-stats-schemas
Returns a list of stats included in each target type, with an option for
specifying the provider.  The concepts in the schema are based on the
KVM binary stats' own introspection data, just translated to QAPI.

The framework provides a method to register callbacks for these QMP commands.
Most of the work in fact is done by the callbacks, and a large majority of
this patch is new QAPI structs and commands.

The first use-case will be for fd-based KVM stats (in an upcoming patch).

Examples (with fd-based KVM stats):

- Query all VM stats:

{ "execute": "query-stats", "arguments" : { "target": "vm" } }

{ "return": [
     { "provider": "kvm",
       "stats": [
          { "name": "max_mmu_page_hash_collisions", "value": 0 },
          { "name": "max_mmu_rmap_size", "value": 0 },
          { "name": "nx_lpage_splits", "value": 148 },
          ... ] },
     { "provider": "xyz",
       "stats": [ ... ] }
] }

- Query all vCPU stats:

{ "execute": "query-stats", "arguments" : { "target": "vcpu" } }

{ "return": [
     { "provider": "kvm",
       "qom-path": "/machine/unattached/device[0]",
       "stats": [
          { "name": "guest_mode", "value": 0 },
          { "name": "directed_yield_successful", "value": 0 },
          { "name": "directed_yield_attempted", "value": 106 },
          ... ] },
     { "provider": "kvm",
       "qom-path": "/machine/unattached/device[1]",
       "stats": [
          { "name": "guest_mode", "value": 0 },
          { "name": "directed_yield_successful", "value": 0 },
          { "name": "directed_yield_attempted", "value": 106 },
          ... ] },
] }

- Retrieve the schemas:

{ "execute": "query-stats-schemas" }

{ "return": [
    { "provider": "kvm",
      "target": "vcpu",
      "stats": [
         { "name": "guest_mode",
           "unit": "none",
           "base": 10,
           "exponent": 0,
           "type": "instant" },
        { "name": "directed_yield_successful",
           "unit": "none",
           "base": 10,
           "exponent": 0,
           "type": "cumulative" },
        ... ]
    },
    { "provider": "kvm",
      "target": "vm",
      "stats": [
        { "name": "max_mmu_page_hash_collisions",
           "unit": "none",
           "base": 10,
           "exponent": 0,
           "type": "peak" },
        ... ]
    },
    { "provider": "xyz",
      "target": "vm",
      "stats": [ ... ]
    }
] }

Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/monitor/stats.h |  33 +++++++
 monitor/qmp-cmds.c      |  71 +++++++++++++++
 qapi/meson.build        |   1 +
 qapi/qapi-schema.json   |   1 +
 qapi/stats.json         | 192 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 298 insertions(+)
 create mode 100644 include/monitor/stats.h
 create mode 100644 qapi/stats.json

diff --git a/include/monitor/stats.h b/include/monitor/stats.h
new file mode 100644
index 0000000000..89552ab06f
--- /dev/null
+++ b/include/monitor/stats.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef STATS_H
+#define STATS_H
+
+#include "qapi/qapi-types-stats.h"
+
+typedef void StatRetrieveFunc(StatsResultList **result, StatsTarget target, Error **errp);
+typedef void SchemaRetrieveFunc(StatsSchemaList **result, Error **errp);
+
+/*
+ * Register callbacks for the QMP query-stats command.
+ *
+ * @stats_fn: routine to query stats:
+ * @schemas_fn: routine to query stat schemas:
+ */
+void add_stats_callbacks(StatRetrieveFunc *stats_fn,
+                         SchemaRetrieveFunc *schemas_fn);
+
+/*
+ * Helper routines for adding stats entries to the results lists.
+ */
+void add_stats_entry(StatsResultList **, StatsProvider, const char *id,
+                     StatsList *stats_list);
+void add_stats_schema(StatsSchemaList **, StatsProvider, StatsTarget,
+                      StatsSchemaValueList *);
+
+#endif /* STATS_H */
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index 5e7302cbb9..97825b25fa 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -35,6 +35,7 @@
 #include "qapi/qapi-commands-control.h"
 #include "qapi/qapi-commands-machine.h"
 #include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-stats.h"
 #include "qapi/qapi-commands-ui.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qmp/qerror.h"
@@ -43,6 +44,7 @@
 #include "hw/acpi/acpi_dev_interface.h"
 #include "hw/intc/intc.h"
 #include "hw/rdma/rdma.h"
+#include "monitor/stats.h"
 
 NameInfo *qmp_query_name(Error **errp)
 {
@@ -426,3 +428,72 @@ HumanReadableText *qmp_x_query_irq(Error **errp)
 
     return human_readable_text_from_str(buf);
 }
+
+typedef struct StatsCallbacks {
+    StatRetrieveFunc *stats_cb;
+    SchemaRetrieveFunc *schemas_cb;
+    QTAILQ_ENTRY(StatsCallbacks) next;
+} StatsCallbacks;
+
+static QTAILQ_HEAD(, StatsCallbacks) stats_callbacks =
+    QTAILQ_HEAD_INITIALIZER(stats_callbacks);
+
+void add_stats_callbacks(StatRetrieveFunc *stats_fn,
+                         SchemaRetrieveFunc *schemas_fn)
+{
+    StatsCallbacks *entry = g_new(StatsCallbacks, 1);
+    entry->stats_cb = stats_fn;
+    entry->schemas_cb = schemas_fn;
+
+    QTAILQ_INSERT_TAIL(&stats_callbacks, entry, next);
+}
+
+StatsResultList *qmp_query_stats(StatsFilter *filter, Error **errp)
+{
+    StatsResultList *stats_results = NULL;
+    StatsCallbacks *entry;
+
+    QTAILQ_FOREACH(entry, &stats_callbacks, next) {
+        entry->stats_cb(&stats_results, filter->target, errp);
+    }
+
+    return stats_results;
+}
+
+StatsSchemaList *qmp_query_stats_schemas(Error **errp)
+{
+    StatsSchemaList *stats_results = NULL;
+    StatsCallbacks *entry;
+
+    QTAILQ_FOREACH(entry, &stats_callbacks, next) {
+        entry->schemas_cb(&stats_results, errp);
+    }
+
+    return stats_results;
+}
+
+void add_stats_entry(StatsResultList **stats_results, StatsProvider provider,
+                     const char *qom_path, StatsList *stats_list)
+{
+    StatsResult *entry = g_new0(StatsResult, 1);
+    entry->provider = provider;
+    if (qom_path) {
+        entry->has_qom_path = true;
+        entry->qom_path = g_strdup(qom_path);
+    }
+    entry->stats = stats_list;
+
+    QAPI_LIST_PREPEND(*stats_results, entry);
+}
+
+void add_stats_schema(StatsSchemaList **schema_results,
+                      StatsProvider provider, StatsTarget target,
+                      StatsSchemaValueList *stats_list)
+{
+    StatsSchema *entry = g_new0(StatsSchema, 1);
+
+    entry->provider = provider;
+    entry->target = target;
+    entry->stats = stats_list;
+    QAPI_LIST_PREPEND(*schema_results, entry);
+}
diff --git a/qapi/meson.build b/qapi/meson.build
index 656ef0e039..fd5c93d643 100644
--- a/qapi/meson.build
+++ b/qapi/meson.build
@@ -46,6 +46,7 @@ qapi_all_modules = [
   'replay',
   'run-state',
   'sockets',
+  'stats',
   'trace',
   'transaction',
   'yank',
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index 4912b9744e..92d7ecc52c 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -93,3 +93,4 @@
 { 'include': 'audio.json' }
 { 'include': 'acpi.json' }
 { 'include': 'pci.json' }
+{ 'include': 'stats.json' }
diff --git a/qapi/stats.json b/qapi/stats.json
new file mode 100644
index 0000000000..7454dd7daa
--- /dev/null
+++ b/qapi/stats.json
@@ -0,0 +1,192 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# Copyright (c) 2022 Oracle and/or its affiliates.
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+##
+# = Statistics
+##
+
+##
+# @StatsType:
+#
+# Enumeration of statistics types
+#
+# @cumulative: stat is cumulative; value can only increase.
+# @instant: stat is instantaneous; value can increase or decrease.
+# @peak: stat is the peak value; value can only increase.
+# @linear-hist: stat is a linear histogram.
+# @log-hist: stat is a logarithmic histogram.
+#
+# Since: 7.1
+##
+{ 'enum' : 'StatsType',
+  'data' : [ 'cumulative', 'instant', 'peak', 'linear-hist', 'log-hist' ] }
+
+##
+# @StatsUnit:
+#
+# Enumeration of unit of measurement for statistics
+#
+# @bytes: stat reported in bytes.
+# @seconds: stat reported in seconds.
+# @cycles: stat reported in clock cycles.
+#
+# Since: 7.1
+##
+{ 'enum' : 'StatsUnit',
+  'data' : [ 'bytes', 'seconds', 'cycles' ] }
+
+##
+# @StatsProvider:
+#
+# Enumeration of statistics providers.
+#
+# Since: 7.1
+##
+{ 'enum': 'StatsProvider',
+  'data': [ ] }
+
+##
+# @StatsTarget:
+#
+# The kinds of objects on which one can request statistics.
+#
+# @vm: the entire virtual machine.
+# @vcpu: a virtual CPU.
+#
+# Since: 7.1
+##
+{ 'enum': 'StatsTarget',
+  'data': [ 'vm', 'vcpu' ] }
+
+##
+# @StatsFilter:
+#
+# The arguments to the query-stats command; specifies a target for which to
+# request statistics, and which statistics are requested from each provider.
+#
+# Since: 7.1
+##
+{ 'struct': 'StatsFilter',
+  'data': { 'target': 'StatsTarget' } }
+
+##
+# @StatsValue:
+#
+# @scalar: single uint64.
+# @list: list of uint64.
+#
+# Since: 7.1
+##
+{ 'alternate': 'StatsValue',
+  'data': { 'scalar': 'uint64',
+            'list': [ 'uint64' ] } }
+
+##
+# @Stats:
+#
+# @name: name of stat.
+# @value: stat value.
+#
+# Since: 7.1
+##
+{ 'struct': 'Stats',
+  'data': { 'name': 'str',
+            'value' : 'StatsValue' } }
+
+##
+# @StatsResult:
+#
+# @provider: provider for this set of statistics.
+# @qom-path: QOM path of the object for which the statistics are returned
+# @stats: list of statistics.
+#
+# Since: 7.1
+##
+{ 'struct': 'StatsResult',
+  'data': { 'provider': 'StatsProvider',
+            '*qom-path': 'str',
+            'stats': [ 'Stats' ] } }
+
+##
+# @query-stats:
+#
+# Return runtime-collected statistics for objects such as the
+# VM or its vCPUs.
+#
+# The arguments are a StatsFilter and specify the provider and objects
+# to return statistics about.
+#
+# Returns: a list of StatsResult, one for each provider and object
+#          (e.g., for each vCPU).
+#
+# Since: 7.1
+##
+{ 'command': 'query-stats',
+  'data': 'StatsFilter',
+  'boxed': true,
+  'returns': [ 'StatsResult' ] }
+
+##
+# @StatsSchemaValue:
+#
+# Schema for a single statistic.
+#
+# @name: stat name.
+#
+# @type: kind of statistic, a @StatType.
+#
+# @unit: base unit of measurement for the statistics @StatUnit.
+#
+# @base: base for the multiple of @unit that the statistic uses, either 2 or 10.
+#        Only present if @exponent is non-zero.
+#
+# @exponent: exponent for the multiple of @unit that the statistic uses
+#
+# @bucket-size: Used with linear-hist to report the width of each bucket
+#               of the histogram.
+#
+# Since: 7.1
+##
+{ 'struct': 'StatsSchemaValue',
+  'data': { 'name': 'str',
+            'type': 'StatsType',
+            '*unit': 'StatsUnit',
+            '*base': 'int8',
+            'exponent': 'int16',
+            '*bucket-size': 'uint32' } }
+
+##
+# @StatsSchema:
+#
+# Schema for all available statistics for a provider and target.
+#
+# @provider: provider for this set of statistics.
+#
+# @target: kind of object that can be queried through this provider.
+#
+# @stats: list of statistics.
+#
+# Since: 7.1
+##
+{ 'struct': 'StatsSchema',
+  'data': { 'provider': 'StatsProvider',
+            'target': 'StatsTarget',
+            'stats': [ 'StatsSchemaValue' ] } }
+
+##
+# @query-stats-schemas:
+#
+# Return the schema for all available runtime-collected statistics.
+#
+# Since: 7.1
+##
+{ 'command': 'query-stats-schemas',
+  'data': { },
+  'returns': [ 'StatsSchema' ] }
-- 
2.35.1

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Markus Armbruster 3 years, 9 months ago

Paolo Bonzini <pbonzini@redhat.com> writes:

> From: Mark Kanda <mark.kanda@oracle.com>
>
> Introduce QMP support for querying stats. Provide a framework for adding new
> stats and support for the following commands:
>
> - query-stats
> Returns a list of all stats per target type (only VM and vCPU to start), with
> additional options for specifying stat names, vCPU qom paths, and providers.
>
> - query-stats-schemas
> Returns a list of stats included in each target type, with an option for
> specifying the provider.  The concepts in the schema are based on the
> KVM binary stats' own introspection data, just translated to QAPI.

The second sentence helps build the case for "we actually need this
stuff".

Can you point to existing uses of KVM binary stats introspection data?

>
> The framework provides a method to register callbacks for these QMP commands.
> Most of the work in fact is done by the callbacks, and a large majority of
> this patch is new QAPI structs and commands.
>
> The first use-case will be for fd-based KVM stats (in an upcoming patch).
>
> Examples (with fd-based KVM stats):
>
> - Query all VM stats:
>
> { "execute": "query-stats", "arguments" : { "target": "vm" } }
>
> { "return": [
>      { "provider": "kvm",
>        "stats": [
>           { "name": "max_mmu_page_hash_collisions", "value": 0 },
>           { "name": "max_mmu_rmap_size", "value": 0 },
>           { "name": "nx_lpage_splits", "value": 148 },
>           ... ] },
>      { "provider": "xyz",
>        "stats": [ ... ] }
> ] }
>
> - Query all vCPU stats:
>
> { "execute": "query-stats", "arguments" : { "target": "vcpu" } }
>
> { "return": [
>      { "provider": "kvm",
>        "qom_path": "/machine/unattached/device[0]"
>        "stats": [
>           { "name": "guest_mode", "value": 0 },
>           { "name": "directed_yield_successful", "value": 0 },
>           { "name": "directed_yield_attempted", "value": 106 },
>           ... ] },
>      { "provider": "kvm",
>        "qom_path": "/machine/unattached/device[1]"
>        "stats": [
>           { "name": "guest_mode", "value": 0 },
>           { "name": "directed_yield_successful", "value": 0 },
>           { "name": "directed_yield_attempted", "value": 106 },
>           ... ] },
> ] }
>
> - Retrieve the schemas:
>
> { "execute": "query-stats-schemas" }
>
> { "return": [
>     { "provider": "kvm",
>       "target": "vcpu",
>       "stats": [
>          { "name": "guest_mode",
>            "unit": "none",
>            "base": 10,
>            "exponent": 0,
>            "type": "instant" },
>         { "name": "directed_yield_successful",
>            "unit": "none",
>            "base": 10,
>            "exponent": 0,
>            "type": "cumulative" },
>         ... ]
>     },
>     { "provider": "kvm",
>       "target": "vm",
>       "stats": [
>         { "name": "max_mmu_page_hash_collisions",
>            "unit": "none",
>            "base": 10,
>            "exponent": 0,
>            "type": "peak" },
>         ... ]
>     },
>     { "provider": "xyz",
>       "target": "vm",
>       "stats": [ ... ]
>     }
> ] }
>
> Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

[...]

> diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
> index 4912b9744e..92d7ecc52c 100644
> --- a/qapi/qapi-schema.json
> +++ b/qapi/qapi-schema.json
> @@ -93,3 +93,4 @@
>  { 'include': 'audio.json' }
>  { 'include': 'acpi.json' }
>  { 'include': 'pci.json' }
> +{ 'include': 'stats.json' }
> diff --git a/qapi/stats.json b/qapi/stats.json
> new file mode 100644
> index 0000000000..7454dd7daa
> --- /dev/null
> +++ b/qapi/stats.json
> @@ -0,0 +1,192 @@
> +# -*- Mode: Python -*-
> +# vim: filetype=python
> +#
> +# Copyright (c) 2022 Oracle and/or its affiliates.
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or later.
> +# See the COPYING file in the top-level directory.
> +#
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +##
> +# = Statistics
> +##
> +
> +##
> +# @StatsType:
> +#
> +# Enumeration of statistics types
> +#
> +# @cumulative: stat is cumulative; value can only increase.
> +# @instant: stat is instantaneous; value can increase or decrease.
> +# @peak: stat is the peak value; value can only increase.
> +# @linear-hist: stat is a linear histogram.
> +# @log-hist: stat is a logarithmic histogram.

For better or worse, we tend to eschew abbreviations in schema
identifiers.  Would you mind @linear-histogram and @log-histogram?

> +#
> +# Since: 7.1
> +##
> +{ 'enum' : 'StatsType',
> +  'data' : [ 'cumulative', 'instant', 'peak', 'linear-hist', 'log-hist' ] }
> +
> +##
> +# @StatsUnit:
> +#
> +# Enumeration of unit of measurement for statistics
> +#
> +# @bytes: stat reported in bytes.
> +# @seconds: stat reported in seconds.
> +# @cycles: stat reported in clock cycles.
> +#
> +# Since: 7.1
> +##
> +{ 'enum' : 'StatsUnit',
> +  'data' : [ 'bytes', 'seconds', 'cycles' ] }
> +
> +##
> +# @StatsProvider:
> +#
> +# Enumeration of statistics providers.
> +#
> +# Since: 7.1
> +##
> +{ 'enum': 'StatsProvider',
> +  'data': [ ] }
> +
> +##
> +# @StatsTarget:
> +#
> +# The kinds of objects on which one can request statistics.
> +#
> +# @vm: the entire virtual machine.
> +# @vcpu: a virtual CPU.
> +#
> +# Since: 7.1
> +##
> +{ 'enum': 'StatsTarget',
> +  'data': [ 'vm', 'vcpu' ] }

Do VM stats include vCPU stats?  "Entire virtual machine" suggests they
do...

> +
> +##
> +# @StatsFilter:
> +#
> +# The arguments to the query-stats command; specifies a target for which to
> +# request statistics, and which statistics are requested from each provider.
> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'StatsFilter',
> +  'data': { 'target': 'StatsTarget' } }

The "and which statistics" part will be implemented later in this
series?

> +
> +##
> +# @StatsValue:
> +#
> +# @scalar: single uint64.
> +# @list: list of uint64.

Recommend to spell out uint64 as "unsigned 64 bit integer".

> +#
> +# Since: 7.1
> +##
> +{ 'alternate': 'StatsValue',
> +  'data': { 'scalar': 'uint64',
> +            'list': [ 'uint64' ] } }
> +
> +##
> +# @Stats:
> +#
> +# @name: name of stat.
> +# @value: stat value.
> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'Stats',
> +  'data': { 'name': 'str',
> +            'value' : 'StatsValue' } }
> +
> +##
> +# @StatsResult:
> +#
> +# @provider: provider for this set of statistics.
> +# @qom-path: QOM path of the object for which the statistics are returned
> +# @stats: list of statistics.
> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'StatsResult',
> +  'data': { 'provider': 'StatsProvider',
> +            '*qom-path': 'str',

When exactly will @qom-path be present?

> +            'stats': [ 'Stats' ] } }
> +
> +##
> +# @query-stats:
> +#
> +# Return runtime-collected statistics for objects such as the
> +# VM or its vCPUs.
> +#
> +# The arguments are a StatsFilter and specify the provider and objects
> +# to return statistics about.
> +#
> +# Returns: a list of StatsResult, one for each provider and object
> +#          (e.g., for each vCPU).
> +#
> +# Since: 7.1
> +##
> +{ 'command': 'query-stats',
> +  'data': 'StatsFilter',
> +  'boxed': true,
> +  'returns': [ 'StatsResult' ] }
> +
> +##
> +# @StatsSchemaValue:
> +#
> +# Schema for a single statistic.
> +#
> +# @name: stat name.
> +#
> +# @type: kind of statistic, a @StatType.

Generated documentation looks like

       type: StatsType
              kind of statistic, a StatType.

I think ", a @StatType" should be dropped.

If we decide to keep it: @StatsType.

> +#
> +# @unit: base unit of measurement for the statistics @StatUnit.

"@StatUnit", too.

If we decide to keep it: @StatsUnit.

@unit is optional.  What's the default?

> +#
> +# @base: base for the multiple of @unit that the statistic uses, either 2 or 10.
> +#        Only present if @exponent is non-zero.
> +#
> +# @exponent: exponent for the multiple of @unit that the statistic uses

Alright, given a stat value 42, what does it mean for the possible
combinations of @base and @exponent?

> +#
> +# @bucket-size: Used with linear-hist to report the width of each bucket
> +#               of the histogram.

Feels too terse.  Example, perhaps?

I assume @bucket-size is present exactly when @type is @linear-hist.
Correct?

> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'StatsSchemaValue',
> +  'data': { 'name': 'str',
> +            'type': 'StatsType',
> +            '*unit': 'StatsUnit',
> +            '*base': 'int8',
> +            'exponent': 'int16',
> +            '*bucket-size': 'uint32' } }
> +
> +##
> +# @StatsSchema:
> +#
> +# Schema for all available statistics for a provider and target.
> +#
> +# @provider: provider for this set of statistics.
> +#
> +# @target: kind of object that can be queried through this provider.
> +#
> +# @stats: list of statistics.
> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'StatsSchema',
> +  'data': { 'provider': 'StatsProvider',
> +            'target': 'StatsTarget',
> +            'stats': [ 'StatsSchemaValue' ] } }

How am I to connect each element of the result of query-stats to an
element of the result of query-stats-schema?

> +
> +##
> +# @query-stats-schemas:
> +#
> +# Return the schema for all available runtime-collected statistics.
> +#
> +# Since: 7.1
> +##
> +{ 'command': 'query-stats-schemas',
> +  'data': { },
> +  'returns': [ 'StatsSchema' ] }

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Paolo Bonzini 3 years, 9 months ago


Il 4 maggio 2022 15:22:27 CEST, Markus Armbruster <armbru@redhat.com> ha scritto:
>Can you point to existing uses of KVM binary stats introspection data?

There's none, but Google is using it in-house. The same data was available before in debugfs and available via the kvm_stat script, so you could also refer to Christian Borntraeger's KVM Forum 2019 talk. The problems with debugfs are basically that it's only available to root and is disabled by secure boot (both issues are not fixable in general because they are Linux policy).

>> index 4912b9744e..92d7ecc52c 100644
>> --- a/qapi/qapi-schema.json
>> +++ b/qapi/qapi-schema.json
>> @@ -93,3 +93,4 @@
>>  { 'include': 'audio.json' }
>>  { 'include': 'acpi.json' }
>>  { 'include': 'pci.json' }
>> +{ 'include': 'stats.json' }
>> diff --git a/qapi/stats.json b/qapi/stats.json
>> new file mode 100644
>> index 0000000000..7454dd7daa
>> --- /dev/null
>> +++ b/qapi/stats.json
>> @@ -0,0 +1,192 @@
>> +# -*- Mode: Python -*-
>> +# vim: filetype=python
>> +#
>> +# Copyright (c) 2022 Oracle and/or its affiliates.
>> +#
>> +# This work is licensed under the terms of the GNU GPL, version 2 or later.
>> +# See the COPYING file in the top-level directory.
>> +#
>> +# SPDX-License-Identifier: GPL-2.0-or-later
>> +
>> +##
>> +# = Statistics
>> +##
>> +
>> +##
>> +# @StatsType:
>> +#
>> +# Enumeration of statistics types
>> +#
>> +# @cumulative: stat is cumulative; value can only increase.
>> +# @instant: stat is instantaneous; value can increase or decrease.
>> +# @peak: stat is the peak value; value can only increase.
>> +# @linear-hist: stat is a linear histogram.
>> +# @log-hist: stat is a logarithmic histogram.
>
>For better or worse, we tend to eschew abbreviations in schema
>identifiers.  Would you mind @linear-histogram and @log-histogram?

Sure.


>> +# Since: 7.1
>> +##
>> +{ 'enum': 'StatsTarget',
>> +  'data': [ 'vm', 'vcpu' ] }
>
>Do VM stats include vCPU stats?  "Entire virtual machine" suggests they
>do...

No, they don't. They are a different set of data, gathered on resources shared by the whole VM. Stuff such as "# of pages taken by the KVM page tables" goes there because VCPUs share a single copy of the page tables, as opposed to "# of page faults" which is a VCPU stat.

>> +# The arguments to the query-stats command; specifies a target for which to
>> +# request statistics, and which statistics are requested from each provider.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'struct': 'StatsFilter',
>> +  'data': { 'target': 'StatsTarget' } }
>
>The "and which statistics" part will be implemented later in this
>series?

Oh, indeed it is. Thanks for noticing.

>> +{ 'struct': 'StatsResult',
>> +  'data': { 'provider': 'StatsProvider',
>> +            '*qom-path': 'str',
>
>When exactly will @qom-path be present?

Only if the target is vcpus, for the current set of targets. Because the target is in the command I am not repeating it here with another discriminated record.

>> +# @type: kind of statistic, a @StatType.
>
>Generated documentation looks like
>
>       type: StatsType
>              kind of statistic, a StatType.
>
>I think ", a @StatType" should be dropped.
>
>If we decide to keep it: @StatsType.

Gotcha.

>
>> +#
>> +# @unit: base unit of measurement for the statistics @StatUnit.
>
>"@StatUnit", too.
>
>If we decide to keep it: @StatsUnit.
>
>@unit is optional.  What's the default?

The stat is an adimensional number: a count of events such as page faults, or the maximum length of a bucket in a hash table, etc. It's actually the common case.

>> +# @base: base for the multiple of @unit that the statistic uses, either 2 or 10.
>> +#        Only present if @exponent is non-zero.
>> +#
>> +# @exponent: exponent for the multiple of @unit that the statistic uses
>
>Alright, given a stat value 42, what does it mean for the possible
>combinations of @base and @exponent?

Base and exponent are used to represent units like KiB, nanoseconds, etc.

>> +# @bucket-size: Used with linear-hist to report the width of each bucket
>> +#               of the histogram.
>
>Feels too terse.  Example, perhaps?
>
>I assume @bucket-size is present exactly when @type is @linear-hist.
>Correct?

Yep, will expand.

>> +##
>> +# @StatsSchema:
>> +#
>> +# Schema for all available statistics for a provider and target.
>> +#
>> +# @provider: provider for this set of statistics.
>> +#
>> +# @target: kind of object that can be queried through this provider.
>> +#
>> +# @stats: list of statistics.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'struct': 'StatsSchema',
>> +  'data': { 'provider': 'StatsProvider',
>> +            'target': 'StatsTarget',
>> +            'stats': [ 'StatsSchemaValue' ] } }
>
>How am I to connect each element of the result of query-stats to an
>element of the result of query-stats-schema?

You gave the target to query-stats and the result of query-stats has the provider and name. Target+provider+name uniquely identify a StatsSchemaValue in the result of query-stats-schemas.

Paolo

>
>> +
>> +##
>> +# @query-stats-schemas:
>> +#
>> +# Return the schema for all available runtime-collected statistics.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'command': 'query-stats-schemas',
>> +  'data': { },
>> +  'returns': [ 'StatsSchema' ] }
>

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Markus Armbruster 3 years, 9 months ago

Paolo Bonzini <pbonzini@redhat.com> writes:

> Il 4 maggio 2022 15:22:27 CEST, Markus Armbruster <armbru@redhat.com> ha scritto:
>>Can you point to existing uses of KVM binary stats introspection data?
>
> There's none, but Google is using it in house. The same data was available before in debugfs and available via the kvm_stat script, so you could also refer to Christian Borntraeger's KVM Forum 2019 talk. The problems with debugfs are basically that it's only available to root and is disabled by secure boot (both issues are not fixable on general because they are Linux policy).

I keep bothering you about use cases, because I'm habitually opposed to
adding features without credible use cases.

For small features, a bit of plausible hand-waving can suffice, but this
one isn't small enough for that.

Plausible hand-waving can sometimes suffice for *experimental* features.
Say when the use case can't really materialize without the feature.

Double-checking (pardon my ignorance): we're basically exposing the host
kernel's KVM stats via QMP, with the option of extending it to other
sources of stats in the future.  Correct?

I think the argument for accepting the interface is basically "if it's
good enough for the kernel, it's good enough for us".  Valid point.

This means we'll acquire yet another introspection system, unrelated to
the introspection systems we already have in QEMU.

There is overlap.  Quite a few query- commands return stats.  Should
they be redone as statistics providers in this new introspection system?

Assuming the answer is no for at least some of them: what kind of stats
should go where?

I'd love to have a better feel for future directions before committing to
this.

>>> index 4912b9744e..92d7ecc52c 100644
>>> --- a/qapi/qapi-schema.json
>>> +++ b/qapi/qapi-schema.json
>>> @@ -93,3 +93,4 @@
>>>  { 'include': 'audio.json' }
>>>  { 'include': 'acpi.json' }
>>>  { 'include': 'pci.json' }
>>> +{ 'include': 'stats.json' }
>>> diff --git a/qapi/stats.json b/qapi/stats.json
>>> new file mode 100644
>>> index 0000000000..7454dd7daa
>>> --- /dev/null
>>> +++ b/qapi/stats.json
>>> @@ -0,0 +1,192 @@
>>> +# -*- Mode: Python -*-
>>> +# vim: filetype=python
>>> +#
>>> +# Copyright (c) 2022 Oracle and/or its affiliates.
>>> +#
>>> +# This work is licensed under the terms of the GNU GPL, version 2 or later.
>>> +# See the COPYING file in the top-level directory.
>>> +#
>>> +# SPDX-License-Identifier: GPL-2.0-or-later
>>> +
>>> +##
>>> +# = Statistics
>>> +##
>>> +
>>> +##
>>> +# @StatsType:
>>> +#
>>> +# Enumeration of statistics types
>>> +#
>>> +# @cumulative: stat is cumulative; value can only increase.
>>> +# @instant: stat is instantaneous; value can increase or decrease.
>>> +# @peak: stat is the peak value; value can only increase.
>>> +# @linear-hist: stat is a linear histogram.
>>> +# @log-hist: stat is a logarithmic histogram.
>>
>>For better or worse, we tend to eschew abbreviations in schema
>>identifiers.  Would you mind @linear-histogram and @log-histogram?
>
> Sure.
>
>
>>> +# Since: 7.1
>>> +##
>>> +{ 'enum': 'StatsTarget',
>>> +  'data': [ 'vm', 'vcpu' ] }
>>
>>Do VM stats include vCPU stats?  "Entire virtual machine" suggests they
>>do...
>
> No, they don't. They are a different sets of data that is gathered on resources shared by the whole VM. Stuff such as "# of pages taken by the KVM page tables" goes there because VCPUs share a single copy of the page tables, as opposed to "# of page faults" which is a VCPU stat.

I'm fine with whatever partition you think is useful, I'm just pointing
out that to me the documentation suggests something that ain't :)

>>> +# The arguments to the query-stats command; specifies a target for which to
>>> +# request statistics, and which statistics are requested from each provider.
>>> +#
>>> +# Since: 7.1
>>> +##
>>> +{ 'struct': 'StatsFilter',
>>> +  'data': { 'target': 'StatsTarget' } }
>>
>>The "and which statistics" part will be implemented later in this
>>series?
>
> Oh, indeed it is. Thanks for noticing.
>
>>> +{ 'struct': 'StatsResult',
>>> +  'data': { 'provider': 'StatsProvider',
>>> +            '*qom-path': 'str',
>>
>>When exactly will @qom-path be present?
>
> Only if the target is vcpus, for the current set of targets. Because the target is in the command I am not repeating it here with another discriminated record.

Needs to be documented then.

Alternatively, maybe: the "QOM path of the object for which the
statistics are returned" could be "/" or "/machine" when the object is
the VM.

>>> +# @type: kind of statistic, a @StatType.
>>
>>Generated documentation looks like
>>
>>       type: StatsType
>>              kind of statistic, a StatType.
>>
>>I think ", a @StatType" should be dropped.
>>
>>If we decide to keep it: @StatsType.
>
> Gotcha.
>
>>
>>> +#
>>> +# @unit: base unit of measurement for the statistics @StatUnit.
>>
>>"@StatUnit", too.
>>
>>If we decide to keep it: @StatsUnit.
>>
>>@unit is optional.  What's the default?
>
> The stat is an adimensional number: a count of events such as page faults, or the maximum length of a bucket in a hash table, etc. It's actually the common case.

I've come to prefer defaulting to a value over giving "absent" its own
meaning.  However, own meaning is somewhat entrenched in the schema
language and its usage, and "absent @unit means adimensional" is kind of
fitting, so I'm not objecting.  I am asking for better documentation,
though :)

>>> +# @base: base for the multiple of @unit that the statistic uses, either 2 or 10.
>>> +#        Only present if @exponent is non-zero.
>>> +#
>>> +# @exponent: exponent for the multiple of @unit that the statistic uses
>>
>>Alright, given a stat value 42, what does it mean for the possible
>>combinations of @base and @exponent?
>
> Base and exponent are used to represent units like KiB, nanoseconds, etc.

Put that in doc comments, please.

>>> +# @bucket-size: Used with linear-hist to report the width of each bucket
>>> +#               of the histogram.
>>
>>Feels too terse.  Example, perhaps?
>>
>>I assume @bucket-size is present exactly when @type is @linear-hist.
>>Correct?
>
> Yep, will expand.
>
>>> +##
>>> +# @StatsSchema:
>>> +#
>>> +# Schema for all available statistics for a provider and target.
>>> +#
>>> +# @provider: provider for this set of statistics.
>>> +#
>>> +# @target: kind of object that can be queried through this provider.
>>> +#
>>> +# @stats: list of statistics.
>>> +#
>>> +# Since: 7.1
>>> +##
>>> +{ 'struct': 'StatsSchema',
>>> +  'data': { 'provider': 'StatsProvider',
>>> +            'target': 'StatsTarget',
>>> +            'stats': [ 'StatsSchemaValue' ] } }
>>
>>How am I to connect each element of the result of query-stats to an
>>element of the result of query-stats-schema?
>
> You gave the target to query-stats and the result of query-stats has the provider and name. Target+provider+name uniquely identify a StatsSchemaValue in the result of query-stats-schemas.

Can we have that spelled out in documentation?

Doc comments or something under docs/, up to you.

>
> Paolo
>
>>
>>> +
>>> +##
>>> +# @query-stats-schemas:
>>> +#
>>> +# Return the schema for all available runtime-collected statistics.
>>> +#
>>> +# Since: 7.1
>>> +##
>>> +{ 'command': 'query-stats-schemas',
>>> +  'data': { },
>>> +  'returns': [ 'StatsSchema' ] }
>>

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Paolo Bonzini 3 years, 9 months ago

On 5/5/22 15:28, Markus Armbruster wrote:
> Double-checking (pardon my ignorance): we're basically exposing the host
> kernel's KVM stats via QMP, with the option of extending it to other
> sources of stats in the future.  Correct?

Yes.  As long as KVM is the only source, it's basically an opaque 1:1 
mapping of what the kernel gives.

> I think the argument for accepting the interface is basically "if it's
> good enough for the kernel, it's good enough for us".  Valid point.

Also, it was designed from the beginning to be extensible to other 
_kernel_ subsystems as well; i.e. it's not virt-specific in any way.

There is one important point: theoretically, stats names are not part of 
the kernel API.  In practice, you know what the chief penguin thinks of 
breaking userspace and anyway I don't think any of the stats have ever 
been removed when they were in debugfs (which makes them even less of a 
stable API).

For a similar situation see https://lwn.net/Articles/737530/: kernel 
developers hate that tracepoints are part of the stable API, but in 
practice they are (and stats are much harder to break than tracepoints, 
if it's worth exposing them to userspace in the first place).

> This means we'll acquire yet another introspection system, unrelated to
> the introspection systems we already have in QEMU.
> 
> There is overlap.  Quite a few query- commands return stats.  Should
> they be redone as statistics providers in this new introspection system?

I think so, potentially all of them can be moved.  Whether it is worth 
doing it is another story.

In addition, query-stats provides a home for TCG statistics that 
currently QMP exposes only via x- commands; they can be added without 
having to design the whole QAPI thing, and with a slightly less strong 
guarantee of stability.

> Alternatively, maybe: the "QOM path of the object for which the
> statistics are returned" could be "/" or "/machine" when the object is
> the VM.

I like that in principle, however it's not possible to make qom_path 
mandatory.  For example block devices would not have a QOM path.
>> The stat is an adimensional number: a count of events such as page faults, or the maximum length of a bucket in a hash table, etc. It's actually the common case.
> 
> I've come to prefer defaulting to a value over giving "absent" its own
> meaning.  However, own meaning is somewhat entrenched in the schema
> language and its usage, and "absent @unit means adimensional" is kind of
> fitting, so I'm not objecting.  I am asking for better documentation,
> though :)

Will document.

>>>> +# @base: base for the multiple of @unit that the statistic uses, either 2 or 10.
>>>> +#        Only present if @exponent is non-zero.
>>>> +#
>>>> +# @exponent: exponent for the multiple of @unit that the statistic uses
>>>
>>> Alright, given a stat value 42, what does it mean for the possible
>>> combinations of @base and @exponent?
>>
>> Base and exponent are used to represent units like KiB, nanoseconds, etc.
> 
> Put that in doc comments, please.

Ok, I'll make an example.

>>> How am I to connect each element of the result of query-stats to an
>>> element of the result of query-stats-schema?
>>
>> You gave the target to query-stats and the result of query-stats has the provider and name. Target+provider+name uniquely identify a StatsSchemaValue in the result of query-stats-schemas.
> 
> Can we have that spelled out in documentation?
> 
> Doc comments or something under docs/, up to you.

Hmm, it seemed obvious but I can add something to StatsSchemaValue.

Paolo

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Markus Armbruster 3 years, 9 months ago

Paolo Bonzini <pbonzini@redhat.com> writes:

> On 5/5/22 15:28, Markus Armbruster wrote:
>> Double-checking (pardon my ignorance): we're basically exposing the host
>> kernel's KVM stats via QMP, with the option of extending it to other
>> sources of stats in the future.  Correct?
>
> Yes.  As long as KVM is the only source, it's basically an opaque 1:1
> mapping of what the kernel gives.

I'd like this to be captured in documentation and / or a commit message,
because ...

>> I think the argument for accepting the interface is basically "if it's
>> good enough for the kernel, it's good enough for us".  Valid point.
>
> Also, it was designed from the beginning to be extensible to other
> _kernel_ subsystems as well; i.e. it's not virt-specific in any way.
>
> There is one important point: theoretically, stats names are not part
> of the kernel API.  In practice, you know what the chief penguin
> thinks of breaking userspace and anyway I don't think any of the stats
> have ever been removed when they were in debugfs (which makes them
> even less of a stable API).
>
> For a similar situation see https://lwn.net/Articles/737530/: kernel
> developers hate that tracepoints are part of the stable API, but in 
> practice they are (and stats are much harder to break than
> tracepoints, if it's worth exposing them to userspace in the first
> place).
>
>> This means we'll acquire yet another introspection system, unrelated to
>> the introspection systems we already have in QEMU.

... ^^^ needs justification.  Explain why passing the kernel's
existing interface through QEMU is useful, and to whom.

>> There is overlap.  Quite a few query- commands return stats.  Should
>> they be redone as statistics providers in this new introspection system?
>
> I think so, potentially all of them can be moved.  Whether it is worth
> doing it is another story.
>
> In addition, query-stats provides a home for TCG statistics that
> currently QMP exposes only via x- commands; they can be added without 
> having to design the whole QAPI thing, and with a slightly less strong
> guarantee of stability.

How strong do we feel about the stability of the stats exposed by this
command?  Separate answers for *structure* of the stats and concrete
stats.

If we're confident neither structure nor concrete stats will change
incompatibly, the commands are stable without reservations.

If we're confident the structure is stable, but unable or unwilling to
commit to the concrete stats, we should explain this in documentation.

If we're unsure about both, the commands should be marked unstable.  We
can always upgrade stability later.

[...]

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Paolo Bonzini 3 years, 9 months ago

On 5/13/22 15:10, Markus Armbruster wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
>> On 5/5/22 15:28, Markus Armbruster wrote:
>>> This means we'll acquire yet another introspection system, unrelated to
>>> the introspection systems we already have in QEMU.
> 
> ... ^^^ needs justification.  Explain why passing the kernel's
> existing interface through QEMU is useful, and to whom.

There are two justifications.

The first is the contents of the schemas: the new introspection data 
provides different information than the QAPI data, namely unit of 
measurement, how the numbers are gathered and change 
(peak/instant/cumulative/histogram), and histogram bucket sizes.  Unless 
you think those should be added to the QAPI introspection (and IMO there 
might be a case only for the unit of measure---and even then it's only a 
very weak case), the separate introspection data justifies itself.

So the existence of query-stats-schemas in my opinion is justified even 
if we don't consider the usecase of passing through the kernel's descriptions.

The second justification however is indeed about the dynamicity of the 
schema.  The QAPI introspection data is very much static; and while QOM 
is somewhat more dynamic, generally we consider that to be a bug rather 
than a feature these days.  On the other hand, running old QEMU with new 
kernel is a supported usecase; if old QEMU cannot expose statistics from 
a new kernel, or if a kernel developer needs to change QEMU before 
gathering new info from the new kernel, then that is a poor user interface.

Gathering statistics is important for development, for monitoring and 
for performance measurement.  There are tools such as kvm_stat that do 
this and they rely on the _user_ knowing the interesting data points 
rather than the tool (which can treat them as opaque).  The goal here is 
to take the capabilities of these tools and make them available 
throughout the whole virtualization stack, so that one can observe, 
monitor and measure virtual machines without having shell access + root 
on the host that runs them.

> How strong do we feel about the stability of the stats exposed by this
> command?  Separate answers for *structure* of the stats and concrete
> stats.

I'll try to answer this from the point of view of the kernel:

- will "some" statistics ever be available for all targets that are 
currently supported?  Yes, resoundingly.

- are the names of statistics stable?  Mostly, but not 100%.  If 
somebody notices the same value is being tracked with different names in 
two different architectures, one of them might be renamed.  If the 
statistic tracks a variable that does not exist anymore as the code 
changes, the statistic will go away.  If KVM grows two different ways to 
do the same thing and the default changes, some statistics that were 
previously useful could now be stuck at 0.  All of these events are 
expected to be rare, but 100% stability is neither a goal nor attainable 
in my opinion.

- is the schema format stable?  Yes, it is designed to be extensible but 
it will be backwards compatible.  Don't break userspace and all that.

And for QEMU:

- will "some" statistics ever be available for all targets that are 
currently supported?  Yes, and this will be true even if other 
QEMU-specific targets are added, e.g. block devices.

- will other providers have the same guarantees of stability?  It 
depends.  Statistics based on the current "query-blockstats" output will 
probably be even more stable than KVM stats.  TCG stats might be of 
variable stability.  We can add "x-" in front of providers if we decide 
that such a convention is useful.

- is the QEMU schema format stable?  Yes.  What we have is more or less 
a 1:1 conversion of the KVM schema format, which is pretty 
comprehensive. But if an addition to the schema proves itself worthwhile 
it can be added with the usual care to QAPI backwards compatibility.

> If we're confident neither structure nor concrete stats will change
> incompatibly, the commands are stable without reservations.
> 
> If we're confident the structure is stable, but unable or unwilling to
> commit to the concrete stats, we should explain this in documentation.

Based on the above text do you have a suggested wording and, especially, 
a suggested place?  For example, do you think it would fit better in the 
query-stats or query-stats-schemas documentation?

Thanks,

Paolo

> If we're unsure about both, the commands should be marked unstable.  We
> can always upgrade stability later.

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Markus Armbruster 3 years, 9 months ago

Paolo Bonzini <pbonzini@redhat.com> writes:

> On 5/13/22 15:10, Markus Armbruster wrote:
>> Paolo Bonzini <pbonzini@redhat.com> writes:
>>> On 5/5/22 15:28, Markus Armbruster wrote:
>>>> This means we'll acquire yet another introspection system, unrelated to
>>>> the introspection systems we already have in QEMU.
>> 
>> ... ^^^ needs justification.  Explain why passing the kernel's
>> existing interface through QEMU is useful, and to whom.
>
> There are two justifications.
>
> The first is the contents of the schemas: the new introspection data 
> provides different information than the QAPI data, namely unit of 
> measurement, how the numbers are gathered and change 
> (peak/instant/cumulative/histogram), and histogram bucket sizes.  Unless 
> you think those should be added to the QAPI introspection (and IMO there 
> might be a case only for the unit of measure---and even then it's only a 
> very weak case), the separate introspection data justifies itself.
>
> So the existence of query-stats-schemas in my opinion is justified even 
> if we don't consider the usecase of passing through the kernel's descriptions.
>
> The second justification however is indeed about the dynamicity of the 
> schema.  The QAPI introspection data is very much static; and while QOM 
> is somewhat more dynamic, generally we consider that to be a bug rather 
> than a feature these days.  On the other hand, running old QEMU with new 
> kernel is a supported usecase; if old QEMU cannot expose statistics from 
> a new kernel, or if a kernel developer needs to change QEMU before 
> gathering new info from the new kernel, then that is a poor user interface.
>
> Gathering statistics is important for development, for monitoring and 
> for performance measurement.  There are tools such as kvm_stat that do 
> this and they rely on the _user_ knowing the interesting data points 
> rather than the tool (which can treat them as opaque).  The goal here is 
> to take the capabilities of these tools and make them available 
> throughout the whole virtualization stack, so that one can observe, 
> monitor and measure virtual machines without having shell access + root 
> on the host that runs them.

Work this into one of the commit messages, please.

>> How strong do we feel about the stability of the stats exposed by this
>> command?  Separate answers for *structure* of the stats and concrete
>> stats.
>
> I'll try to answer this from the point of view of the kernel:
>
> - will "some" statistics ever be available for all targets that are 
> currently supported?  Yes, resoundingly.
>
> - are the names of statistics stable?  Mostly, but not 100%.  If 
> somebody notices the same value is being tracked with different names in 
> two different architectures, one of them might be renamed.  If the 
> statistic tracks a variable that does not exist anymore as the code 
> changes, the statistic will go away.  If KVM grows two different ways to 
> do the same thing and the default changes, some statistics that were 
> previously useful could now be stuck at 0.  All of these events are 
> expected to be rare, but 100% stability is neither a goal nor attainable 
> in my opinion.
>
> - is the schema format stable?  Yes, it is designed to be extensible but 
> it will be backwards compatible.  Don't break userspace and all that.
>
> And for QEMU:
>
> - will "some" statistics ever be available for all targets that are 
> currently supported?  Yes, and this will be true even if other 
> QEMU-specific targets are added, e.g. block devices.
>
> - will other providers have the same guarantees of stability?  It 
> depends.  Statistics based on the current "query-blockstats" output will 
> probably be even more stable than KVM stats.  TCG stats might be of 
> variable stability.  We can add "x-" in front of providers if we decide 
> that such a convention is useful.
>
> - is the QEMU schema format stable?  Yes.  What we have is more or less 
> a 1:1 conversion of the KVM schema format, which is pretty 
> comprehensive. But if an addition to the schema proves itself worthwhile 
> it can be added with the usual care to QAPI backwards compatibility.
>
>> If we're confident neither structure nor concrete stats will change
>> incompatibly, the commands are stable without reservations.
>> 
>> If we're confident the structure is stable, but unable or unwilling to
>> commit to the concrete stats, we should explain this in documentation.
>
> Based on the above text do you have a suggested wording and, especially, 

Friday afternoon, worst time for word-smithing...  Feel free to ask
again on Monday :)

> a suggested place?  For example, do you think it would fit better in the 
> query-stats or query-stats-schemas documentation?

No obvious best choice.  I'd lean towards query-stats-schema.  Or
perhaps neither; write a separate introduction instead, like this:

    ##
    # = Statistics
    #
    # Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
    # eiusmod tempor incididunt ut labore et dolore magna aliqua.  Ut enim
    # ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut
    # aliquip ex ea commodo consequat.  Duis aute irure dolor in
    # reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
    # pariatur.  Excepteur sint occaecat cupidatat non proident, sunt in
    # culpa qui officia deserunt mollit anim id est laborum.
    ##

Comes out in HTML as you'd expect, except it gets also included in the
table of contents, which is a bug.

>> If we're unsure about both, the commands should be marked unstable.  We
>> can always upgrade stability later.

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Paolo Bonzini 3 years, 9 months ago

On 5/13/22 16:35, Markus Armbruster wrote:
> Friday afternoon, worst time for word-smithing...  Feel free to ask
> again on Monday:)
> 
>> a suggested place?  For example, do you think it would fit better in the
>> query-stats or query-stats-schemas documentation?

I think query-stats-schemas is good enough.

# *Note*: While QEMU and other providers of runtime-collected statistics
# will try to keep the set of available data stable, together with their
# names, it is impossible to provide a full guarantee.  For example, if
# the same value is being tracked with different names on different
# architectures or by different providers, one of them might be renamed.
# A statistic might go away if an algorithm is changed or some code is
# removed; changing a default might cause previously useful statistics
# to always report 0.  Such changes fall outside QEMU's usual deprecation
# policies (also because statistics might be sourced externally, e.g.
# from Linux).  However, they are expected to be rare.

Paolo

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Markus Armbruster 3 years, 9 months ago

Paolo Bonzini <pbonzini@redhat.com> writes:

> On 5/13/22 16:35, Markus Armbruster wrote:
>> Friday afternoon, worst time for word-smithing...  Feel free to ask
>> again on Monday:)
>> 
>>> a suggested place?  For example, do you think it would fit better in the
>>> query-stats or query-stats-schemas documentation?
>
> I think query-stats-schemas is good enough.
>
> # *Note*: While QEMU and other providers of runtime-collected statistics
> # will try to keep the set of available data stable, together with their
> # names, it is impossible to provide a full guarantee.  For example, if
> # the same value is being tracked with different names on different
> # architectures or by different providers, one of them might be renamed.
> # A statistic might go away if an algorithm is changed or some code is
> # removed; changing a default might cause previously useful statistics
> # to always report 0.  Such changes fall outside QEMU's usual deprecation
> # policies (also because statistics might be sourced externally, e.g.
> # from Linux).  However, they are expected to be rare.

Works for me!

Markup hint: "Note:" is a note section tag, while "*Note*:" is just
text.  I figure we want the section tag here.

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Daniel P. Berrangé 3 years, 9 months ago

On Thu, May 05, 2022 at 03:28:23PM +0200, Markus Armbruster wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
> > Il 4 maggio 2022 15:22:27 CEST, Markus Armbruster <armbru@redhat.com> ha scritto:
> >>Can you point to existing uses of KVM binary stats introspection data?
> >
> > There's none, but Google is using it in house. The same data was available before in debugfs and available via the kvm_stat script, so you could also refer to Christian Borntraeger's KVM Forum 2019 talk. The problems with debugfs are basically that it's only available to root and is disabled by secure boot (both issues are not fixable in general because they are Linux policy).
> 
> I keep bothering you about use cases, because I'm habitually opposed to
> adding features without credible use cases.
> 
> For small features, a bit of plausible hand-waving can suffice, but this
> one isn't small enough for that.
> 
> Plausible hand-waving can sometimes suffice for *experimental* features.
> Say when the use case can't really materialize without the feature.
> 
> Double-checking (pardon my ignorance): we're basically exposing the host
> kernel's KVM stats via QMP, with the option of extending it to other
> sources of stats in the future.  Correct?
> 
> I think the argument for accepting the interface is basically "if it's
> good enough for the kernel, it's good enough for us".  Valid point.
> 
> This means we'll acquire yet another introspection system, unrelated to
> the introspection systems we already have in QEMU.

The second introspection system was the bit I disliked the most.

The inherent tension we have in that respect is that traditionally
with QMP we explicitly /want/ the developer to have to do design+coding
work to expose every new piece of data. Similarly on the client side
we are expecting work to consume any new piece of data.

With this command we explicitly do NOT want the developer to do
any new design+coding work, but instead allow almost arbitrary
passthrough of whatever data the kernel decides to expose, and
consumption of arbitrary data without writing new code.

There is some appeal in why we want to do that, but it is certainly
a divergence from our historical approach to QMP, so we shouldn't
make this decision lightly.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Dr. David Alan Gilbert 3 years, 9 months ago

* Daniel P. Berrangé (berrange@redhat.com) wrote:
> On Thu, May 05, 2022 at 03:28:23PM +0200, Markus Armbruster wrote:
> > Paolo Bonzini <pbonzini@redhat.com> writes:
> > 
> > > Il 4 maggio 2022 15:22:27 CEST, Markus Armbruster <armbru@redhat.com> ha scritto:
> > >>Can you point to existing uses of KVM binary stats introspection data?
> > >
> > > There's none, but Google is using it in house. The same data was available before in debugfs and available via the kvm_stat script, so you could also refer to Christian Borntraeger's KVM Forum 2019 talk. The problems with debugfs are basically that it's only available to root and is disabled by secure boot (both issues are not fixable in general because they are Linux policy).
> > 
> > I keep bothering you about use cases, because I'm habitually opposed to
> > adding features without credible use cases.
> > 
> > For small features, a bit of plausible hand-waving can suffice, but this
> > one isn't small enough for that.
> > 
> > Plausible hand-waving can sometimes suffice for *experimental* features.
> > Say when the use case can't really materialize without the feature.
> > 
> > Double-checking (pardon my ignorance): we're basically exposing the host
> > kernel's KVM stats via QMP, with the option of extending it to other
> > sources of stats in the future.  Correct?
> > 
> > I think the argument for accepting the interface is basically "if it's
> > good enough for the kernel, it's good enough for us".  Valid point.
> > 
> > This means we'll acquire yet another introspection system, unrelated to
> > the introspection systems we already have in QEMU.
> 
> The second introspection system was the bit I disliked the most.
> 
> The inherent tension we have in that respect is that traditionally
> with QMP we explicitly /want/ the developer to have to do design+coding
> work to expose every new piece of data. Similarly on the client side
> we are expecting work to consume any new piece of data.
> 
> With this command we explicitly do NOT want the developer to do
> any new design+coding work, but instead allow almost arbitrary
> passthrough of whatever data the kernel decides to expose, and
> consumption of arbitrary data without writing new code.

The developer is going to have had to make that design when they put it
in the kernel; they don't really want to repeat the bikeshedding at each
further layer up the stack.  We have to be able to accept that we're
dealing with another (open) interface which has already gone through
review.

Dave

> There is some appeal in why we want to do that, but it is certainly
> a divergence from our historical approach to QMP, so we shouldn't
> make this decision lightly.
> 
> With regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Daniel P. Berrangé 3 years, 9 months ago

On Thu, May 05, 2022 at 09:10:17AM +0200, Paolo Bonzini wrote:
> 
> 
> Il 4 maggio 2022 15:22:27 CEST, Markus Armbruster <armbru@redhat.com> ha scritto:
> >Can you point to existing uses of KVM binary stats introspection data?
> 
> There's none, but Google is using it in house. The same data was
> available before in debugfs and available via the kvm_stat script,
> so you could also refer to Christian Borntraeger's KVM Forum 2019
> talk. The problems with debugfs are basically that it's only
> available to root and is disabled by secure boot (both issues
> are not fixable in general because they are Linux policy).

Libvirt currently uses debugfs to get

 /sys/kernel/debug/kvm/-/halt_poll_success_ns

when we report on CPU usage for VMs. When kernel lockdown is enforced
under secure boot we're unable to access this file and even worse
every attempt to access it spams dmesg[1].  We need this query stats
QMP support for that single statistic alone today.


With regards,
Daniel

[1] https://gitlab.com/libvirt/libvirt/-/issues/213
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Dr. David Alan Gilbert 3 years, 9 months ago

* Paolo Bonzini (pbonzini@redhat.com) wrote:
> From: Mark Kanda <mark.kanda@oracle.com>
> 
> Introduce QMP support for querying stats. Provide a framework for adding new
> stats and support for the following commands:
> 
> - query-stats
> Returns a list of all stats per target type (only VM and vCPU to start), with
> additional options for specifying stat names, vCPU qom paths, and providers.
> 
> - query-stats-schemas
> Returns a list of stats included in each target type, with an option for
> specifying the provider.  The concepts in the schema are based on the
> KVM binary stats' own introspection data, just translated to QAPI.
> 
> The framework provides a method to register callbacks for these QMP commands.
> Most of the work in fact is done by the callbacks, and a large majority of
> this patch is new QAPI structs and commands.
> 
> The first use-case will be for fd-based KVM stats (in an upcoming patch).
> 
> Examples (with fd-based KVM stats):
> 
> - Query all VM stats:
> 
> { "execute": "query-stats", "arguments" : { "target": "vm" } }
> 
> { "return": [
>      { "provider": "kvm",
>        "stats": [
>           { "name": "max_mmu_page_hash_collisions", "value": 0 },
>           { "name": "max_mmu_rmap_size", "value": 0 },
>           { "name": "nx_lpage_splits", "value": 148 },

Is there any hierarchy to the naming or is it just a big flat name
space?

>           ... ] },
>      { "provider": "xyz",
>        "stats": [ ... ] }
> ] }
> 
> - Query all vCPU stats:
> 
> { "execute": "query-stats", "arguments" : { "target": "vcpu" } }
> 
> { "return": [
>      { "provider": "kvm",
>        "qom_path": "/machine/unattached/device[0]"
>        "stats": [
>           { "name": "guest_mode", "value": 0 },
>           { "name": "directed_yield_successful", "value": 0 },
>           { "name": "directed_yield_attempted", "value": 106 },
>           ... ] },
>      { "provider": "kvm",
>        "qom_path": "/machine/unattached/device[1]"
>        "stats": [
>           { "name": "guest_mode", "value": 0 },
>           { "name": "directed_yield_successful", "value": 0 },
>           { "name": "directed_yield_attempted", "value": 106 },
>           ... ] },
> ] }
> 
> - Retrieve the schemas:
> 
> { "execute": "query-stats-schemas" }
> 
> { "return": [
>     { "provider": "kvm",
>       "target": "vcpu",
>       "stats": [
>          { "name": "guest_mode",
>            "unit": "none",
>            "base": 10,
>            "exponent": 0,
>            "type": "instant" },
>         { "name": "directed_yield_successful",
>            "unit": "none",
>            "base": 10,
>            "exponent": 0,
>            "type": "cumulative" },
>         ... ]
>     },
>     { "provider": "kvm",
>       "target": "vm",
>       "stats": [
>         { "name": "max_mmu_page_hash_collisions",
>            "unit": "none",
>            "base": 10,
>            "exponent": 0,
>            "type": "peak" },
>         ... ]
>     },

Is there some way to reset the peak or cumulative values?

Dave

>     { "provider": "xyz",
>       "target": "vm",
>       "stats": [ ... ]
>     }
> ] }
> 
> Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  include/monitor/stats.h |  33 +++++++
>  monitor/qmp-cmds.c      |  71 +++++++++++++++
>  qapi/meson.build        |   1 +
>  qapi/qapi-schema.json   |   1 +
>  qapi/stats.json         | 192 ++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 298 insertions(+)
>  create mode 100644 include/monitor/stats.h
>  create mode 100644 qapi/stats.json
> 
> diff --git a/include/monitor/stats.h b/include/monitor/stats.h
> new file mode 100644
> index 0000000000..89552ab06f
> --- /dev/null
> +++ b/include/monitor/stats.h
> @@ -0,0 +1,33 @@
> +/*
> + * Copyright (c) 2022 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef STATS_H
> +#define STATS_H
> +
> +#include "qapi/qapi-types-stats.h"
> +
> +typedef void StatRetrieveFunc(StatsResultList **result, StatsTarget target, Error **errp);
> +typedef void SchemaRetrieveFunc(StatsSchemaList **result, Error **errp);
> +
> +/*
> + * Register callbacks for the QMP query-stats command.
> + *
> + * @stats_fn: routine to query stats:
> + * @schema_fn: routine to query stat schemas:
> + */
> +void add_stats_callbacks(StatRetrieveFunc *stats_fn,
> +                         SchemaRetrieveFunc *schemas_fn);
> +
> +/*
> + * Helper routines for adding stats entries to the results lists.
> + */
> +void add_stats_entry(StatsResultList **, StatsProvider, const char *id,
> +                     StatsList *stats_list);
> +void add_stats_schema(StatsSchemaList **, StatsProvider, StatsTarget,
> +                      StatsSchemaValueList *);
> +
> +#endif /* STATS_H */
> diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
> index 5e7302cbb9..97825b25fa 100644
> --- a/monitor/qmp-cmds.c
> +++ b/monitor/qmp-cmds.c
> @@ -35,6 +35,7 @@
>  #include "qapi/qapi-commands-control.h"
>  #include "qapi/qapi-commands-machine.h"
>  #include "qapi/qapi-commands-misc.h"
> +#include "qapi/qapi-commands-stats.h"
>  #include "qapi/qapi-commands-ui.h"
>  #include "qapi/type-helpers.h"
>  #include "qapi/qmp/qerror.h"
> @@ -43,6 +44,7 @@
>  #include "hw/acpi/acpi_dev_interface.h"
>  #include "hw/intc/intc.h"
>  #include "hw/rdma/rdma.h"
> +#include "monitor/stats.h"
>  
>  NameInfo *qmp_query_name(Error **errp)
>  {
> @@ -426,3 +428,72 @@ HumanReadableText *qmp_x_query_irq(Error **errp)
>  
>      return human_readable_text_from_str(buf);
>  }
> +
> +typedef struct StatsCallbacks {
> +    StatRetrieveFunc *stats_cb;
> +    SchemaRetrieveFunc *schemas_cb;
> +    QTAILQ_ENTRY(StatsCallbacks) next;
> +} StatsCallbacks;
> +
> +static QTAILQ_HEAD(, StatsCallbacks) stats_callbacks =
> +    QTAILQ_HEAD_INITIALIZER(stats_callbacks);
> +
> +void add_stats_callbacks(StatRetrieveFunc *stats_fn,
> +                         SchemaRetrieveFunc *schemas_fn)
> +{
> +    StatsCallbacks *entry = g_new(StatsCallbacks, 1);
> +    entry->stats_cb = stats_fn;
> +    entry->schemas_cb = schemas_fn;
> +
> +    QTAILQ_INSERT_TAIL(&stats_callbacks, entry, next);
> +}
> +
> +StatsResultList *qmp_query_stats(StatsFilter *filter, Error **errp)
> +{
> +    StatsResultList *stats_results = NULL;
> +    StatsCallbacks *entry;
> +
> +    QTAILQ_FOREACH(entry, &stats_callbacks, next) {
> +        entry->stats_cb(&stats_results, filter->target, errp);
> +    }
> +
> +    return stats_results;
> +}
> +
> +StatsSchemaList *qmp_query_stats_schemas(Error **errp)
> +{
> +    StatsSchemaList *stats_results = NULL;
> +    StatsCallbacks *entry;
> +
> +    QTAILQ_FOREACH(entry, &stats_callbacks, next) {
> +        entry->schemas_cb(&stats_results, errp);
> +    }
> +
> +    return stats_results;
> +}
> +
> +void add_stats_entry(StatsResultList **stats_results, StatsProvider provider,
> +                     const char *qom_path, StatsList *stats_list)
> +{
> +    StatsResult *entry = g_new0(StatsResult, 1);
> +    entry->provider = provider;
> +    if (qom_path) {
> +        entry->has_qom_path = true;
> +        entry->qom_path = g_strdup(qom_path);
> +    }
> +    entry->stats = stats_list;
> +
> +    QAPI_LIST_PREPEND(*stats_results, entry);
> +}
> +
> +void add_stats_schema(StatsSchemaList **schema_results,
> +                      StatsProvider provider, StatsTarget target,
> +                      StatsSchemaValueList *stats_list)
> +{
> +    StatsSchema *entry = g_new0(StatsSchema, 1);
> +
> +    entry->provider = provider;
> +    entry->target = target;
> +    entry->stats = stats_list;
> +    QAPI_LIST_PREPEND(*schema_results, entry);
> +}
> diff --git a/qapi/meson.build b/qapi/meson.build
> index 656ef0e039..fd5c93d643 100644
> --- a/qapi/meson.build
> +++ b/qapi/meson.build
> @@ -46,6 +46,7 @@ qapi_all_modules = [
>    'replay',
>    'run-state',
>    'sockets',
> +  'stats',
>    'trace',
>    'transaction',
>    'yank',
> diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
> index 4912b9744e..92d7ecc52c 100644
> --- a/qapi/qapi-schema.json
> +++ b/qapi/qapi-schema.json
> @@ -93,3 +93,4 @@
>  { 'include': 'audio.json' }
>  { 'include': 'acpi.json' }
>  { 'include': 'pci.json' }
> +{ 'include': 'stats.json' }
> diff --git a/qapi/stats.json b/qapi/stats.json
> new file mode 100644
> index 0000000000..7454dd7daa
> --- /dev/null
> +++ b/qapi/stats.json
> @@ -0,0 +1,192 @@
> +# -*- Mode: Python -*-
> +# vim: filetype=python
> +#
> +# Copyright (c) 2022 Oracle and/or its affiliates.
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or later.
> +# See the COPYING file in the top-level directory.
> +#
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +##
> +# = Statistics
> +##
> +
> +##
> +# @StatsType:
> +#
> +# Enumeration of statistics types
> +#
> +# @cumulative: stat is cumulative; value can only increase.
> +# @instant: stat is instantaneous; value can increase or decrease.
> +# @peak: stat is the peak value; value can only increase.
> +# @linear-hist: stat is a linear histogram.
> +# @log-hist: stat is a logarithmic histogram.
> +#
> +# Since: 7.1
> +##
> +{ 'enum' : 'StatsType',
> +  'data' : [ 'cumulative', 'instant', 'peak', 'linear-hist', 'log-hist' ] }
> +
> +##
> +# @StatsUnit:
> +#
> +# Enumeration of unit of measurement for statistics
> +#
> +# @bytes: stat reported in bytes.
> +# @seconds: stat reported in seconds.
> +# @cycles: stat reported in clock cycles.
> +#
> +# Since: 7.1
> +##
> +{ 'enum' : 'StatsUnit',
> +  'data' : [ 'bytes', 'seconds', 'cycles' ] }
> +
> +##
> +# @StatsProvider:
> +#
> +# Enumeration of statistics providers.
> +#
> +# Since: 7.1
> +##
> +{ 'enum': 'StatsProvider',
> +  'data': [ ] }
> +
> +##
> +# @StatsTarget:
> +#
> +# The kinds of objects on which one can request statistics.
> +#
> +# @vm: the entire virtual machine.
> +# @vcpu: a virtual CPU.
> +#
> +# Since: 7.1
> +##
> +{ 'enum': 'StatsTarget',
> +  'data': [ 'vm', 'vcpu' ] }
> +
> +##
> +# @StatsFilter:
> +#
> +# The arguments to the query-stats command; specifies a target for which to
> +# request statistics, and which statistics are requested from each provider.
> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'StatsFilter',
> +  'data': { 'target': 'StatsTarget' } }
> +
> +##
> +# @StatsValue:
> +#
> +# @scalar: single uint64.
> +# @list: list of uint64.
> +#
> +# Since: 7.1
> +##
> +{ 'alternate': 'StatsValue',
> +  'data': { 'scalar': 'uint64',
> +            'list': [ 'uint64' ] } }
> +
> +##
> +# @Stats:
> +#
> +# @name: name of stat.
> +# @value: stat value.
> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'Stats',
> +  'data': { 'name': 'str',
> +            'value' : 'StatsValue' } }
> +
> +##
> +# @StatsResult:
> +#
> +# @provider: provider for this set of statistics.
> +# @qom-path: QOM path of the object for which the statistics are returned
> +# @stats: list of statistics.
> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'StatsResult',
> +  'data': { 'provider': 'StatsProvider',
> +            '*qom-path': 'str',
> +            'stats': [ 'Stats' ] } }
> +
> +##
> +# @query-stats:
> +#
> +# Return runtime-collected statistics for objects such as the
> +# VM or its vCPUs.
> +#
> +# The arguments are a StatsFilter and specify the provider and objects
> +# to return statistics about.
> +#
> +# Returns: a list of StatsResult, one for each provider and object
> +#          (e.g., for each vCPU).
> +#
> +# Since: 7.1
> +##
> +{ 'command': 'query-stats',
> +  'data': 'StatsFilter',
> +  'boxed': true,
> +  'returns': [ 'StatsResult' ] }
> +
> +##
> +# @StatsSchemaValue:
> +#
> +# Schema for a single statistic.
> +#
> +# @name: stat name.
> +#
> +# @type: kind of statistic, a @StatType.
> +#
> +# @unit: base unit of measurement for the statistics @StatUnit.
> +#
> +# @base: base for the multiple of @unit that the statistic uses, either 2 or 10.
> +#        Only present if @exponent is non-zero.
> +#
> +# @exponent: exponent for the multiple of @unit that the statistic uses
> +#
> +# @bucket-size: Used with linear-hist to report the width of each bucket
> +#               of the histogram.
> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'StatsSchemaValue',
> +  'data': { 'name': 'str',
> +            'type': 'StatsType',
> +            '*unit': 'StatsUnit',
> +            '*base': 'int8',
> +            'exponent': 'int16',
> +            '*bucket-size': 'uint32' } }
> +
> +##
> +# @StatsSchema:
> +#
> +# Schema for all available statistics for a provider and target.
> +#
> +# @provider: provider for this set of statistics.
> +#
> +# @target: kind of object that can be queried through this provider.
> +#
> +# @stats: list of statistics.
> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'StatsSchema',
> +  'data': { 'provider': 'StatsProvider',
> +            'target': 'StatsTarget',
> +            'stats': [ 'StatsSchemaValue' ] } }
> +
> +##
> +# @query-stats-schemas:
> +#
> +# Return the schema for all available runtime-collected statistics.
> +#
> +# Since: 7.1
> +##
> +{ 'command': 'query-stats-schemas',
> +  'data': { },
> +  'returns': [ 'StatsSchema' ] }
> -- 
> 2.35.1
> 
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Re: [PATCH 1/8] qmp: Support for querying stats

Posted by Paolo Bonzini 3 years, 9 months ago

On 4/27/22 11:19, Dr. David Alan Gilbert wrote:
>> { "return": [
>>       { "provider": "kvm",
>>         "stats": [
>>            { "name": "max_mmu_page_hash_collisions", "value": 0 },
>>            { "name": "max_mmu_rmap_size", "value": 0 },
>>            { "name": "nx_lpage_splits", "value": 148 },
> 
> Is there any hierarchy to the naming or is it just a big flat name
> space?

Within KVM, no, but there is a hierarchy of provider->stat.

>>      { "provider": "kvm",
>>        "target": "vm",
>>        "stats": [
>>          { "name": "max_mmu_page_hash_collisions",
>>             "unit": "none",
>>             "base": 10,
>>             "exponent": 0,
>>             "type": "peak" },
>>          ... ]
>>      },
> 
> Is there some way to reset the peak or cumulative values?

Not yet, but the plan is to allow pwrite for peak and cumulative 
statistics, and possibly for histograms as well.  Alternatively it could 
be an ioctl.  Indecision about write support is also the reason why mmap 
is not allowed yet.

Paolo

> Dave
> 
>>      { "provider": "xyz",
>>        "target": "vm",
>>        "stats": [ ... ]
>>      }
>> ] }
>>
>> Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   include/monitor/stats.h |  33 +++++++
>>   monitor/qmp-cmds.c      |  71 +++++++++++++++
>>   qapi/meson.build        |   1 +
>>   qapi/qapi-schema.json   |   1 +
>>   qapi/stats.json         | 192 ++++++++++++++++++++++++++++++++++++++++
>>   5 files changed, 298 insertions(+)
>>   create mode 100644 include/monitor/stats.h
>>   create mode 100644 qapi/stats.json
>>
>> diff --git a/include/monitor/stats.h b/include/monitor/stats.h
>> new file mode 100644
>> index 0000000000..89552ab06f
>> --- /dev/null
>> +++ b/include/monitor/stats.h
>> @@ -0,0 +1,33 @@
>> +/*
>> + * Copyright (c) 2022 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef STATS_H
>> +#define STATS_H
>> +
>> +#include "qapi/qapi-types-stats.h"
>> +
>> +typedef void StatRetrieveFunc(StatsResultList **result, StatsTarget target, Error **errp);
>> +typedef void SchemaRetrieveFunc(StatsSchemaList **result, Error **errp);
>> +
>> +/*
>> + * Register callbacks for the QMP query-stats command.
>> + *
>> + * @stats_fn: routine to query stats:
>> + * @schema_fn: routine to query stat schemas:
>> + */
>> +void add_stats_callbacks(StatRetrieveFunc *stats_fn,
>> +                         SchemaRetrieveFunc *schemas_fn);
>> +
>> +/*
>> + * Helper routines for adding stats entries to the results lists.
>> + */
>> +void add_stats_entry(StatsResultList **, StatsProvider, const char *id,
>> +                     StatsList *stats_list);
>> +void add_stats_schema(StatsSchemaList **, StatsProvider, StatsTarget,
>> +                      StatsSchemaValueList *);
>> +
>> +#endif /* STATS_H */
>> diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
>> index 5e7302cbb9..97825b25fa 100644
>> --- a/monitor/qmp-cmds.c
>> +++ b/monitor/qmp-cmds.c
>> @@ -35,6 +35,7 @@
>>   #include "qapi/qapi-commands-control.h"
>>   #include "qapi/qapi-commands-machine.h"
>>   #include "qapi/qapi-commands-misc.h"
>> +#include "qapi/qapi-commands-stats.h"
>>   #include "qapi/qapi-commands-ui.h"
>>   #include "qapi/type-helpers.h"
>>   #include "qapi/qmp/qerror.h"
>> @@ -43,6 +44,7 @@
>>   #include "hw/acpi/acpi_dev_interface.h"
>>   #include "hw/intc/intc.h"
>>   #include "hw/rdma/rdma.h"
>> +#include "monitor/stats.h"
>>   
>>   NameInfo *qmp_query_name(Error **errp)
>>   {
>> @@ -426,3 +428,72 @@ HumanReadableText *qmp_x_query_irq(Error **errp)
>>   
>>       return human_readable_text_from_str(buf);
>>   }
>> +
>> +typedef struct StatsCallbacks {
>> +    StatRetrieveFunc *stats_cb;
>> +    SchemaRetrieveFunc *schemas_cb;
>> +    QTAILQ_ENTRY(StatsCallbacks) next;
>> +} StatsCallbacks;
>> +
>> +static QTAILQ_HEAD(, StatsCallbacks) stats_callbacks =
>> +    QTAILQ_HEAD_INITIALIZER(stats_callbacks);
>> +
>> +void add_stats_callbacks(StatRetrieveFunc *stats_fn,
>> +                         SchemaRetrieveFunc *schemas_fn)
>> +{
>> +    StatsCallbacks *entry = g_new(StatsCallbacks, 1);
>> +    entry->stats_cb = stats_fn;
>> +    entry->schemas_cb = schemas_fn;
>> +
>> +    QTAILQ_INSERT_TAIL(&stats_callbacks, entry, next);
>> +}
>> +
>> +StatsResultList *qmp_query_stats(StatsFilter *filter, Error **errp)
>> +{
>> +    StatsResultList *stats_results = NULL;
>> +    StatsCallbacks *entry;
>> +
>> +    QTAILQ_FOREACH(entry, &stats_callbacks, next) {
>> +        entry->stats_cb(&stats_results, filter->target, errp);
>> +    }
>> +
>> +    return stats_results;
>> +}
>> +
>> +StatsSchemaList *qmp_query_stats_schemas(Error **errp)
>> +{
>> +    StatsSchemaList *stats_results = NULL;
>> +    StatsCallbacks *entry;
>> +
>> +    QTAILQ_FOREACH(entry, &stats_callbacks, next) {
>> +        entry->schemas_cb(&stats_results, errp);
>> +    }
>> +
>> +    return stats_results;
>> +}
>> +
>> +void add_stats_entry(StatsResultList **stats_results, StatsProvider provider,
>> +                     const char *qom_path, StatsList *stats_list)
>> +{
>> +    StatsResult *entry = g_new0(StatsResult, 1);
>> +    entry->provider = provider;
>> +    if (qom_path) {
>> +        entry->has_qom_path = true;
>> +        entry->qom_path = g_strdup(qom_path);
>> +    }
>> +    entry->stats = stats_list;
>> +
>> +    QAPI_LIST_PREPEND(*stats_results, entry);
>> +}
>> +
>> +void add_stats_schema(StatsSchemaList **schema_results,
>> +                      StatsProvider provider, StatsTarget target,
>> +                      StatsSchemaValueList *stats_list)
>> +{
>> +    StatsSchema *entry = g_new0(StatsSchema, 1);
>> +
>> +    entry->provider = provider;
>> +    entry->target = target;
>> +    entry->stats = stats_list;
>> +    QAPI_LIST_PREPEND(*schema_results, entry);
>> +}
>> diff --git a/qapi/meson.build b/qapi/meson.build
>> index 656ef0e039..fd5c93d643 100644
>> --- a/qapi/meson.build
>> +++ b/qapi/meson.build
>> @@ -46,6 +46,7 @@ qapi_all_modules = [
>>     'replay',
>>     'run-state',
>>     'sockets',
>> +  'stats',
>>     'trace',
>>     'transaction',
>>     'yank',
>> diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
>> index 4912b9744e..92d7ecc52c 100644
>> --- a/qapi/qapi-schema.json
>> +++ b/qapi/qapi-schema.json
>> @@ -93,3 +93,4 @@
>>   { 'include': 'audio.json' }
>>   { 'include': 'acpi.json' }
>>   { 'include': 'pci.json' }
>> +{ 'include': 'stats.json' }
>> diff --git a/qapi/stats.json b/qapi/stats.json
>> new file mode 100644
>> index 0000000000..7454dd7daa
>> --- /dev/null
>> +++ b/qapi/stats.json
>> @@ -0,0 +1,192 @@
>> +# -*- Mode: Python -*-
>> +# vim: filetype=python
>> +#
>> +# Copyright (c) 2022 Oracle and/or its affiliates.
>> +#
>> +# This work is licensed under the terms of the GNU GPL, version 2 or later.
>> +# See the COPYING file in the top-level directory.
>> +#
>> +# SPDX-License-Identifier: GPL-2.0-or-later
>> +
>> +##
>> +# = Statistics
>> +##
>> +
>> +##
>> +# @StatsType:
>> +#
>> +# Enumeration of statistics types
>> +#
>> +# @cumulative: stat is cumulative; value can only increase.
>> +# @instant: stat is instantaneous; value can increase or decrease.
>> +# @peak: stat is the peak value; value can only increase.
>> +# @linear-hist: stat is a linear histogram.
>> +# @log-hist: stat is a logarithmic histogram.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'enum' : 'StatsType',
>> +  'data' : [ 'cumulative', 'instant', 'peak', 'linear-hist', 'log-hist' ] }
>> +
>> +##
>> +# @StatsUnit:
>> +#
>> +# Enumeration of unit of measurement for statistics
>> +#
>> +# @bytes: stat reported in bytes.
>> +# @seconds: stat reported in seconds.
>> +# @cycles: stat reported in clock cycles.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'enum' : 'StatsUnit',
>> +  'data' : [ 'bytes', 'seconds', 'cycles' ] }
>> +
>> +##
>> +# @StatsProvider:
>> +#
>> +# Enumeration of statistics providers.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'enum': 'StatsProvider',
>> +  'data': [ ] }
>> +
>> +##
>> +# @StatsTarget:
>> +#
>> +# The kinds of objects on which one can request statistics.
>> +#
>> +# @vm: the entire virtual machine.
>> +# @vcpu: a virtual CPU.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'enum': 'StatsTarget',
>> +  'data': [ 'vm', 'vcpu' ] }
>> +
>> +##
>> +# @StatsFilter:
>> +#
>> +# The arguments to the query-stats command; specifies a target for which to
>> +# request statistics, and which statistics are requested from each provider.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'struct': 'StatsFilter',
>> +  'data': { 'target': 'StatsTarget' } }
>> +
>> +##
>> +# @StatsValue:
>> +#
>> +# @scalar: single uint64.
>> +# @list: list of uint64.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'alternate': 'StatsValue',
>> +  'data': { 'scalar': 'uint64',
>> +            'list': [ 'uint64' ] } }
>> +
>> +##
>> +# @Stats:
>> +#
>> +# @name: name of stat.
>> +# @value: stat value.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'struct': 'Stats',
>> +  'data': { 'name': 'str',
>> +            'value' : 'StatsValue' } }
>> +
>> +##
>> +# @StatsResult:
>> +#
>> +# @provider: provider for this set of statistics.
>> +# @qom-path: QOM path of the object for which the statistics are returned
>> +# @stats: list of statistics.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'struct': 'StatsResult',
>> +  'data': { 'provider': 'StatsProvider',
>> +            '*qom-path': 'str',
>> +            'stats': [ 'Stats' ] } }
>> +
>> +##
>> +# @query-stats:
>> +#
>> +# Return runtime-collected statistics for objects such as the
>> +# VM or its vCPUs.
>> +#
>> +# The arguments are a StatsFilter and specify the provider and objects
>> +# to return statistics about.
>> +#
>> +# Returns: a list of StatsResult, one for each provider and object
>> +#          (e.g., for each vCPU).
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'command': 'query-stats',
>> +  'data': 'StatsFilter',
>> +  'boxed': true,
>> +  'returns': [ 'StatsResult' ] }
>> +
>> +##
>> +# @StatsSchemaValue:
>> +#
>> +# Schema for a single statistic.
>> +#
>> +# @name: stat name.
>> +#
>> +# @type: kind of statistic, a @StatType.
>> +#
>> +# @unit: base unit of measurement for the statistics @StatUnit.
>> +#
>> +# @base: base for the multiple of @unit that the statistic uses, either 2 or 10.
>> +#        Only present if @exponent is non-zero.
>> +#
>> +# @exponent: exponent for the multiple of @unit that the statistic uses
>> +#
>> +# @bucket-size: Used with linear-hist to report the width of each bucket
>> +#               of the histogram.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'struct': 'StatsSchemaValue',
>> +  'data': { 'name': 'str',
>> +            'type': 'StatsType',
>> +            '*unit': 'StatsUnit',
>> +            '*base': 'int8',
>> +            'exponent': 'int16',
>> +            '*bucket-size': 'uint32' } }
>> +
>> +##
>> +# @StatsSchema:
>> +#
>> +# Schema for all available statistics for a provider and target.
>> +#
>> +# @provider: provider for this set of statistics.
>> +#
>> +# @target: kind of object that can be queried through this provider.
>> +#
>> +# @stats: list of statistics.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'struct': 'StatsSchema',
>> +  'data': { 'provider': 'StatsProvider',
>> +            'target': 'StatsTarget',
>> +            'stats': [ 'StatsSchemaValue' ] } }
>> +
>> +##
>> +# @query-stats-schemas:
>> +#
>> +# Return the schema for all available runtime-collected statistics.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'command': 'query-stats-schemas',
>> +  'data': { },
>> +  'returns': [ 'StatsSchema' ] }
>> -- 
>> 2.35.1
>>
>>