Introduce the kvm-pmu-filter object and support the PMU event with raw
format.
The raw format, as a native PMU event code representation, can be used
for several architectures.
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Tested-by: Yi Lai <yi1.lai@intel.com>
---
Changes since RFC v2:
* Drop hexadecimal variants and support numeric version in QAPI
directly. (Daniel)
* Define three-level sections with new accelerator.json. (Markus)
* QAPI style fixes:
- KVMPMU* stuff -> KvmPmu*.
- KVMPMUFilterProperty -> KVMPMUFilterProperties.
- KVMPMUEventEncodeFmt -> KvmPmuEventFormat.
- drop prefix in KvmPmuFilterAction and KvmPmuEventFormat.
* Bump up the supported QAPI version to v10.1.
* Add Tested-by from Yi.
Changes since RFC v1:
* Make "action" as a global (per filter object) item, not a per-event
parameter. (Dapeng)
* Bump up the supported QAPI version to v10.0.
---
MAINTAINERS | 2 +
accel/kvm/kvm-pmu.c | 114 +++++++++++++++++++++++++++++++++++++++
accel/kvm/meson.build | 1 +
include/system/kvm-pmu.h | 35 ++++++++++++
qapi/accelerator.json | 14 +++++
qapi/kvm.json | 84 +++++++++++++++++++++++++++++
qapi/meson.build | 1 +
qapi/qapi-schema.json | 1 +
qapi/qom.json | 3 ++
9 files changed, 255 insertions(+)
create mode 100644 accel/kvm/kvm-pmu.c
create mode 100644 include/system/kvm-pmu.h
create mode 100644 qapi/accelerator.json
create mode 100644 qapi/kvm.json
diff --git a/MAINTAINERS b/MAINTAINERS
index d54b5578f883..3ca551025fb8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -434,6 +434,7 @@ F: accel/kvm/
F: accel/stubs/kvm-stub.c
F: include/hw/kvm/
F: include/system/kvm*.h
+F: qapi/kvm.json
F: scripts/kvm/kvm_flightrecorder
ARM KVM CPUs
@@ -503,6 +504,7 @@ F: accel/Makefile.objs
F: accel/stubs/Makefile.objs
F: cpu-common.c
F: cpu-target.c
+F: qapi/accelerator.json
F: system/cpus.c
Apple Silicon HVF CPUs
diff --git a/accel/kvm/kvm-pmu.c b/accel/kvm/kvm-pmu.c
new file mode 100644
index 000000000000..22f749bf9183
--- /dev/null
+++ b/accel/kvm/kvm-pmu.c
@@ -0,0 +1,114 @@
+/*
+ * QEMU KVM PMU Related Abstractions
+ *
+ * Copyright (C) 2025 Intel Corporation.
+ *
+ * Author: Zhao Liu <zhao1.liu@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qapi/qapi-visit-kvm.h"
+#include "qemu/cutils.h"
+#include "qom/object_interfaces.h"
+#include "system/kvm-pmu.h"
+
+/*
+ * Setter for the "action" enum property: record the requested filter
+ * action on the object.  No validation is done here and @errp is never
+ * written.
+ */
+static void kvm_pmu_filter_set_action(Object *obj, int value,
+                                      Error **errp G_GNUC_UNUSED)
+{
+    KVM_PMU_FILTER(obj)->action = value;
+}
+
+/*
+ * Getter for the "action" enum property: return the action currently
+ * stored in the filter object.  @errp is never written.
+ */
+static int kvm_pmu_filter_get_action(Object *obj,
+                                     Error **errp G_GNUC_UNUSED)
+{
+    return KVM_PMU_FILTER(obj)->action;
+}
+
+/*
+ * Getter for the "events" property: hand the stored event list to the
+ * visitor @v under @name.  A visitor failure is reported through @errp;
+ * @opaque is unused.
+ */
+static void kvm_pmu_filter_get_event(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+{
+    KvmPmuFilterEventList **events = &KVM_PMU_FILTER(obj)->events;
+
+    visit_type_KvmPmuFilterEventList(v, name, events, errp);
+}
+
+/*
+ * Setter for the "events" property.
+ *
+ * Reads a new event list from the visitor @v.  If the visit fails, the
+ * error is left in @errp and the filter is not modified.  Otherwise each
+ * entry's format is checked, the entries are counted, and the filter takes
+ * the new list; the list it held before is freed.  @opaque is unused.
+ */
+static void kvm_pmu_filter_set_event(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+{
+    KVMPMUFilter *filter = KVM_PMU_FILTER(obj);
+    KvmPmuFilterEventList *previous = filter->events;
+    KvmPmuFilterEventList *incoming = NULL;
+    KvmPmuFilterEventList *cur;
+    int count = 0;
+
+    if (!visit_type_KvmPmuFilterEventList(v, name, &incoming, errp)) {
+        return;
+    }
+
+    cur = incoming;
+    while (cur) {
+        switch (cur->value->format) {
+        case KVM_PMU_EVENT_FORMAT_RAW:
+            /* Raw codes need no further checking here. */
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        count++;
+        cur = cur->next;
+    }
+
+    filter->events = incoming;
+    filter->nevents = count;
+    qapi_free_KvmPmuFilterEventList(previous);
+}
+
+/*
+ * Class initializer: register the two properties of the kvm-pmu-filter
+ * type, "action" and "events", together with their descriptions.
+ */
+static void kvm_pmu_filter_class_init(ObjectClass *oc, void *data)
+{
+    static const char action_name[] = "action";
+    static const char events_name[] = "events";
+
+    /* "action": enum property backed by KvmPmuFilterAction_lookup. */
+    object_class_property_add_enum(oc, action_name, "KvmPmuFilterAction",
+                                   &KvmPmuFilterAction_lookup,
+                                   kvm_pmu_filter_get_action,
+                                   kvm_pmu_filter_set_action);
+    object_class_property_set_description(oc, action_name,
+                                          "KVM PMU event action");
+
+    /* "events": list property accessed through the visitor callbacks. */
+    object_class_property_add(oc, events_name, "KvmPmuFilterEventList",
+                              kvm_pmu_filter_get_event,
+                              kvm_pmu_filter_set_event,
+                              NULL, NULL);
+    object_class_property_set_description(oc, events_name,
+                                          "KVM PMU event list");
+}
+
+/* Instance initializer: start with the "allow" action and zero events. */
+static void kvm_pmu_filter_instance_init(Object *obj)
+{
+    KVMPMUFilter *self = KVM_PMU_FILTER(obj);
+
+    self->nevents = 0;
+    self->action = KVM_PMU_FILTER_ACTION_ALLOW;
+}
+
+/*
+ * Instance finalizer: free the event list held by the filter.
+ *
+ * The "events" setter stores a QAPI-allocated list in the object and the
+ * property is registered without a release callback, so nothing else
+ * frees the list when the object goes away.
+ */
+static void kvm_pmu_filter_instance_finalize(Object *obj)
+{
+    KVMPMUFilter *filter = KVM_PMU_FILTER(obj);
+
+    qapi_free_KvmPmuFilterEventList(filter->events);
+    filter->events = NULL;
+    filter->nevents = 0;
+}
+
+/*
+ * QOM type description of kvm-pmu-filter: a direct child of TYPE_OBJECT
+ * that implements the user-creatable interface.
+ */
+static const TypeInfo kvm_pmu_filter_info = {
+    .parent = TYPE_OBJECT,
+    .name = TYPE_KVM_PMU_FILTER,
+    .class_init = kvm_pmu_filter_class_init,
+    .instance_size = sizeof(KVMPMUFilter),
+    .instance_init = kvm_pmu_filter_instance_init,
+    .instance_finalize = kvm_pmu_filter_instance_finalize,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+/* Register the kvm-pmu-filter QOM type; hooked up through type_init(). */
+static void kvm_pmu_filter_register_types(void)
+{
+    type_register_static(&kvm_pmu_filter_info);
+}
+
+type_init(kvm_pmu_filter_register_types);
diff --git a/accel/kvm/meson.build b/accel/kvm/meson.build
index 397a1fe1fd1e..dfab2854f3a8 100644
--- a/accel/kvm/meson.build
+++ b/accel/kvm/meson.build
@@ -2,6 +2,7 @@ kvm_ss = ss.source_set()
kvm_ss.add(files(
'kvm-all.c',
'kvm-accel-ops.c',
+ 'kvm-pmu.c',
))
specific_ss.add_all(when: 'CONFIG_KVM', if_true: kvm_ss)
diff --git a/include/system/kvm-pmu.h b/include/system/kvm-pmu.h
new file mode 100644
index 000000000000..818fa309c191
--- /dev/null
+++ b/include/system/kvm-pmu.h
@@ -0,0 +1,35 @@
+/*
+ * QEMU KVM PMU Related Abstraction Header
+ *
+ * Copyright (C) 2025 Intel Corporation.
+ *
+ * Author: Zhao Liu <zhao1.liu@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef KVM_PMU_H
+#define KVM_PMU_H
+
+#include "qapi/qapi-types-kvm.h"
+#include "qom/object.h"
+
+#define TYPE_KVM_PMU_FILTER "kvm-pmu-filter"
+OBJECT_DECLARE_SIMPLE_TYPE(KVMPMUFilter, KVM_PMU_FILTER)
+
+/**
+ * KVMPMUFilter:
+ * @parent_obj: QOM parent object.
+ * @action: action that the KVM PMU filter takes for the PMU events listed
+ * in @events; initialized to KVM_PMU_FILTER_ACTION_ALLOW.
+ * @nevents: number of PMU event entries listed in @events, recomputed
+ * whenever the "events" property is set.
+ * @events: list of PMU event entries, replaced as a whole whenever the
+ * "events" property is set. A PMU event entry may represent one
+ * event or multiple events due to its format.
+ */
+struct KVMPMUFilter {
+ Object parent_obj;
+
+ KvmPmuFilterAction action;
+ uint32_t nevents;
+ KvmPmuFilterEventList *events;
+};
+
+#endif /* KVM_PMU_H */
diff --git a/qapi/accelerator.json b/qapi/accelerator.json
new file mode 100644
index 000000000000..1fe0d64be113
--- /dev/null
+++ b/qapi/accelerator.json
@@ -0,0 +1,14 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# Copyright (C) 2025 Intel Corporation.
+#
+# Author: Zhao Liu <zhao1.liu@intel.com>
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+##
+# = Accelerators
+##
+
+{ 'include': 'kvm.json' }
diff --git a/qapi/kvm.json b/qapi/kvm.json
new file mode 100644
index 000000000000..1861d86a9726
--- /dev/null
+++ b/qapi/kvm.json
@@ -0,0 +1,84 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# Copyright (C) 2025 Intel Corporation.
+#
+# Author: Zhao Liu <zhao1.liu@intel.com>
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+##
+# == KVM
+##
+
+##
+# === PMU stuff (KVM related)
+##
+
+##
+# @KvmPmuFilterAction:
+#
+# Actions that KVM PMU filter supports.
+#
+# @allow: enable the PMU event/counter in KVM PMU filter.
+#
+# @deny: disable the PMU event/counter in KVM PMU filter.
+#
+# Since: 10.1
+##
+{ 'enum': 'KvmPmuFilterAction',
+ 'data': ['allow', 'deny'] }
+
+##
+# @KvmPmuEventFormat:
+#
+# Encoding formats of PMU event that QEMU/KVM supports.
+#
+# @raw: the encoded event code that KVM can directly consume.
+#
+# Since: 10.1
+##
+{ 'enum': 'KvmPmuEventFormat',
+ 'data': ['raw'] }
+
+##
+# @KvmPmuRawEvent:
+#
+# Raw PMU event code.
+#
+# @code: the raw value that has been encoded, and QEMU could deliver
+# to KVM directly.
+#
+# Since: 10.1
+##
+{ 'struct': 'KvmPmuRawEvent',
+ 'data': { 'code': 'uint64' } }
+
+##
+# @KvmPmuFilterEvent:
+#
+# PMU event filtered by KVM.
+#
+# @format: PMU event format.
+#
+# Since: 10.1
+##
+{ 'union': 'KvmPmuFilterEvent',
+ 'base': { 'format': 'KvmPmuEventFormat' },
+ 'discriminator': 'format',
+ 'data': { 'raw': 'KvmPmuRawEvent' } }
+
+##
+# @KvmPmuFilterProperties:
+#
+# Properties of KVM PMU Filter.
+#
+# @action: action that KVM PMU filter will take for selected PMU events.
+#
+# @events: list of selected PMU events.
+#
+# Since: 10.1
+##
+{ 'struct': 'KvmPmuFilterProperties',
+ 'data': { 'action': 'KvmPmuFilterAction',
+ '*events': ['KvmPmuFilterEvent'] } }
diff --git a/qapi/meson.build b/qapi/meson.build
index eadde4db307f..dba27ebc7489 100644
--- a/qapi/meson.build
+++ b/qapi/meson.build
@@ -37,6 +37,7 @@ qapi_all_modules = [
'error',
'introspect',
'job',
+ 'kvm',
'machine-common',
'machine',
'machine-target',
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index c41c01eb2ab9..c7fed7940af7 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -66,6 +66,7 @@
{ 'include': 'compat.json' }
{ 'include': 'control.json' }
{ 'include': 'introspect.json' }
+{ 'include': 'accelerator.json' }
{ 'include': 'qom.json' }
{ 'include': 'qdev.json' }
{ 'include': 'machine-common.json' }
diff --git a/qapi/qom.json b/qapi/qom.json
index 28ce24cd8d08..517f4c06c260 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -8,6 +8,7 @@
{ 'include': 'block-core.json' }
{ 'include': 'common.json' }
{ 'include': 'crypto.json' }
+{ 'include': 'kvm.json' }
##
# = QEMU Object Model (QOM)
@@ -1108,6 +1109,7 @@
'if': 'CONFIG_LINUX' },
'iommufd',
'iothread',
+ 'kvm-pmu-filter',
'main-loop',
{ 'name': 'memory-backend-epc',
'if': 'CONFIG_LINUX' },
@@ -1183,6 +1185,7 @@
'if': 'CONFIG_LINUX' },
'iommufd': 'IOMMUFDProperties',
'iothread': 'IothreadProperties',
+ 'kvm-pmu-filter': 'KvmPmuFilterProperties',
'main-loop': 'MainLoopProperties',
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
'if': 'CONFIG_LINUX' },
--
2.34.1
Philippe, there's a question for you on target-specific QAPI schema.
Zhao Liu <zhao1.liu@intel.com> writes:
> Introduce the kvm-pmu-filter object and support the PMU event with raw
> format.
>
> The raw format, as a native PMU event code representation, can be used
> for several architectures.
>
> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> Tested-by: Yi Lai <yi1.lai@intel.com>
[...]
> diff --git a/accel/kvm/kvm-pmu.c b/accel/kvm/kvm-pmu.c
> new file mode 100644
> index 000000000000..22f749bf9183
> --- /dev/null
> +++ b/accel/kvm/kvm-pmu.c
[...]
> +static const TypeInfo kvm_pmu_filter_info = {
> + .parent = TYPE_OBJECT,
> + .name = TYPE_KVM_PMU_FILTER,
> + .class_init = kvm_pmu_filter_class_init,
> + .instance_size = sizeof(KVMPMUFilter),
> + .instance_init = kvm_pmu_filter_instance_init,
> + .interfaces = (InterfaceInfo[]) {
> + { TYPE_USER_CREATABLE },
> + { }
> + }
> +};
> +
> +static void kvm_pmu_event_register_type(void)
> +{
> + type_register_static(&kvm_pmu_filter_info);
> +}
> +
> +type_init(kvm_pmu_event_register_type);
> diff --git a/accel/kvm/meson.build b/accel/kvm/meson.build
> index 397a1fe1fd1e..dfab2854f3a8 100644
> --- a/accel/kvm/meson.build
> +++ b/accel/kvm/meson.build
> @@ -2,6 +2,7 @@ kvm_ss = ss.source_set()
> kvm_ss.add(files(
> 'kvm-all.c',
> 'kvm-accel-ops.c',
> + 'kvm-pmu.c',
> ))
>
> specific_ss.add_all(when: 'CONFIG_KVM', if_true: kvm_ss)
The new file is compiled into the binary when CONFIG_KVM. Therefore,
object kvm-pmu-filter is available exactly then. Makes sense.
However, ...
[...]
> diff --git a/qapi/kvm.json b/qapi/kvm.json
> new file mode 100644
> index 000000000000..1861d86a9726
> --- /dev/null
> +++ b/qapi/kvm.json
> @@ -0,0 +1,84 @@
> +# -*- Mode: Python -*-
> +# vim: filetype=python
> +#
> +# Copyright (C) 2025 Intel Corporation.
> +#
> +# Author: Zhao Liu <zhao1.liu@intel.com>
> +#
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +##
> +# == KVM
> +##
> +
> +##
> +# === PMU stuff (KVM related)
> +##
> +
> +##
> +# @KvmPmuFilterAction:
> +#
> +# Actions that KVM PMU filter supports.
> +#
> +# @deny: disable the PMU event/counter in KVM PMU filter.
> +#
> +# @allow: enable the PMU event/counter in KVM PMU filter.
> +#
> +# Since 10.1
> +##
> +{ 'enum': 'KvmPmuFilterAction',
> + 'data': ['allow', 'deny'] }
> +
> +##
> +# @KvmPmuEventFormat:
> +#
> +# Encoding formats of PMU event that QEMU/KVM supports.
> +#
> +# @raw: the encoded event code that KVM can directly consume.
> +#
> +# Since 10.1
> +##
> +{ 'enum': 'KvmPmuEventFormat',
> + 'data': ['raw'] }
> +
> +##
> +# @KvmPmuRawEvent:
> +#
> +# Raw PMU event code.
> +#
> +# @code: the raw value that has been encoded, and QEMU could deliver
> +# to KVM directly.
> +#
> +# Since 10.1
> +##
> +{ 'struct': 'KvmPmuRawEvent',
> + 'data': { 'code': 'uint64' } }
> +
> +##
> +# @KvmPmuFilterEvent:
> +#
> +# PMU event filtered by KVM.
> +#
> +# @format: PMU event format.
> +#
> +# Since 10.1
> +##
> +{ 'union': 'KvmPmuFilterEvent',
> + 'base': { 'format': 'KvmPmuEventFormat' },
> + 'discriminator': 'format',
> + 'data': { 'raw': 'KvmPmuRawEvent' } }
> +
> +##
> +# @KvmPmuFilterProperties:
> +#
> +# Properties of KVM PMU Filter.
> +#
> +# @action: action that KVM PMU filter will take for selected PMU events.
> +#
> +# @events: list of selected PMU events.
> +#
> +# Since 10.1
> +##
> +{ 'struct': 'KvmPmuFilterProperties',
> + 'data': { 'action': 'KvmPmuFilterAction',
> + '*events': ['KvmPmuFilterEvent'] } }
... the QAPI schema doesn't reflect that.
To make it reflect, we'd have to add 'if': 'CONFIG_KVM'. Since
CONFIG_KVM can only be used in target-specific code, we'd have to put
the definitions in a target-specific schema module kvm-target.json.
This makes the headers generated for the module target-specific, which
can be inconvenient. Whether it's inconvenient here, I can't say.
I understand target-specific QAPI modules are problematic for the single
binary / heterogeneous machine work. Philippe, thoughts on this one?
[...]
Zhao Liu <zhao1.liu@intel.com> writes:
> Introduce the kvm-pmu-filter object and support the PMU event with raw
> format.
>
> The raw format, as a native PMU event code representation, can be used
> for several architectures.
>
> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> Tested-by: Yi Lai <yi1.lai@intel.com>
[...]
> diff --git a/qapi/accelerator.json b/qapi/accelerator.json
> new file mode 100644
> index 000000000000..1fe0d64be113
> --- /dev/null
> +++ b/qapi/accelerator.json
> @@ -0,0 +1,14 @@
> +# -*- Mode: Python -*-
> +# vim: filetype=python
> +#
> +# Copyright (C) 2025 Intel Corporation.
> +#
> +# Author: Zhao Liu <zhao1.liu@intel.com>
> +#
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +##
> +# = Accelerators
> +##
> +
> +{ 'include': 'kvm.json' }
> diff --git a/qapi/kvm.json b/qapi/kvm.json
> new file mode 100644
> index 000000000000..1861d86a9726
> --- /dev/null
> +++ b/qapi/kvm.json
> @@ -0,0 +1,84 @@
> +# -*- Mode: Python -*-
> +# vim: filetype=python
> +#
> +# Copyright (C) 2025 Intel Corporation.
> +#
> +# Author: Zhao Liu <zhao1.liu@intel.com>
> +#
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +##
> +# == KVM
> +##
There's KVM-specific stuff elsewhere in the schema. Some of it
should probably be moved here. Can you have a look? This is not a
demand; it's fine if you can't. If you can: separate patch preceding
this one to create kvm.json and move stuff there.
> +
> +##
> +# === PMU stuff (KVM related)
The KVM subsection contains just this subsubsection. Awkward. Can we
do without this subsubsection now? We can always add it later, when we
have enough KVM stuff to warrant structuring it into subsubsections.
If we decide we want it:
# === KVM performance monitor unit (PMU)
> +##
> +
> +##
> +# @KvmPmuFilterAction:
> +#
> +# Actions that KVM PMU filter supports.
> +#
> +# @deny: disable the PMU event/counter in KVM PMU filter.
> +#
> +# @allow: enable the PMU event/counter in KVM PMU filter.
> +#
> +# Since 10.1
> +##
> +{ 'enum': 'KvmPmuFilterAction',
> + 'data': ['allow', 'deny'] }
> +
> +##
> +# @KvmPmuEventFormat:
Maybe KvmPmuFilterEventFormat? Or is that too long?
> +#
> +# Encoding formats of PMU event that QEMU/KVM supports.
> +#
> +# @raw: the encoded event code that KVM can directly consume.
Suggest
# @raw: raw KVM PMU event code.
> +#
> +# Since 10.1
> +##
> +{ 'enum': 'KvmPmuEventFormat',
> + 'data': ['raw'] }
> +
> +##
> +# @KvmPmuRawEvent:
Maybe KvmPmuFilterEventRaw? Or is that too long?
> +#
> +# Raw PMU event code.
> +#
> +# @code: the raw value that has been encoded, and QEMU could deliver
> +# to KVM directly.
Suggest
##
# @KvmPmuRawEvent
#
# @code: raw KVM PMU event code, to be passed verbatim to KVM.
> +#
> +# Since 10.1
> +##
> +{ 'struct': 'KvmPmuRawEvent',
> + 'data': { 'code': 'uint64' } }
> +
> +##
> +# @KvmPmuFilterEvent:
> +#
> +# PMU event filtered by KVM.
Suggest
# A KVM PMU event specification.
> +#
> +# @format: PMU event format.
> +#
> +# Since 10.1
> +##
> +{ 'union': 'KvmPmuFilterEvent',
> + 'base': { 'format': 'KvmPmuEventFormat' },
> + 'discriminator': 'format',
> + 'data': { 'raw': 'KvmPmuRawEvent' } }
> +
> +##
> +# @KvmPmuFilterProperties:
> +#
> +# Properties of KVM PMU Filter.
> +#
> +# @action: action that KVM PMU filter will take for selected PMU events.
> +#
> +# @events: list of selected PMU events.
Here's my try:
# Properties of kvm-pmu-filter objects. A kvm-pmu-filter object
# restricts the guest's access to the PMU with either an allowlist or
# a denylist.
#
# @action: whether @events is an allowlist or a denylist.
#
# @events: list of KVM PMU event specifications.
> +#
> +# Since 10.1
> +##
> +{ 'struct': 'KvmPmuFilterProperties',
> + 'data': { 'action': 'KvmPmuFilterAction',
> + '*events': ['KvmPmuFilterEvent'] } }
> diff --git a/qapi/meson.build b/qapi/meson.build
> index eadde4db307f..dba27ebc7489 100644
> --- a/qapi/meson.build
> +++ b/qapi/meson.build
> @@ -37,6 +37,7 @@ qapi_all_modules = [
> 'error',
> 'introspect',
> 'job',
> + 'kvm',
> 'machine-common',
> 'machine',
> 'machine-target',
> diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
> index c41c01eb2ab9..c7fed7940af7 100644
> --- a/qapi/qapi-schema.json
> +++ b/qapi/qapi-schema.json
> @@ -66,6 +66,7 @@
> { 'include': 'compat.json' }
> { 'include': 'control.json' }
> { 'include': 'introspect.json' }
> +{ 'include': 'accelerator.json' }
> { 'include': 'qom.json' }
> { 'include': 'qdev.json' }
> { 'include': 'machine-common.json' }
> diff --git a/qapi/qom.json b/qapi/qom.json
> index 28ce24cd8d08..517f4c06c260 100644
> --- a/qapi/qom.json
> +++ b/qapi/qom.json
> @@ -8,6 +8,7 @@
> { 'include': 'block-core.json' }
> { 'include': 'common.json' }
> { 'include': 'crypto.json' }
> +{ 'include': 'kvm.json' }
>
> ##
> # = QEMU Object Model (QOM)
> @@ -1108,6 +1109,7 @@
> 'if': 'CONFIG_LINUX' },
> 'iommufd',
> 'iothread',
> + 'kvm-pmu-filter',
> 'main-loop',
> { 'name': 'memory-backend-epc',
> 'if': 'CONFIG_LINUX' },
> @@ -1183,6 +1185,7 @@
> 'if': 'CONFIG_LINUX' },
> 'iommufd': 'IOMMUFDProperties',
> 'iothread': 'IothreadProperties',
> + 'kvm-pmu-filter': 'KvmPmuFilterProperties',
> 'main-loop': 'MainLoopProperties',
> 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
> 'if': 'CONFIG_LINUX' },
> > diff --git a/qapi/kvm.json b/qapi/kvm.json
> > new file mode 100644
> > index 000000000000..1861d86a9726
> > --- /dev/null
> > +++ b/qapi/kvm.json
> > @@ -0,0 +1,84 @@
> > +# -*- Mode: Python -*-
> > +# vim: filetype=python
> > +#
> > +# Copyright (C) 2025 Intel Corporation.
> > +#
> > +# Author: Zhao Liu <zhao1.liu@intel.com>
> > +#
> > +# SPDX-License-Identifier: GPL-2.0-or-later
> > +
> > +##
> > +# == KVM
> > +##
>
> There's KVM-specific stuff elsewhere in the schema. Some of it
> should probably be moved here. Can you have a look? This is not a
> demand; it's fine if you can't. If you can: separate patch preceding
> this one to create kvm.json and move stuff there.
Sure! That's what I should have done, and I'll be back to follow up on
this discussion when I get something new.
> > +
> > +##
> > +# === PMU stuff (KVM related)
>
> The KVM subsection contains just this subsubsection. Awkward. Can we
> do without this subsubsection now? We can always add it later, when we
> have enough KVM stuff to warrant structuring it into subsubsections.
Thanks! I agree. As I committed to above, if I find other KVM-related
definitions, we can add the subsubsection you suggest below :-).
> If we decide we want it:
>
> # === KVM performance monitor unit (PMU)
Good name.
> > +##
> > +
> > +##
> > +# @KvmPmuFilterAction:
> > +#
> > +# Actions that KVM PMU filter supports.
> > +#
> > +# @deny: disable the PMU event/counter in KVM PMU filter.
> > +#
> > +# @allow: enable the PMU event/counter in KVM PMU filter.
> > +#
> > +# Since 10.1
> > +##
> > +{ 'enum': 'KvmPmuFilterAction',
> > + 'data': ['allow', 'deny'] }
> > +
> > +##
> > +# @KvmPmuEventFormat:
>
> Maybe KvmPmuFilterEventFormat? Or is that too long?
For another 2 formats: 'x86-select-umask' and 'x86-masked-entry', their
enum value names already have 7 words:
- KVM_PMU_EVENT_FORMAT_X86_SELECT_UMASK
- KVM_PMU_EVENT_FORMAT_X86_MASKED_ENTRY
With "Filter" in the name,
- KVM_PMU_FILTER_EVENT_FORMAT_X86_SELECT_UMASK
- KVM_PMU_FILTER_EVENT_FORMAT_X86_MASKED_ENTRY
They still look okay. I'll rename it.
> > +#
> > +# Encoding formats of PMU event that QEMU/KVM supports.
> > +#
> > +# @raw: the encoded event code that KVM can directly consume.
>
> Suggest
>
> # @raw: raw KVM PMU event code.
Concise. I agree.
> > +#
> > +# Since 10.1
> > +##
> > +{ 'enum': 'KvmPmuEventFormat',
> > + 'data': ['raw'] }
> > +
> > +##
> > +# @KvmPmuRawEvent:
>
> Maybe KvmPmuFilterEventRaw? Or is that too long?
KvmPmuFilterEventRaw is fine (not too long).
> > +#
> > +# Raw PMU event code.
> > +#
> > +# @code: the raw value that has been encoded, and QEMU could deliver
> > +# to KVM directly.
>
> Suggest
>
> ##
> # @KvmPmuRawEvent
> #
> # @code: raw KVM PMU event code, to be passed verbatim to KVM.
Thanks for polishing it up, it looks much better.
> > +#
> > +# Since 10.1
> > +##
> > +{ 'struct': 'KvmPmuRawEvent',
> > + 'data': { 'code': 'uint64' } }
> > +
> > +##
> > +# @KvmPmuFilterEvent:
> > +#
> > +# PMU event filtered by KVM.
>
> Suggest
>
> # A KVM PMU event specification.
Sure.
> > +#
> > +# @format: PMU event format.
> > +#
> > +# Since 10.1
> > +##
> > +{ 'union': 'KvmPmuFilterEvent',
> > + 'base': { 'format': 'KvmPmuEventFormat' },
> > + 'discriminator': 'format',
> > + 'data': { 'raw': 'KvmPmuRawEvent' } }
> > +
> > +##
> > +# @KvmPmuFilterProperties:
> > +#
> > +# Properties of KVM PMU Filter.
> > +#
> > +# @action: action that KVM PMU filter will take for selected PMU events.
> > +#
> > +# @events: list of selected PMU events.
>
> Here's my try:
>
> # Properties of kvm-pmu-filter objects. A kvm-pmu-filter object
> # restricts the guest's access to the PMU with either an allowlist or
> # a denylist.
> #
> # @action: whether @events is an allowlist or a denylist.
> #
> # @events: list of KVM PMU event specifications.
Thank you very much! Your description is very accurate.
Regards,
Zhao
Zhao Liu <zhao1.liu@intel.com> writes: > Introduce the kvm-pmu-filter object and support the PMU event with raw > format. Remind me, what does the kvm-pmu-filter object do, and why would we want to use it? > The raw format, as a native PMU event code representation, can be used > for several architectures. > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > Tested-by: Yi Lai <yi1.lai@intel.com>
Hi Markus On Thu, Apr 10, 2025 at 04:21:01PM +0200, Markus Armbruster wrote: > Date: Thu, 10 Apr 2025 16:21:01 +0200 > From: Markus Armbruster <armbru@redhat.com> > Subject: Re: [PATCH 1/5] qapi/qom: Introduce kvm-pmu-filter object > > Zhao Liu <zhao1.liu@intel.com> writes: > > > Introduce the kvm-pmu-filter object and support the PMU event with raw > > format. > > Remind me, what does the kvm-pmu-filter object do, and why would we > want to use it? KVM PMU filter allows user space to set PMU event whitelist / blacklist for Guest. Both ARM and x86's KVMs accept a list of PMU events, and x86 also accepts other formats & fixed counter field. The earliest version uses custom parsing rules, which is not flexible enough to scale. So to support such complex configuration in cli, it's best to define and parse it via QAPI, and it's best to support the JSON way. Based on these considerations, I found "object" to be a suitable enough choice. Thus kvm-pmu-filter object handles all the complexity of parsing values from cli, and it can include some checks that QAPI cannot include (such as the 12-bit limit). Thanks, Zhao
Zhao Liu <zhao1.liu@intel.com> writes: > Hi Markus > > On Thu, Apr 10, 2025 at 04:21:01PM +0200, Markus Armbruster wrote: >> Date: Thu, 10 Apr 2025 16:21:01 +0200 >> From: Markus Armbruster <armbru@redhat.com> >> Subject: Re: [PATCH 1/5] qapi/qom: Introduce kvm-pmu-filter object >> >> Zhao Liu <zhao1.liu@intel.com> writes: >> >> > Introduce the kvm-pmu-filter object and support the PMU event with raw >> > format. >> >> Remind me, what does the kvm-pmu-filter object do, and why would we >> want to use it? > > KVM PMU filter allows user space to set PMU event whitelist / blacklist > for Guest. Both ARM and x86's KVMs accept a list of PMU events, and x86 > also accpets other formats & fixed counter field. But what does the system *do* with these event lists? > The earliest version uses custom parsing rules, which is not flexible > enough to scale. So to support such complex configuration in cli, it's > best to define and parse it via QAPI, and it's best to support the JSON > way. > > Based on these considerations, I found "object" to be a suitable enough > choice. > > Thus kvm-pmu-filter object handles all the complexity of parsing values > from cli, and it can include some checks that QAPI cannot include (such > as the 12-bit limit). > > Thanks, > Zhao
On Fri, Apr 11, 2025 at 06:38:35AM +0200, Markus Armbruster wrote: > Date: Fri, 11 Apr 2025 06:38:35 +0200 > From: Markus Armbruster <armbru@redhat.com> > Subject: Re: [PATCH 1/5] qapi/qom: Introduce kvm-pmu-filter object > > Zhao Liu <zhao1.liu@intel.com> writes: > > > Hi Markus > > > > On Thu, Apr 10, 2025 at 04:21:01PM +0200, Markus Armbruster wrote: > >> Date: Thu, 10 Apr 2025 16:21:01 +0200 > >> From: Markus Armbruster <armbru@redhat.com> > >> Subject: Re: [PATCH 1/5] qapi/qom: Introduce kvm-pmu-filter object > >> > >> Zhao Liu <zhao1.liu@intel.com> writes: > >> > >> > Introduce the kvm-pmu-filter object and support the PMU event with raw > >> > format. > >> > >> Remind me, what does the kvm-pmu-filter object do, and why would we > >> want to use it? > > > > KVM PMU filter allows user space to set PMU event whitelist / blacklist > > for Guest. Both ARM and x86's KVMs accept a list of PMU events, and x86 > > also accpets other formats & fixed counter field. > > But what does the system *do* with these event lists? This is for security purposes, and can restrict Guest users from accessing certain sensitive hardware information on the Host via perf or PMU counter. When a PMU event is blocked by KVM, Guest users can't get the corresponding event count via perf/PMU counter. EMM, if ‘system’ refers to the QEMU part, then QEMU is responsible for checking the format and passing the list to KVM. Thanks, Zhao
Zhao Liu <zhao1.liu@intel.com> writes: > On Fri, Apr 11, 2025 at 06:38:35AM +0200, Markus Armbruster wrote: >> Date: Fri, 11 Apr 2025 06:38:35 +0200 >> From: Markus Armbruster <armbru@redhat.com> >> Subject: Re: [PATCH 1/5] qapi/qom: Introduce kvm-pmu-filter object >> >> Zhao Liu <zhao1.liu@intel.com> writes: >> >> > Hi Markus >> > >> > On Thu, Apr 10, 2025 at 04:21:01PM +0200, Markus Armbruster wrote: >> >> Date: Thu, 10 Apr 2025 16:21:01 +0200 >> >> From: Markus Armbruster <armbru@redhat.com> >> >> Subject: Re: [PATCH 1/5] qapi/qom: Introduce kvm-pmu-filter object >> >> >> >> Zhao Liu <zhao1.liu@intel.com> writes: >> >> >> >> > Introduce the kvm-pmu-filter object and support the PMU event with raw >> >> > format. >> >> >> >> Remind me, what does the kvm-pmu-filter object do, and why would we >> >> want to use it? >> > >> > KVM PMU filter allows user space to set PMU event whitelist / blacklist >> > for Guest. Both ARM and x86's KVMs accept a list of PMU events, and x86 >> > also accpets other formats & fixed counter field. >> >> But what does the system *do* with these event lists? > > This is for security purposes, and can restrict Guest users from > accessing certain sensitive hardware information on the Host via perf or > PMU counter. > > When a PMU event is blocked by KVM, Guest users can't get the > corresponding event count via perf/PMU counter. > > EMM, if ‘system’ refers to the QEMU part, then QEMU is responsible > for checking the format and passing the list to KVM. > > Thanks, > Zhao This helped some, thanks. To make sure I got it: KVM can restrict the guest's access to the PMU. This is either a whitelist (guest can access exactly what's on this list), or a blacklist (guest can access exactly what's not this list). QEMU's kvm-pmu-filter object provides an interface to this KVM feature. KVM takes "raw" list entries: an entry is a number, and the number's meaning depends on the architecture. 
The kvm-pmu-filter object can take such entries, and passes them straight to KVM. On x86, we commonly use two slightly higher level formats: select & umask, and masked. The kvm-pmu-filter object can take entries in either format, and maps them to "raw". Correct?
Hi Markus,
> > This is for security purposes, and can restrict Guest users from
> > accessing certain sensitive hardware information on the Host via perf or
> > PMU counter.
> >
> > When a PMU event is blocked by KVM, Guest users can't get the
> > corresponding event count via perf/PMU counter.
> >
> > EMM, if ‘system’ refers to the QEMU part, then QEMU is responsible
> > for checking the format and passing the list to KVM.
> >
> > Thanks,
> > Zhao
>
> This helped some, thanks. To make sure I got it:
>
> KVM can restrict the guest's access to the PMU. This is either a
> whitelist (guest can access exactly what's on this list), or a blacklist
> (guest can access exactly what's not on this list).
Yes! The "action" field controls if it's a "whitelist" (allow) or
"blacklist" (deny).
And "access" means Guest could get the event count, if "no access", then
Guest would get nothing.
For example, if we set the whitelist only for the event (select: 0xc4,
umask: 0) in QEMU:
pmu='{"qom-type":"kvm-pmu-filter","id":"f0","action":"allow","events":[{"format":"x86-select-umask","select":196,"umask":0}]}'
then in Guest, this command tries to get count of 2 events:
perf stat -e cpu/event=0xc4,name=branches/,cpu/event=0xc5,name=branch-misses/ sleep 1
Since another event (select: 0xc5, umask: 0) is not on whitelist, its
"access" is blocked by KVM, so user would get the result like:
Performance counter stats for 'sleep 1':
348709 branches
0 branch-misses
1.015962921 seconds time elapsed
0.000000000 seconds user
0.015195000 seconds sys
The "allowed" event has the normal output, and the result of "denied"
event is zero.
> QEMU's kvm-pmu-filter object provides an interface to this KVM feature.
Yes!
> KVM takes "raw" list entries: an entry is a number, and the number's
> meaning depends on the architecture.
Yes, and meaning also depends on format. masked-entry format has special
meaning (with a flag).
> The kvm-pmu-filter object can take such entries, and passes them
> straight to KVM.
>
> On x86, we commonly use two slightly higher level formats: select &
> umask, and masked. The kvm-pmu-filter object can take entries in either
> format, and maps them to "raw".
>
> Correct?
Yes, Markus, you're right! (And sorry for late reply.)
And "raw" format as a lower level format can be used for other arches
(e.g., ARM).
Thanks,
Zhao
Hi Zhao,
On 24/4/25 08:33, Zhao Liu wrote:
> Hi Markus,
>
>>> This is for security purposes, and can restrict Guest users from
>>> accessing certain sensitive hardware information on the Host via perf or
>>> PMU counter.
>>>
>>> When a PMU event is blocked by KVM, Guest users can't get the
>>> corresponding event count via perf/PMU counter.
>>>
>>> EMM, if ‘system’ refers to the QEMU part, then QEMU is responsible
>>> for checking the format and passing the list to KVM.
>>>
>>> Thanks,
>>> Zhao
>>
>> This helped some, thanks. To make sure I got it:
>>
>> KVM can restrict the guest's access to the PMU. This is either a
>> whitelist (guest can access exactly what's on this list), or a blacklist
>> (guest can access exactly what's not on this list).
>
> Yes! The "action" field controls if it's a "whitelist" (allow) or
> "blacklist" (deny).
>
> And "access" means Guest could get the event count, if "no access", then
> Guest would get nothing.
>
> For example, if we set the whitelist only for the event (select: 0xc4,
> umask: 0) in QEMU:
>
> pmu='{"qom-type":"kvm-pmu-filter","id":"f0","action":"allow","events":[{"format":"x86-select-umask","select":196,"umask":0}]}'
>
> then in Guest, this command tries to get count of 2 events:
>
> perf stat -e cpu/event=0xc4,name=branches/,cpu/event=0xc5,name=branch-misses/ sleep 1
>
> Since another event (select: 0xc5, umask: 0) is not on whitelist, its
> "access" is blocked by KVM, so user would get the result like:
>
> Performance counter stats for 'sleep 1':
>
> 348709 branches
> 0 branch-misses
>
> 1.015962921 seconds time elapsed
>
> 0.000000000 seconds user
> 0.015195000 seconds sys
>
> The "allowed" event has the normal output, and the result of "denied"
> event is zero.
>
>> QEMU's kvm-pmu-filter object provides an interface to this KVM feature.
>
> Yes!
>
>> KVM takes "raw" list entries: an entry is a number, and the number's
>> meaning depends on the architecture.
>
> Yes, and meaning also depends on format. masked-entry format has special
> meaning (with a flag).
>
>> The kvm-pmu-filter object can take such entries, and passes them
>> straight to KVM.
>>
>> On x86, we commonly use two slightly higher level formats: select &
>> umask, and masked. The kvm-pmu-filter object can take entries in either
>> format, and maps them to "raw".
>>
>> Correct?
>
> Yes, Markus, you're right! (And sorry for late reply.)
>
> And "raw" format as a lower level format can be used for other arches
> (e.g., ARM).
Since you provide the ability to use a raw format, are we sure other
accelerators will never be interested in such PMU filtering?
I'm pretty sure HVF could benefit from it (whether we implement it there
is another story).
What do you think about adding this as a generic accelerator feature?
If a particular accel doesn't support it and we ask to filter, we simply
report an error.
Hi Philip and Markus,
Let's discuss how to handle compilation for different architectures as
well as different accelerators here.
> > And "raw" format as a lower level format can be used for other arches
> > (e.g., ARM).
>
> Since you provide the ability to use a raw format, are we sure other
> accelerators will never be interested in such PMU filtering?
>
> I'm pretty sure HVF could benefit from it (whether we implement it there
> is another story).
Nice to know it could benefit more cases.
> What do you think about adding this as a generic accelerator feature?
I can implement pmu-filter directly at the "accel" level.
> If a particular accel doesn't support it and we ask to filter, we simply
> report an error.
One of the main issues is how to organize the QAPI schema:
First we have a "qapi/accelerator.json" like current implementation to
provide:
##
# = Accelerators
##
Then we should have a "qapi/accelerator-target.json" (which will follow
qapi/accelerator.json in qapi-schema.json, just like machine.json &
machine-target.json), and place all pmu-filter related things in this
file, specifying the compilation condition, for example:
{ 'struct': 'KvmPmuFilterProperties',
'data': { 'action': 'KvmPmuFilterAction',
'*x86-fixed-counter': 'uint32',
'*events': ['KvmPmuFilterEvent'] },
'if': 'CONFIG_KVM' }
In the future, this could be expanded to: 'if': { 'any': [ 'CONFIG_HVF', 'CONFIG_KVM' ] }.
I understand that there is no way to specify the architecture here,
because it is not possible to specify a combination case like
"TARGET_I386 & CONFIG_KVM", "TARGET_ARM & CONFIG_KVM", "TARGET_ARM & CONFIG_HVF"
(please educate me if such an "if" condition can be implemented in QAPI :-)).
So, I will put the arch-specific format check in pmu-filter.c by adding
arch macros as I mentioned in this reply:
https://lore.kernel.org/qemu-devel/aA3TeaYG9mNMdEiW@intel.com/
And we'll also need an accel-specific format check (for example, masked-entry
is KVM specific, and it is not defined in the x86 spec). I can check the
accel-specific format in the `check` hook of
object_class_property_add_link(), which links the pmu-filter object to
accelerator.
Do you like this idea?
Thanks,
Zhao
© 2016 - 2025 Red Hat, Inc.