From: Kan Liang <kan.liang@linux.intel.com>
Currently, exposing PMU capabilities to a KVM guest is done by emulating
guest PMCs via host perf events, i.e. by having KVM be "just" another user
of perf. As a result, the guest and host are effectively competing for
resources, and emulating guest accesses to vPMU resources requires
expensive actions (expensive relative to the native instruction). The
overhead and resource competition result in degraded guest performance
and ultimately very poor vPMU accuracy.
To address the issues with the perf-emulated vPMU, introduce a "mediated
vPMU", where the data plane (PMCs and enable/disable knobs) is exposed
directly to the guest, but the control plane (event selectors and access
to fixed counters) is managed by KVM (via MSR interceptions). To allow
host perf usage of the PMU to (partially) co-exist with KVM/guest usage
of the PMU, KVM and perf will coordinate a world switch between host
perf context and guest vPMU context near VM-Enter/VM-Exit.
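For illustration, the coordination around VM-Enter/VM-Exit is expected to
look roughly like the sketch below; perf_load_guest_context() appears later
in this thread, while the KVM-side callers and the perf_put_guest_context()
counterpart are placeholder names, not code from this patch:

	/* Hypothetical sketch; the real KVM hooks land in later patches. */
	static void kvm_mediated_pmu_load(struct kvm_vcpu *vcpu)
	{
		/* Schedule out the host's (exclude_guest) perf events. */
		perf_load_guest_context();

		/* ... KVM restores guest PMCs and event selectors here ... */
	}

	static void kvm_mediated_pmu_put(struct kvm_vcpu *vcpu)
	{
		/* ... KVM saves guest PMC state here ... */

		/* Resume the host perf context (assumed counterpart API). */
		perf_put_guest_context();
	}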
Add two exported APIs, perf_{create,release}_mediated_pmu(), to allow KVM
to create and release a mediated PMU instance (per VM). Because host perf
context will be deactivated while the guest is running, mediated PMU usage
will be mutually exclusive with perf analysis of the guest, i.e. perf
events that do NOT exclude the guest will not behave as expected.
To avoid silent failure of !exclude_guest perf events, disallow creating a
mediated PMU if there are active !exclude_guest events, and on the perf
side, disallow creating new !exclude_guest perf events while there is
at least one active mediated PMU.
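A minimal sketch of the intended KVM-side pairing (the call sites shown are
hypothetical; the actual KVM plumbing arrives in later patches):

	/* At VM creation; fails if !exclude_guest events are active. */
	r = perf_create_mediated_pmu();
	if (r)
		return r;	/* -EBUSY */

	/*
	 * While the VM exists, creating a !exclude_guest event on a PMU
	 * with PERF_PMU_CAP_MEDIATED_VPMU fails with -EOPNOTSUPP.
	 */

	/* At VM destruction. */
	perf_release_mediated_pmu();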
Exempt PMU resources that do not support mediated PMU usage, i.e. that are
outside the scope/view of KVM's vPMU and will not be swapped out while the
guest is running.
Guard mediated PMU with a new kconfig to help readers identify code paths
that are unique to mediated PMU support, and to allow for adding arch-
specific hooks without stubs. KVM x86 is expected to be the only KVM
architecture to support a mediated PMU in the near future (e.g. arm64 is
trending toward a partitioned PMU implementation), and KVM x86 will select
PERF_GUEST_MEDIATED_PMU unconditionally, i.e. won't need stubs.
Immediately select PERF_GUEST_MEDIATED_PMU when KVM x86 is enabled so that
all paths are compile tested. Full KVM support is on its way...
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
[sean: add kconfig and WARNing, rewrite changelog, swizzle patch ordering]
Tested-by: Xudong Hao <xudong.hao@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/Kconfig | 1 +
include/linux/perf_event.h | 6 +++
init/Kconfig | 4 ++
kernel/events/core.c | 82 ++++++++++++++++++++++++++++++++++++++
4 files changed, 93 insertions(+)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 278f08194ec8..d916bd766c94 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -37,6 +37,7 @@ config KVM_X86
select SCHED_INFO
select PERF_EVENTS
select GUEST_PERF_EVENTS
+ select PERF_GUEST_MEDIATED_PMU
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_NO_POLL
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fd1d91017b99..94f679634ef6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -305,6 +305,7 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
#define PERF_PMU_CAP_AUX_PAUSE 0x0200
#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400
+#define PERF_PMU_CAP_MEDIATED_VPMU 0x0800
/**
* pmu::scope
@@ -1914,6 +1915,11 @@ extern int perf_event_account_interrupt(struct perf_event *event);
extern int perf_event_period(struct perf_event *event, u64 value);
extern u64 perf_event_pause(struct perf_event *event, bool reset);
+#ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
+int perf_create_mediated_pmu(void);
+void perf_release_mediated_pmu(void);
+#endif
+
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
diff --git a/init/Kconfig b/init/Kconfig
index cab3ad28ca49..45b9ac626829 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2010,6 +2010,10 @@ config GUEST_PERF_EVENTS
bool
depends on HAVE_PERF_EVENTS
+config PERF_GUEST_MEDIATED_PMU
+ bool
+ depends on GUEST_PERF_EVENTS
+
config PERF_USE_VMALLOC
bool
help
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e34112df8b31..cfeea7d330f9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5657,6 +5657,8 @@ static void __free_event(struct perf_event *event)
call_rcu(&event->rcu_head, free_event_rcu);
}
+static void mediated_pmu_unaccount_event(struct perf_event *event);
+
DEFINE_FREE(__free_event, struct perf_event *, if (_T) __free_event(_T))
/* vs perf_event_alloc() success */
@@ -5666,6 +5668,7 @@ static void _free_event(struct perf_event *event)
irq_work_sync(&event->pending_disable_irq);
unaccount_event(event);
+ mediated_pmu_unaccount_event(event);
if (event->rb) {
/*
@@ -6188,6 +6191,81 @@ u64 perf_event_pause(struct perf_event *event, bool reset)
}
EXPORT_SYMBOL_GPL(perf_event_pause);
+#ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
+static atomic_t nr_include_guest_events __read_mostly;
+
+static atomic_t nr_mediated_pmu_vms __read_mostly;
+static DEFINE_MUTEX(perf_mediated_pmu_mutex);
+
+/* !exclude_guest event of PMU with PERF_PMU_CAP_MEDIATED_VPMU */
+static inline bool is_include_guest_event(struct perf_event *event)
+{
+ if ((event->pmu->capabilities & PERF_PMU_CAP_MEDIATED_VPMU) &&
+ !event->attr.exclude_guest)
+ return true;
+
+ return false;
+}
+
+static int mediated_pmu_account_event(struct perf_event *event)
+{
+ if (!is_include_guest_event(event))
+ return 0;
+
+ guard(mutex)(&perf_mediated_pmu_mutex);
+
+ if (atomic_read(&nr_mediated_pmu_vms))
+ return -EOPNOTSUPP;
+
+ atomic_inc(&nr_include_guest_events);
+ return 0;
+}
+
+static void mediated_pmu_unaccount_event(struct perf_event *event)
+{
+ if (!is_include_guest_event(event))
+ return;
+
+ atomic_dec(&nr_include_guest_events);
+}
+
+/*
+ * Currently invoked at VM creation to
+ * - Check whether there are existing !exclude_guest events of PMU with
+ * PERF_PMU_CAP_MEDIATED_VPMU
+ * - Set nr_mediated_pmu_vms to prevent !exclude_guest event creation on
+ * PMUs with PERF_PMU_CAP_MEDIATED_VPMU
+ *
+ * No impact on PMUs without PERF_PMU_CAP_MEDIATED_VPMU; perf still
+ * owns all the PMU resources.
+ */
+int perf_create_mediated_pmu(void)
+{
+ guard(mutex)(&perf_mediated_pmu_mutex);
+ if (atomic_inc_not_zero(&nr_mediated_pmu_vms))
+ return 0;
+
+ if (atomic_read(&nr_include_guest_events))
+ return -EBUSY;
+
+ atomic_inc(&nr_mediated_pmu_vms);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(perf_create_mediated_pmu);
+
+void perf_release_mediated_pmu(void)
+{
+ if (WARN_ON_ONCE(!atomic_read(&nr_mediated_pmu_vms)))
+ return;
+
+ atomic_dec(&nr_mediated_pmu_vms);
+}
+EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
+#else
+static int mediated_pmu_account_event(struct perf_event *event) { return 0; }
+static void mediated_pmu_unaccount_event(struct perf_event *event) {}
+#endif
+
/*
* Holding the top-level event's child_mutex means that any
* descendant process that has inherited this event will block
@@ -13078,6 +13156,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (err)
return ERR_PTR(err);
+ err = mediated_pmu_account_event(event);
+ if (err)
+ return ERR_PTR(err);
+
/* symmetric to unaccount_event() in _free_event() */
account_event(event);
--
2.52.0.223.gf5cc29aaa4-goog
On Fri, Dec 05, 2025 at 04:16:40PM -0800, Sean Christopherson wrote:
> +static atomic_t nr_include_guest_events __read_mostly;
> +
> +static atomic_t nr_mediated_pmu_vms __read_mostly;
> +static DEFINE_MUTEX(perf_mediated_pmu_mutex);
> +static int mediated_pmu_account_event(struct perf_event *event)
> +{
> + if (!is_include_guest_event(event))
> + return 0;
> +
> + guard(mutex)(&perf_mediated_pmu_mutex);
> +
> + if (atomic_read(&nr_mediated_pmu_vms))
> + return -EOPNOTSUPP;
> +
> + atomic_inc(&nr_include_guest_events);
> + return 0;
> +}
> +
> +static void mediated_pmu_unaccount_event(struct perf_event *event)
> +{
> + if (!is_include_guest_event(event))
> + return;
> +
> + atomic_dec(&nr_include_guest_events);
> +}
> +int perf_create_mediated_pmu(void)
> +{
> + guard(mutex)(&perf_mediated_pmu_mutex);
> + if (atomic_inc_not_zero(&nr_mediated_pmu_vms))
> + return 0;
> +
> + if (atomic_read(&nr_include_guest_events))
> + return -EBUSY;
> +
> + atomic_inc(&nr_mediated_pmu_vms);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(perf_create_mediated_pmu);
> +
> +void perf_release_mediated_pmu(void)
> +{
> + if (WARN_ON_ONCE(!atomic_read(&nr_mediated_pmu_vms)))
> + return;
> +
> + atomic_dec(&nr_mediated_pmu_vms);
> +}
> +EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
These two things are supposed to be symmetric, but are implemented
differently; what gives?
That is, should not both have the general shape:
if (atomic_inc_not_zero(&A))
return 0;
guard(mutex)(&lock);
if (atomic_read(&B))
return -EBUSY;
atomic_inc(&A);
return 0;
Similarly, I would imagine both release variants to have the underflow
warn on like:
if (WARN_ON_ONCE(!atomic_read(&A)))
return;
atomic_dec(&A);
Hmm?
Also, EXPORT_SYMBOL_FOR_KVM() ?
I can make these edits when applying, if/when we get to applying. Let me
continue reading.
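(For context: per the kvm_types.h snippet later in this thread,
EXPORT_SYMBOL_FOR_KVM() is a wrapper that namespaces the export to kvm.ko,
essentially:

	#if IS_MODULE(CONFIG_KVM)
	#define EXPORT_SYMBOL_FOR_KVM(symbol) EXPORT_SYMBOL_FOR_MODULES(symbol, "kvm")
	#else
	#define EXPORT_SYMBOL_FOR_KVM(symbol)
	#endif

i.e. when KVM is built-in or absent, the export is suppressed entirely.)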
On Mon, Dec 08, 2025, Peter Zijlstra wrote:
> On Fri, Dec 05, 2025 at 04:16:40PM -0800, Sean Christopherson wrote:
>
> > +static atomic_t nr_include_guest_events __read_mostly;
> > +
> > +static atomic_t nr_mediated_pmu_vms __read_mostly;
> > +static DEFINE_MUTEX(perf_mediated_pmu_mutex);
>
> > +static int mediated_pmu_account_event(struct perf_event *event)
> > +{
> > + if (!is_include_guest_event(event))
> > + return 0;
> > +
> > + guard(mutex)(&perf_mediated_pmu_mutex);
> > +
> > + if (atomic_read(&nr_mediated_pmu_vms))
> > + return -EOPNOTSUPP;
> > +
> > + atomic_inc(&nr_include_guest_events);
> > + return 0;
> > +}
> > +
> > +static void mediated_pmu_unaccount_event(struct perf_event *event)
> > +{
> > + if (!is_include_guest_event(event))
> > + return;
> > +
> > + atomic_dec(&nr_include_guest_events);
> > +}
>
> > +int perf_create_mediated_pmu(void)
> > +{
> > + guard(mutex)(&perf_mediated_pmu_mutex);
> > + if (atomic_inc_not_zero(&nr_mediated_pmu_vms))
> > + return 0;
> > +
> > + if (atomic_read(&nr_include_guest_events))
> > + return -EBUSY;
> > +
> > + atomic_inc(&nr_mediated_pmu_vms);
> > + return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(perf_create_mediated_pmu);
> > +
> > +void perf_release_mediated_pmu(void)
> > +{
> > + if (WARN_ON_ONCE(!atomic_read(&nr_mediated_pmu_vms)))
> > + return;
> > +
> > + atomic_dec(&nr_mediated_pmu_vms);
> > +}
> > +EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
>
> These two things are supposed to be symmetric, but are implemented
> differently; what gives?
>
> That is, should not both have the general shape:
>
> if (atomic_inc_not_zero(&A))
> return 0;
>
> guard(mutex)(&lock);
>
> if (atomic_read(&B))
> return -EBUSY;
>
> atomic_inc(&A);
> return 0;
>
> Similarly, I would imagine both release variants to have the underflow
> warn on like:
>
> if (WARN_ON_ONCE(!atomic_read(&A)))
> return;
>
> atomic_dec(&A);
>
> Hmm?
IIUC, you're suggesting something like this? If so, that makes perfect sense to me.
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c6368c64b866..fa2e7b722283 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6356,7 +6356,8 @@ static int mediated_pmu_account_event(struct perf_event *event)
static void mediated_pmu_unaccount_event(struct perf_event *event)
{
- if (!is_include_guest_event(event))
+ if (!is_include_guest_event(event) ||
+ WARN_ON_ONCE(!atomic_read(&nr_include_guest_events)))
return;
atomic_dec(&nr_include_guest_events);
> Also, EXPORT_SYMBOL_FOR_KVM() ?
Ya, for sure. I posted this against a branch without EXPORT_SYMBOL_FOR_KVM(),
because there are also hard dependencies on the for-6.19 KVM pull requests, and
I didn't want to wait to post until 6.19-rc1 because of the impending winter
break. Though I also simply forgot about these exports :-(
These could also use EXPORT_SYMBOL_FOR_KVM():
EXPORT_SYMBOL_FOR_MODULES(perf_load_guest_lvtpc, "kvm");
EXPORT_SYMBOL_FOR_MODULES(perf_put_guest_lvtpc, "kvm");
> I can make these edits when applying, if/when we get to applying. Let me
> continue reading.
>
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 23faa33d88df6d126cebd3121ea2cff3586e7b95
Gitweb: https://git.kernel.org/tip/23faa33d88df6d126cebd3121ea2cff3586e7b95
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Wed, 17 Dec 2025 13:23:59 +01:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 17 Dec 2025 13:31:09 +01:00
perf: Use EXPORT_SYMBOL_FOR_KVM() for the mediated APIs
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251208115156.GE3707891@noisy.programming.kicks-ass.net
---
arch/x86/events/core.c | 5 +++--
kernel/events/core.c | 5 +++--
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index df7a32b..0ecac94 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -30,6 +30,7 @@
#include <linux/device.h>
#include <linux/nospec.h>
#include <linux/static_call.h>
+#include <linux/kvm_types.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -1771,14 +1772,14 @@ void perf_load_guest_lvtpc(u32 guest_lvtpc)
APIC_DM_FIXED | PERF_GUEST_MEDIATED_PMI_VECTOR | masked);
this_cpu_write(guest_lvtpc_loaded, true);
}
-EXPORT_SYMBOL_FOR_MODULES(perf_load_guest_lvtpc, "kvm");
+EXPORT_SYMBOL_FOR_KVM(perf_load_guest_lvtpc);
void perf_put_guest_lvtpc(void)
{
this_cpu_write(guest_lvtpc_loaded, false);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
-EXPORT_SYMBOL_FOR_MODULES(perf_put_guest_lvtpc, "kvm");
+EXPORT_SYMBOL_FOR_KVM(perf_put_guest_lvtpc);
#endif /* CONFIG_PERF_GUEST_MEDIATED_PMU */
static int
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e6a4b1e..376fb07 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -57,6 +57,7 @@
#include <linux/task_work.h>
#include <linux/percpu-rwsem.h>
#include <linux/unwind_deferred.h>
+#include <linux/kvm_types.h>
#include "internal.h"
@@ -6388,7 +6389,7 @@ int perf_create_mediated_pmu(void)
atomic_inc(&nr_mediated_pmu_vms);
return 0;
}
-EXPORT_SYMBOL_GPL(perf_create_mediated_pmu);
+EXPORT_SYMBOL_FOR_KVM(perf_create_mediated_pmu);
void perf_release_mediated_pmu(void)
{
@@ -6397,7 +6398,7 @@ void perf_release_mediated_pmu(void)
atomic_dec(&nr_mediated_pmu_vms);
}
-EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
+EXPORT_SYMBOL_FOR_KVM(perf_release_mediated_pmu);
/* When loading a guest's mediated PMU, schedule out all exclude_guest events. */
void perf_load_guest_context(void)
On Wed, Dec 17, 2025 at 12:37:57PM -0000, tip-bot2 for Peter Zijlstra wrote:
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index e6a4b1e..376fb07 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -57,6 +57,7 @@
> #include <linux/task_work.h>
> #include <linux/percpu-rwsem.h>
> #include <linux/unwind_deferred.h>
> +#include <linux/kvm_types.h>
>
> #include "internal.h"
>
> @@ -6388,7 +6389,7 @@ int perf_create_mediated_pmu(void)
> atomic_inc(&nr_mediated_pmu_vms);
> return 0;
> }
> -EXPORT_SYMBOL_GPL(perf_create_mediated_pmu);
> +EXPORT_SYMBOL_FOR_KVM(perf_create_mediated_pmu);
>
> void perf_release_mediated_pmu(void)
> {
> @@ -6397,7 +6398,7 @@ void perf_release_mediated_pmu(void)
>
> atomic_dec(&nr_mediated_pmu_vms);
> }
> -EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
> +EXPORT_SYMBOL_FOR_KVM(perf_release_mediated_pmu);
>
> /* When loading a guest's mediated PMU, schedule out all exclude_guest events. */
> void perf_load_guest_context(void)
Bah, so the !KVM architectures hate on this.
Sean, would something like this be acceptable?
---
Subject: kvm: Fix linux/kvm_types.h for !KVM architectures
As is, <linux/kvm_types.h> hard relies on architectures having
<asm/kvm_types.h> which (obviously) breaks for architectures that don't
have KVM support.
This means generic code (kernel/events/ in this case) cannot use
EXPORT_SYMBOL_FOR_KVM().
Rearrange things just so that <linux/kvm_types.h> becomes usable and
provides the (expected) empty stub for EXPORT_SYMBOL_FOR_KVM() for !KVM.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index a568d8e6f4e8..a4cc13e41eec 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -6,6 +6,8 @@
#include <linux/bits.h>
#include <linux/export.h>
#include <linux/types.h>
+
+#ifdef CONFIG_KVM
#include <asm/kvm_types.h>
#ifdef KVM_SUB_MODULES
@@ -20,13 +22,14 @@
* if there are no submodules, e.g. to allow suppressing exports if KVM=m, but
* kvm.ko won't actually be built (due to lack of at least one submodule).
*/
-#ifndef EXPORT_SYMBOL_FOR_KVM
-#if IS_MODULE(CONFIG_KVM)
+#if !defined(EXPORT_SYMBOL_FOR_KVM) && IS_MODULE(CONFIG_KVM)
#define EXPORT_SYMBOL_FOR_KVM(symbol) EXPORT_SYMBOL_FOR_MODULES(symbol, "kvm")
-#else
-#define EXPORT_SYMBOL_FOR_KVM(symbol)
#endif /* IS_MODULE(CONFIG_KVM) */
-#endif /* EXPORT_SYMBOL_FOR_KVM */
+#endif /* KVM_SUB_MODULES */
+#endif
+
+#ifndef EXPORT_SYMBOL_FOR_KVM
+#define EXPORT_SYMBOL_FOR_KVM(symbol)
#endif
#ifndef __ASSEMBLER__
On Thu, Dec 18, 2025 at 09:31:09AM +0100, Peter Zijlstra wrote:
> On Wed, Dec 17, 2025 at 12:37:57PM -0000, tip-bot2 for Peter Zijlstra wrote:
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index e6a4b1e..376fb07 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -57,6 +57,7 @@
> > #include <linux/task_work.h>
> > #include <linux/percpu-rwsem.h>
> > #include <linux/unwind_deferred.h>
> > +#include <linux/kvm_types.h>
> >
> > #include "internal.h"
> >
> > @@ -6388,7 +6389,7 @@ int perf_create_mediated_pmu(void)
> > atomic_inc(&nr_mediated_pmu_vms);
> > return 0;
> > }
> > -EXPORT_SYMBOL_GPL(perf_create_mediated_pmu);
> > +EXPORT_SYMBOL_FOR_KVM(perf_create_mediated_pmu);
> >
> > void perf_release_mediated_pmu(void)
> > {
> > @@ -6397,7 +6398,7 @@ void perf_release_mediated_pmu(void)
> >
> > atomic_dec(&nr_mediated_pmu_vms);
> > }
> > -EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
> > +EXPORT_SYMBOL_FOR_KVM(perf_release_mediated_pmu);
> >
> > /* When loading a guest's mediated PMU, schedule out all exclude_guest events. */
> > void perf_load_guest_context(void)
>
> Bah, so the !KVM architectures hate on this.
>
> Sean, would something like this be acceptable?
Hmm, the other option is doing something like so:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 376fb07d869b..014d832e8eaa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -57,7 +57,6 @@
#include <linux/task_work.h>
#include <linux/percpu-rwsem.h>
#include <linux/unwind_deferred.h>
-#include <linux/kvm_types.h>
#include "internal.h"
@@ -6325,6 +6324,8 @@ u64 perf_event_pause(struct perf_event *event, bool reset)
EXPORT_SYMBOL_GPL(perf_event_pause);
#ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
+#include <linux/kvm_types.h>
+
static atomic_t nr_include_guest_events __read_mostly;
static atomic_t nr_mediated_pmu_vms __read_mostly;
> ---
> Subject: kvm: Fix linux/kvm_types.h for !KVM architectures
>
> As is, <linux/kvm_types.h> hard relies on architectures having
> <asm/kvm_types.h> which (obviously) breaks for architectures that don't
> have KVM support.
>
> This means generic code (kernel/events/ in this case) cannot use
> EXPORT_SYMBOL_FOR_KVM().
>
> Rearrange things just so that <linux/kvm_types.h> becomes usable and
> provides the (expected) empty stub for EXPORT_SYMBOL_FOR_KVM() for !KVM.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index a568d8e6f4e8..a4cc13e41eec 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -6,6 +6,8 @@
> #include <linux/bits.h>
> #include <linux/export.h>
> #include <linux/types.h>
> +
> +#ifdef CONFIG_KVM
> #include <asm/kvm_types.h>
>
> #ifdef KVM_SUB_MODULES
> @@ -20,13 +22,14 @@
> * if there are no submodules, e.g. to allow suppressing exports if KVM=m, but
> * kvm.ko won't actually be built (due to lack of at least one submodule).
> */
> -#ifndef EXPORT_SYMBOL_FOR_KVM
> -#if IS_MODULE(CONFIG_KVM)
> +#if !defined(EXPORT_SYMBOL_FOR_KVM) && IS_MODULE(CONFIG_KVM)
> #define EXPORT_SYMBOL_FOR_KVM(symbol) EXPORT_SYMBOL_FOR_MODULES(symbol, "kvm")
> -#else
> -#define EXPORT_SYMBOL_FOR_KVM(symbol)
> #endif /* IS_MODULE(CONFIG_KVM) */
> -#endif /* EXPORT_SYMBOL_FOR_KVM */
> +#endif /* KVM_SUB_MODULES */
> +#endif
> +
> +#ifndef EXPORT_SYMBOL_FOR_KVM
> +#define EXPORT_SYMBOL_FOR_KVM(symbol)
> #endif
>
> #ifndef __ASSEMBLER__
On Thu, Dec 18, 2025, Peter Zijlstra wrote:
> On Thu, Dec 18, 2025 at 09:31:09AM +0100, Peter Zijlstra wrote:
> > On Wed, Dec 17, 2025 at 12:37:57PM -0000, tip-bot2 for Peter Zijlstra wrote:
> > > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > > index e6a4b1e..376fb07 100644
> > > --- a/kernel/events/core.c
> > > +++ b/kernel/events/core.c
> > > @@ -57,6 +57,7 @@
> > > #include <linux/task_work.h>
> > > #include <linux/percpu-rwsem.h>
> > > #include <linux/unwind_deferred.h>
> > > +#include <linux/kvm_types.h>
> > Bah, so the !KVM architectures hate on this.
> >
> > Sean, would something like this be acceptable?
>
> Hmm, the other option is doing something like so:
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 376fb07d869b..014d832e8eaa 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -57,7 +57,6 @@
> #include <linux/task_work.h>
> #include <linux/percpu-rwsem.h>
> #include <linux/unwind_deferred.h>
> -#include <linux/kvm_types.h>
>
> #include "internal.h"
>
> @@ -6325,6 +6324,8 @@ u64 perf_event_pause(struct perf_event *event, bool reset)
> EXPORT_SYMBOL_GPL(perf_event_pause);
>
> #ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
> +#include <linux/kvm_types.h>
Hrm, quick and dirty, but I don't love the idea of punting on the underlying
issue, because not being able to include kvm_types.h will be a big deterrent to
using EXPORT_SYMBOL_FOR_KVM().
> static atomic_t nr_include_guest_events __read_mostly;
>
> static atomic_t nr_mediated_pmu_vms __read_mostly;
>
> > ---
> > Subject: kvm: Fix linux/kvm_types.h for !KVM architectures
> >
> > As is, <linux/kvm_types.h> hard relies on architectures having
> > <asm/kvm_types.h> which (obviously) breaks for architectures that don't
> > have KVM support.
> >
> > This means generic code (kernel/events/ in this case) cannot use
> > EXPORT_SYMBOL_FOR_KVM().
> >
> > Rearrange things just so that <linux/kvm_types.h> becomes usable and
> > provides the (expected) empty stub for EXPORT_SYMBOL_FOR_KVM() for !KVM.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > index a568d8e6f4e8..a4cc13e41eec 100644
> > --- a/include/linux/kvm_types.h
> > +++ b/include/linux/kvm_types.h
> > @@ -6,6 +6,8 @@
> > #include <linux/bits.h>
> > #include <linux/export.h>
> > #include <linux/types.h>
> > +
> > +#ifdef CONFIG_KVM
> > #include <asm/kvm_types.h>
This won't work, because asm/kvm_types.h #defines KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE,
which guards the "struct kvm_mmu_memory_cache" definition. E.g. on x86 with
CONFIG_KVM=n, that yields errors like:
In file included from include/linux/kvm_host.h:45,
from arch/x86/events/intel/core.c:17:
arch/x86/include/asm/kvm_host.h:854:37: error: field ‘mmu_pte_list_desc_cache’ has incomplete type
854 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
| ^~~~~~~~~~~~~~~~~~~~~~~
In general, I'm hesitant to guard an include with a conditional Kconfig, precisely
because doing so has a tendency to result in wonky, config-specific build errors.
Rather than gate the check on KVM being enabled, what if we restrict the asm
include to architectures that support KVM in any capacity? Alternatively, we
could add a HAVE_KVM, but I'd rather not add HAVE_KVM, because then we'll end up
with the same mess if architectures get clever and conditionally select HAVE_KVM
(IIRC, that's exactly what happened when HAVE_KVM was a thing in the past).
Compile-tested on all KVM architectures along with csky (and an include of
kvm_types.h in init/main.c).
--
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 18 Dec 2025 15:47:59 +0000
Subject: [PATCH] KVM: Allow linux/kvm_types.h to be included on non-KVM
architectures
Include the arch-defined asm/kvm_types.h if and only if the kernel is
being compiled for an architecture that supports KVM so that kvm_types.h
can be included in generic code without having to guard _those_ includes,
and without having to add "generic-y += kvm_types.h" for all architectures
that don't support KVM.
Assert that KVM=n if asm/kvm_types.h isn't included to provide a more
helpful error message if an arch name changes (highly unlikely) or a new
arch that supports KVM comes along.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
include/linux/kvm_types.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index a568d8e6f4e8..797721e298df 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -6,7 +6,23 @@
#include <linux/bits.h>
#include <linux/export.h>
#include <linux/types.h>
+
+/*
+ * Include the arch-defined kvm_types.h if and only if the target architecture
+ * supports KVM, so that linux/kvm_types.h can be included in generic code
+ * without requiring _all_ architectures to add generic-y += kvm_types.h.
+ */
+#if defined(CONFIG_ARM64) || \
+ defined(CONFIG_LOONGARCH) || \
+ defined(CONFIG_MIPS) || \
+ defined(CONFIG_PPC) || \
+ defined(CONFIG_RISCV) || \
+ defined(CONFIG_S390) || \
+ defined(CONFIG_X86)
#include <asm/kvm_types.h>
+#else
+static_assert(!IS_ENABLED(CONFIG_KVM));
+#endif
#ifdef KVM_SUB_MODULES
#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) \
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
--
On Thu, Dec 18, 2025 at 10:40:51AM -0800, Sean Christopherson wrote:
> Include the arch-defined asm/kvm_types.h if and only if the kernel is
> being compiled for an architecture that supports KVM so that kvm_types.h
> can be included in generic code without having to guard _those_ includes,
> and without having to add "generic-y += kvm_types.h" for all architectures
> that don't support KVM.

Something jogged my brain and the below seems to work for the few
architectures I've tried. Let me update the patch and see if the build
robot still finds fail.

---
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 295c94a3ccc1..9aff61e7b8f2 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -32,6 +32,7 @@ mandatory-y += irq_work.h
mandatory-y += kdebug.h
mandatory-y += kmap_size.h
mandatory-y += kprobes.h
+mandatory-y += kvm_types.h
mandatory-y += linkage.h
mandatory-y += local.h
mandatory-y += local64.h
On Fri, Dec 19, 2025, Peter Zijlstra wrote:
> On Thu, Dec 18, 2025 at 10:40:51AM -0800, Sean Christopherson wrote:
> >
> > Include the arch-defined asm/kvm_types.h if and only if the kernel is
> > being compiled for an architecture that supports KVM so that kvm_types.h
> > can be included in generic code without having to guard _those_ includes,
> > and without having to add "generic-y += kvm_types.h" for all architectures
> > that don't support KVM.
>
> Something jogged my brain and the below seems to work for the few
> architectures I've tried. Let me update the patch and see if the build
> robot still finds fail.

Nice! Works on my end as well. Just when I think I've learned most of the
build system's tricks...

> ---
> diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
> index 295c94a3ccc1..9aff61e7b8f2 100644
> --- a/include/asm-generic/Kbuild
> +++ b/include/asm-generic/Kbuild
> @@ -32,6 +32,7 @@ mandatory-y += irq_work.h
> mandatory-y += kdebug.h
> mandatory-y += kmap_size.h
> mandatory-y += kprobes.h
> +mandatory-y += kvm_types.h
> mandatory-y += linkage.h
> mandatory-y += local.h
> mandatory-y += local64.h
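(Kbuild aside, for readers unfamiliar with the mechanism: mandatory-y in
include/asm-generic/Kbuild forces every architecture without its own
asm/kvm_types.h to get a generated wrapper, roughly:

	/* arch/$ARCH/include/generated/asm/kvm_types.h (generated) */
	#include <asm-generic/kvm_types.h>

so <linux/kvm_types.h> can include <asm/kvm_types.h> unconditionally. The
exact generated contents are assumed here, not taken from this thread.)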
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 01122b89361e565b3c88b9fbebe92dc5c7420cb7
Gitweb: https://git.kernel.org/tip/01122b89361e565b3c88b9fbebe92dc5c7420cb7
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Wed, 17 Dec 2025 13:23:59 +01:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 19 Dec 2025 08:54:59 +01:00
perf: Use EXPORT_SYMBOL_FOR_KVM() for the mediated APIs
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251208115156.GE3707891@noisy.programming.kicks-ass.net
---
arch/x86/events/core.c | 5 +++--
include/asm-generic/Kbuild | 1 +
kernel/events/core.c | 5 +++--
3 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index df7a32b..0ecac94 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -30,6 +30,7 @@
#include <linux/device.h>
#include <linux/nospec.h>
#include <linux/static_call.h>
+#include <linux/kvm_types.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -1771,14 +1772,14 @@ void perf_load_guest_lvtpc(u32 guest_lvtpc)
APIC_DM_FIXED | PERF_GUEST_MEDIATED_PMI_VECTOR | masked);
this_cpu_write(guest_lvtpc_loaded, true);
}
-EXPORT_SYMBOL_FOR_MODULES(perf_load_guest_lvtpc, "kvm");
+EXPORT_SYMBOL_FOR_KVM(perf_load_guest_lvtpc);
void perf_put_guest_lvtpc(void)
{
this_cpu_write(guest_lvtpc_loaded, false);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
-EXPORT_SYMBOL_FOR_MODULES(perf_put_guest_lvtpc, "kvm");
+EXPORT_SYMBOL_FOR_KVM(perf_put_guest_lvtpc);
#endif /* CONFIG_PERF_GUEST_MEDIATED_PMU */
static int
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 295c94a..9aff61e 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -32,6 +32,7 @@ mandatory-y += irq_work.h
mandatory-y += kdebug.h
mandatory-y += kmap_size.h
mandatory-y += kprobes.h
+mandatory-y += kvm_types.h
mandatory-y += linkage.h
mandatory-y += local.h
mandatory-y += local64.h
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e6a4b1e..376fb07 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -57,6 +57,7 @@
#include <linux/task_work.h>
#include <linux/percpu-rwsem.h>
#include <linux/unwind_deferred.h>
+#include <linux/kvm_types.h>
#include "internal.h"
@@ -6388,7 +6389,7 @@ int perf_create_mediated_pmu(void)
atomic_inc(&nr_mediated_pmu_vms);
return 0;
}
-EXPORT_SYMBOL_GPL(perf_create_mediated_pmu);
+EXPORT_SYMBOL_FOR_KVM(perf_create_mediated_pmu);
void perf_release_mediated_pmu(void)
{
@@ -6397,7 +6398,7 @@ void perf_release_mediated_pmu(void)
atomic_dec(&nr_mediated_pmu_vms);
}
-EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
+EXPORT_SYMBOL_FOR_KVM(perf_release_mediated_pmu);
/* When loading a guest's mediated PMU, schedule out all exclude_guest events. */
void perf_load_guest_context(void)
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 3cb3c2f6886f9489df13de8efe7a1e803a3f21ea
Gitweb: https://git.kernel.org/tip/3cb3c2f6886f9489df13de8efe7a1e803a3f21ea
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Wed, 17 Dec 2025 12:08:01 +01:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 17 Dec 2025 13:31:09 +01:00
perf: Clean up mediated vPMU accounting
The mediated_pmu_account_event() and perf_create_mediated_pmu()
functions implement the exclusion between '!exclude_guest' counters
and mediated vPMUs. Their implementation is basically identical,
except mirrored in what they count/check.
Make sure the actual implementations reflect this similarity.
Notably:
- while perf_release_mediated_pmu() has an underflow check;
mediated_pmu_unaccount_event() did not.
- while perf_create_mediated_pmu() has an inc_not_zero() path;
mediated_pmu_account_event() did not.
Also, the inc_not_zero() path can be outside of
perf_mediated_pmu_mutex. The mutex must guard the 0->1 transition (of
either nr_include_guest_events or nr_mediated_pmu_vms), but once a
counter is already non-zero, it can safely be incremented further.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251208115156.GE3707891@noisy.programming.kicks-ass.net
---
kernel/events/core.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dd842a4..e6a4b1e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6344,8 +6344,10 @@ static int mediated_pmu_account_event(struct perf_event *event)
if (!is_include_guest_event(event))
return 0;
- guard(mutex)(&perf_mediated_pmu_mutex);
+ if (atomic_inc_not_zero(&nr_include_guest_events))
+ return 0;
+ guard(mutex)(&perf_mediated_pmu_mutex);
if (atomic_read(&nr_mediated_pmu_vms))
return -EOPNOTSUPP;
@@ -6358,6 +6360,9 @@ static void mediated_pmu_unaccount_event(struct perf_event *event)
if (!is_include_guest_event(event))
return;
+ if (WARN_ON_ONCE(!atomic_read(&nr_include_guest_events)))
+ return;
+
atomic_dec(&nr_include_guest_events);
}
@@ -6373,10 +6378,10 @@ static void mediated_pmu_unaccount_event(struct perf_event *event)
*/
int perf_create_mediated_pmu(void)
{
- guard(mutex)(&perf_mediated_pmu_mutex);
if (atomic_inc_not_zero(&nr_mediated_pmu_vms))
return 0;
+ guard(mutex)(&perf_mediated_pmu_mutex);
if (atomic_read(&nr_include_guest_events))
return -EBUSY;
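For reference, the accounting pair after this cleanup reads as follows
(reconstructed from the base patch plus the diff above; not a new change):

	static int mediated_pmu_account_event(struct perf_event *event)
	{
		if (!is_include_guest_event(event))
			return 0;

		if (atomic_inc_not_zero(&nr_include_guest_events))
			return 0;

		guard(mutex)(&perf_mediated_pmu_mutex);
		if (atomic_read(&nr_mediated_pmu_vms))
			return -EOPNOTSUPP;

		atomic_inc(&nr_include_guest_events);
		return 0;
	}

	int perf_create_mediated_pmu(void)
	{
		if (atomic_inc_not_zero(&nr_mediated_pmu_vms))
			return 0;

		guard(mutex)(&perf_mediated_pmu_mutex);
		if (atomic_read(&nr_include_guest_events))
			return -EBUSY;

		atomic_inc(&nr_mediated_pmu_vms);
		return 0;
	}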
The following commit has been merged into the perf/core branch of tip:
Commit-ID: eff95e170275d9e80b968f335cd03d0ac250d2d1
Gitweb: https://git.kernel.org/tip/eff95e170275d9e80b968f335cd03d0ac250d2d1
Author: Kan Liang <kan.liang@linux.intel.com>
AuthorDate: Fri, 05 Dec 2025 16:16:40 -08:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 17 Dec 2025 13:31:04 +01:00
perf: Add APIs to create/release mediated guest vPMUs
Currently, exposing PMU capabilities to a KVM guest is done by emulating
guest PMCs via host perf events, i.e. by having KVM be "just" another user
of perf. As a result, the guest and host are effectively competing for
resources, and emulating guest accesses to vPMU resources requires
expensive actions (expensive relative to the native instruction). The
overhead and resource competition result in degraded guest performance
and ultimately very poor vPMU accuracy.
To address the issues with the perf-emulated vPMU, introduce a "mediated
vPMU", where the data plane (PMCs and enable/disable knobs) is exposed
directly to the guest, but the control plane (event selectors and access
to fixed counters) is managed by KVM (via MSR interceptions). To allow
host perf usage of the PMU to (partially) co-exist with KVM/guest usage
of the PMU, KVM and perf will coordinate a world switch between host
perf context and guest vPMU context near VM-Enter/VM-Exit.
Add two exported APIs, perf_{create,release}_mediated_pmu(), to allow KVM
to create and release a mediated PMU instance (per VM). Because host perf
context will be deactivated while the guest is running, mediated PMU usage
will be mutually exclusive with perf analysis of the guest, i.e. perf
events that do NOT exclude the guest will not behave as expected.
To avoid silent failure of !exclude_guest perf events, disallow creating a
mediated PMU if there are active !exclude_guest events, and on the perf
side, disallow creating new !exclude_guest perf events while there is
at least one active mediated PMU.
Exempt PMU resources that do not support mediated PMU usage, i.e. that are
outside the scope/view of KVM's vPMU and will not be swapped out while the
guest is running.
Guard mediated PMU with a new kconfig to help readers identify code paths
that are unique to mediated PMU support, and to allow for adding arch-
specific hooks without stubs. KVM x86 is expected to be the only KVM
architecture to support a mediated PMU in the near future (e.g. arm64 is
trending toward a partitioned PMU implementation), and KVM x86 will select
PERF_GUEST_MEDIATED_PMU unconditionally, i.e. won't need stubs.
Immediately select PERF_GUEST_MEDIATED_PMU when KVM x86 is enabled so that
all paths are compile tested. Full KVM support is on its way...
[sean: add kconfig and WARNing, rewrite changelog, swizzle patch ordering]
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Link: https://patch.msgid.link/20251206001720.468579-5-seanjc@google.com
---
arch/x86/kvm/Kconfig | 1 +-
include/linux/perf_event.h | 6 +++-
init/Kconfig | 4 ++-
kernel/events/core.c | 82 +++++++++++++++++++++++++++++++++++++-
4 files changed, 93 insertions(+)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 278f081..d916bd7 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -37,6 +37,7 @@ config KVM_X86
select SCHED_INFO
select PERF_EVENTS
select GUEST_PERF_EVENTS
+ select PERF_GUEST_MEDIATED_PMU
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_NO_POLL
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9870d76..31929da 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -305,6 +305,7 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
#define PERF_PMU_CAP_AUX_PAUSE 0x0200
#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400
+#define PERF_PMU_CAP_MEDIATED_VPMU 0x0800
/**
* pmu::scope
@@ -1914,6 +1915,11 @@ extern int perf_event_account_interrupt(struct perf_event *event);
extern int perf_event_period(struct perf_event *event, u64 value);
extern u64 perf_event_pause(struct perf_event *event, bool reset);
+#ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
+int perf_create_mediated_pmu(void);
+void perf_release_mediated_pmu(void);
+#endif
+
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
diff --git a/init/Kconfig b/init/Kconfig
index fa79feb..6628ff2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2061,6 +2061,10 @@ config GUEST_PERF_EVENTS
bool
depends on HAVE_PERF_EVENTS
+config PERF_GUEST_MEDIATED_PMU
+ bool
+ depends on GUEST_PERF_EVENTS
+
config PERF_USE_VMALLOC
bool
help
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6973483..5a2166b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5656,6 +5656,8 @@ static void __free_event(struct perf_event *event)
call_rcu(&event->rcu_head, free_event_rcu);
}
+static void mediated_pmu_unaccount_event(struct perf_event *event);
+
DEFINE_FREE(__free_event, struct perf_event *, if (_T) __free_event(_T))
/* vs perf_event_alloc() success */
@@ -5665,6 +5667,7 @@ static void _free_event(struct perf_event *event)
irq_work_sync(&event->pending_disable_irq);
unaccount_event(event);
+ mediated_pmu_unaccount_event(event);
if (event->rb) {
/*
@@ -6187,6 +6190,81 @@ u64 perf_event_pause(struct perf_event *event, bool reset)
}
EXPORT_SYMBOL_GPL(perf_event_pause);
+#ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
+static atomic_t nr_include_guest_events __read_mostly;
+
+static atomic_t nr_mediated_pmu_vms __read_mostly;
+static DEFINE_MUTEX(perf_mediated_pmu_mutex);
+
+/* !exclude_guest event of PMU with PERF_PMU_CAP_MEDIATED_VPMU */
+static inline bool is_include_guest_event(struct perf_event *event)
+{
+ if ((event->pmu->capabilities & PERF_PMU_CAP_MEDIATED_VPMU) &&
+ !event->attr.exclude_guest)
+ return true;
+
+ return false;
+}
+
+static int mediated_pmu_account_event(struct perf_event *event)
+{
+ if (!is_include_guest_event(event))
+ return 0;
+
+ guard(mutex)(&perf_mediated_pmu_mutex);
+
+ if (atomic_read(&nr_mediated_pmu_vms))
+ return -EOPNOTSUPP;
+
+ atomic_inc(&nr_include_guest_events);
+ return 0;
+}
+
+static void mediated_pmu_unaccount_event(struct perf_event *event)
+{
+ if (!is_include_guest_event(event))
+ return;
+
+ atomic_dec(&nr_include_guest_events);
+}
+
+/*
+ * Currently invoked at VM creation to
+ * - Check whether there are existing !exclude_guest events of PMU with
+ * PERF_PMU_CAP_MEDIATED_VPMU
+ * - Set nr_mediated_pmu_vms to prevent !exclude_guest event creation on
+ * PMUs with PERF_PMU_CAP_MEDIATED_VPMU
+ *
+ * No impact on PMUs without PERF_PMU_CAP_MEDIATED_VPMU; perf still
+ * owns all the PMU resources.
+ */
+int perf_create_mediated_pmu(void)
+{
+ guard(mutex)(&perf_mediated_pmu_mutex);
+ if (atomic_inc_not_zero(&nr_mediated_pmu_vms))
+ return 0;
+
+ if (atomic_read(&nr_include_guest_events))
+ return -EBUSY;
+
+ atomic_inc(&nr_mediated_pmu_vms);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(perf_create_mediated_pmu);
+
+void perf_release_mediated_pmu(void)
+{
+ if (WARN_ON_ONCE(!atomic_read(&nr_mediated_pmu_vms)))
+ return;
+
+ atomic_dec(&nr_mediated_pmu_vms);
+}
+EXPORT_SYMBOL_GPL(perf_release_mediated_pmu);
+#else
+static int mediated_pmu_account_event(struct perf_event *event) { return 0; }
+static void mediated_pmu_unaccount_event(struct perf_event *event) {}
+#endif
+
/*
* Holding the top-level event's child_mutex means that any
* descendant process that has inherited this event will block
@@ -13147,6 +13225,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (err)
return ERR_PTR(err);
+ err = mediated_pmu_account_event(event);
+ if (err)
+ return ERR_PTR(err);
+
/* symmetric to unaccount_event() in _free_event() */
account_event(event);