IBS on future hardware adds the ability to filter IBS events by examining
RIP bit 63. Because Linux kernel addresses always have bit 63 set while
user-space addresses never do, this capability can be used as a privilege
filter.
So far, IBS supports privilege filtering in software (swfilt=1), where
samples are dropped in the NMI handler. The RIP bit63 hardware filter
enables IBS to be usable by unprivileged users without passing swfilt
flag. So, swfilt flag will silently be ignored when the hardware
filtering capability is present.
Example (non-root user):
$ perf record -e ibs_op//u -- <workload>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
arch/x86/events/amd/ibs.c | 46 ++++++++++++++++++++++++++++++++-------
1 file changed, 38 insertions(+), 8 deletions(-)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index cb3ae4e4744c..13ecc8d92b23 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -321,11 +321,6 @@ static int perf_ibs_init(struct perf_event *event)
event->attr.exclude_idle)
return -EINVAL;
- if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
- (event->attr.exclude_kernel || event->attr.exclude_user ||
- event->attr.exclude_hv))
- return -EINVAL;
-
ret = validate_group(event);
if (ret)
return ret;
@@ -338,6 +333,32 @@ static int perf_ibs_init(struct perf_event *event)
hwc->extra_reg.reg = perf_ibs->msr2;
}
+ if (ibs_caps & IBS_CAPS_BIT63_FILTER) {
+ if (perf_ibs == &perf_ibs_fetch) {
+ if (event->attr.exclude_kernel) {
+ hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_1;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ if (event->attr.exclude_user) {
+ hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_0;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ } else {
+ if (event->attr.exclude_kernel) {
+ hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_1;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ if (event->attr.exclude_user) {
+ hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_0;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ }
+ } else if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
+ (event->attr.exclude_kernel || event->attr.exclude_user ||
+ event->attr.exclude_hv)) {
+ return -EINVAL;
+ }
+
if (hwc->sample_period) {
if (config & perf_ibs->cnt_mask)
/* raw max_cnt may not be set */
@@ -1280,7 +1301,7 @@ static bool perf_ibs_is_kernel_br_target(struct perf_event *event,
op_data.op_brn_ret && kernel_ip(br_target));
}
-static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event,
+static bool perf_ibs_discard_sample(struct perf_ibs *perf_ibs, struct perf_event *event,
struct pt_regs *regs, struct perf_ibs_data *ibs_data,
int br_target_idx)
{
@@ -1435,8 +1456,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
regs.flags |= PERF_EFLAGS_EXACT;
}
- if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
- perf_ibs_swfilt_discard(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
+ if (((ibs_caps & IBS_CAPS_BIT63_FILTER) ||
+ (event->attr.config2 & IBS_SW_FILTER_MASK)) &&
+ perf_ibs_discard_sample(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
throttle = perf_event_account_interrupt(event);
goto out;
}
@@ -1899,6 +1921,14 @@ static __init int amd_ibs_init(void)
perf_ibs_pm_init();
+#ifdef CONFIG_X86_32
+ /*
+ * IBS_CAPS_BIT63_FILTER is used for exclude_kernel/user filtering,
+ * which obviously won't work for 32 bit kernel.
+ */
+ caps &= ~IBS_CAPS_BIT63_FILTER;
+#endif
+
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
smp_mb();
--
2.43.0
On Sun, Feb 15, 2026 at 8:26 PM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> IBS on future hardware adds the ability to filter IBS events by examining
> RIP bit 63. Because Linux kernel addresses always have bit 63 set while
> user-space addresses never do, this capability can be used as a privilege
> filter.
Since x86's top-byte-ignore/linear-address-masking leaves bit 63 untouched,
could this break in the future if the kernel later ignores all bits,
including bit 63, and user space wants to use bit 63 of the pointer
for metadata? Does the bit 63 assumption hold for guest operating
systems?
Thanks,
Ian
> So far, IBS supports privilege filtering in software (swfilt=1), where
> samples are dropped in the NMI handler. The RIP bit63 hardware filter
> enables IBS to be usable by unprivileged users without passing swfilt
> flag. So, swfilt flag will silently be ignored when the hardware
> filtering capability is present.
>
> Example (non-root user):
> $ perf record -e ibs_op//u -- <workload>
>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
> ---
> arch/x86/events/amd/ibs.c | 46 ++++++++++++++++++++++++++++++++-------
> 1 file changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
> index cb3ae4e4744c..13ecc8d92b23 100644
> --- a/arch/x86/events/amd/ibs.c
> +++ b/arch/x86/events/amd/ibs.c
> @@ -321,11 +321,6 @@ static int perf_ibs_init(struct perf_event *event)
> event->attr.exclude_idle)
> return -EINVAL;
>
> - if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
> - (event->attr.exclude_kernel || event->attr.exclude_user ||
> - event->attr.exclude_hv))
> - return -EINVAL;
> -
> ret = validate_group(event);
> if (ret)
> return ret;
> @@ -338,6 +333,32 @@ static int perf_ibs_init(struct perf_event *event)
> hwc->extra_reg.reg = perf_ibs->msr2;
> }
>
> + if (ibs_caps & IBS_CAPS_BIT63_FILTER) {
> + if (perf_ibs == &perf_ibs_fetch) {
> + if (event->attr.exclude_kernel) {
> + hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_1;
> + hwc->extra_reg.reg = perf_ibs->msr2;
> + }
> + if (event->attr.exclude_user) {
> + hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_0;
> + hwc->extra_reg.reg = perf_ibs->msr2;
> + }
> + } else {
> + if (event->attr.exclude_kernel) {
> + hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_1;
> + hwc->extra_reg.reg = perf_ibs->msr2;
> + }
> + if (event->attr.exclude_user) {
> + hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_0;
> + hwc->extra_reg.reg = perf_ibs->msr2;
> + }
> + }
> + } else if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
> + (event->attr.exclude_kernel || event->attr.exclude_user ||
> + event->attr.exclude_hv)) {
> + return -EINVAL;
> + }
> +
> if (hwc->sample_period) {
> if (config & perf_ibs->cnt_mask)
> /* raw max_cnt may not be set */
> @@ -1280,7 +1301,7 @@ static bool perf_ibs_is_kernel_br_target(struct perf_event *event,
> op_data.op_brn_ret && kernel_ip(br_target));
> }
>
> -static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event,
> +static bool perf_ibs_discard_sample(struct perf_ibs *perf_ibs, struct perf_event *event,
> struct pt_regs *regs, struct perf_ibs_data *ibs_data,
> int br_target_idx)
> {
> @@ -1435,8 +1456,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
> regs.flags |= PERF_EFLAGS_EXACT;
> }
>
> - if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
> - perf_ibs_swfilt_discard(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
> + if (((ibs_caps & IBS_CAPS_BIT63_FILTER) ||
> + (event->attr.config2 & IBS_SW_FILTER_MASK)) &&
> + perf_ibs_discard_sample(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
> throttle = perf_event_account_interrupt(event);
> goto out;
> }
> @@ -1899,6 +1921,14 @@ static __init int amd_ibs_init(void)
>
> perf_ibs_pm_init();
>
> +#ifdef CONFIG_X86_32
> + /*
> + * IBS_CAPS_BIT63_FILTER is used for exclude_kernel/user filtering,
> + * which obviously won't work for 32 bit kernel.
> + */
> + caps &= ~IBS_CAPS_BIT63_FILTER;
> +#endif
> +
> ibs_caps = caps;
> /* make ibs_caps visible to other cpus: */
> smp_mb();
> --
> 2.43.0
>
Hi Ian,

>> IBS on future hardware adds the ability to filter IBS events by examining
>> RIP bit 63. Because Linux kernel addresses always have bit 63 set while
>> user-space addresses never do, this capability can be used as a privilege
>> filter.
>
> Since x86's top-byte-ignore/linear-address-masking leaves bit 63 could
> this break in the future if the kernel later ignores all bits,
> including bit 63, and user space wants to use bit 63 of the pointer
> for metadata?

The AMD equivalent feature is called Upper Address Ignore (UAI).

 o Identifying whether an address is in kernel or user space by examining
   bit 63 is so fundamental in Linux that the UAI design was revisited in
   UAIv2 to restore bit 63 as canonical:
   https://lore.kernel.org/lkml/6a5076ad-405e-4e5e-af55-fe2a6b01467d@www.fastmail.com

 o UAI applies only to data addresses; instruction addresses must remain
   canonical.

So I assume this should not be an issue, at least for now.

> Does the bit 63 assumption hold for guest operating systems?

Yes, this seems to be an issue, even with current swfilt approach. Let
me inspect the code and get back.

Thanks for the review,
Ravi
Hi Ian,
>> Does the bit 63 assumption hold for guest operating systems?
>
> Yes, this seems to be an issue, even with current swfilt approach. Let
> me inspect the code and get back.
All mainstream 64-bit OSes set bit 63 for kernel addresses and clear it
for userspace addresses. This norm does not apply to 32-bit guests, but
those are rare, and profiling them with IBS would be even rarer. So, I'll
document this limitation in the perf-amd-ibs man page.
While looking at this, I found some issues in IBS. The patch below fixes them:
---
From deb6cdcbc60778b57a6eef60b2b7bd1b8e3cea74 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@amd.com>
Date: Fri, 6 Mar 2026 04:52:00 +0000
Subject: [PATCH] perf/amd/ibs: Improve guest profiling
IBS captures the RIP but not its privilege level. Since the NMI is
delivered with delay, CPL can change between the IBS tag and NMI
delivery. Add a check to catch and discard invalid guest samples
using CPL stored in vCPU save area. This will work when there is
user/kernel CPL change in between IBS tag and NMI delivery within
the guest boundary. But it won't work when there is a guest entry
or exit in between IBS tag and NMI delivery.
When profiling a guest and the IBS RIP is valid, assign the sample
IP from the IBS-captured RIP and set PERF_SAMPLE_IP in sample_flags
so that perf_prepare_sample() does not overwrite the RIP with
perf_guest_get_ip() from the vCPU save area. This keeps the perf
sample IP consistent with IBS raw data, data_src, weight, phy_addr
etc. The privilege level in the perf "misc" field can now go out
of sync, as it is taken from the vCPU save area.
Reported-by: Ian Rogers <irogers@google.com>
Closes: https://lore.kernel.org/r/CAP-5=fV_cJskvLRZhQQXMGAcPUb_Rg_b30PDJNXzxL49JK4B5g@mail.gmail.com
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
arch/x86/events/amd/ibs.c | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index eeb607b84dda..70408b0b1597 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -1415,6 +1415,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
unsigned int msr;
u64 *buf, *config, period, new_config = 0;
int br_target_idx = -1;
+ unsigned int guest_state;
if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
@@ -1526,6 +1527,42 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
regs.flags |= PERF_EFLAGS_EXACT;
}
+ guest_state = perf_guest_state();
+ if (!event->attr.exclude_guest && guest_state & PERF_GUEST_ACTIVE) {
+ /*
+ * IBS captures the RIP but not its privilege level. Since
+ * NMI arrives delayed, CPL might change in between IBS tag
+ * and the NMI delivery. Below checks can identify and filter
+ * out invalid samples when the CPL changes are within the
+ * guest boundary. However, these checks fail to handle cases
+ * where the CPU performs a guest entry or exit in between
+ * the IBS tag and the NMI delivery.
+ */
+ if (event->attr.exclude_kernel && !(guest_state & PERF_GUEST_USER)) {
+ throttle = perf_event_account_interrupt(event);
+ goto out;
+ }
+ if (event->attr.exclude_user && guest_state & PERF_GUEST_USER) {
+ throttle = perf_event_account_interrupt(event);
+ goto out;
+ }
+
+ /*
+ * Assign the IBS RIP value directly in the perf sample here
+ * to prevent perf_prepare_sample() from retrieving it from
+ * the vCPU save-area. With this, rest of the perf sample
+ * fields (raw data, data_src, weight, phy_addr, etc.) will
+ * remain in sync with sample IP. However, privilege level
+ * captured as part of perf sample "misc" field could now
+ * go out of sync since privilege level is fetched from the
+ * vCPU save area.
+ */
+ if (regs.flags & PERF_EFLAGS_EXACT) {
+ data.ip = regs.ip;
+ data.sample_flags |= PERF_SAMPLE_IP;
+ }
+ }
+
if (((ibs_caps & IBS_CAPS_BIT63_FILTER) ||
(event->attr.config2 & IBS_SW_FILTER_MASK)) &&
perf_ibs_discard_sample(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
--
2.43.0
On Sun, Mar 8, 2026 at 7:58 PM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> Hi Ian,
>
> >> Does the bit 63 assumption hold for guest operating systems?
> >
> > Yes, this seems to be an issue, even with current swfilt approach. Let
> > me inspect the code and get back.
>
> All mainstream 64 bit OSes use the bit-63 set for kernel addresses and zero
> for userspace addresses. This norm does not apply to 32 bit guests, but
> those are rare, and profiling them with IBS would be even rarer. So, I'll
> document this limitation in the perf-amd-ibs man page.
>
> While looking at this, I found some issues in IBS. Below patch fixes it:
>
> ---
>
> From deb6cdcbc60778b57a6eef60b2b7bd1b8e3cea74 Mon Sep 17 00:00:00 2001
> From: Ravi Bangoria <ravi.bangoria@amd.com>
> Date: Fri, 6 Mar 2026 04:52:00 +0000
> Subject: [PATCH] perf/amd/ibs: Improve guest profiling
>
> IBS captures the RIP but not its privilege level. Since the NMI is
> delivered with delay, CPL can change between the IBS tag and NMI
> delivery. Add a check to catch and discard invalid guest samples
> using CPL stored in vCPU save area. This will work when there is
> user/kernel CPL change in between IBS tag and NMI delivery within
> the guest boundary. But it won't work when there is a guest entry
> or exit in between IBS tag and NMI delivery.
>
> When profiling a guest and the IBS RIP is valid, assign the sample
> IP from the IBS-captured RIP and set PERF_SAMPLE_IP in sample_flags
> so that perf_prepare_sample() does not overwrite the RIP with
> perf_guest_get_ip() from the vCPU save area. This keeps the perf
> sample IP consistent with IBS raw data, data_src, weight, phy_addr
> etc. The privilege level in the perf "misc" field can now go out
> of sync, as it is taken from the vCPU save area.
>
> Reported-by: Ian Rogers <irogers@google.com>
> Closes: https://lore.kernel.org/r/CAP-5=fV_cJskvLRZhQQXMGAcPUb_Rg_b30PDJNXzxL49JK4B5g@mail.gmail.com
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Thanks Ravi!
Reviewed-by: Ian Rogers <irogers@google.com>
Thanks,
Ian
> ---
> arch/x86/events/amd/ibs.c | 37 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 37 insertions(+)
>
> diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
> index eeb607b84dda..70408b0b1597 100644
> --- a/arch/x86/events/amd/ibs.c
> +++ b/arch/x86/events/amd/ibs.c
> @@ -1415,6 +1415,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
> unsigned int msr;
> u64 *buf, *config, period, new_config = 0;
> int br_target_idx = -1;
> + unsigned int guest_state;
>
> if (!test_bit(IBS_STARTED, pcpu->state)) {
> fail:
> @@ -1526,6 +1527,42 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
> regs.flags |= PERF_EFLAGS_EXACT;
> }
>
> + guest_state = perf_guest_state();
> + if (!event->attr.exclude_guest && guest_state & PERF_GUEST_ACTIVE) {
> + /*
> + * IBS captures the RIP but not its privilege level. Since
> + * NMI arrives delayed, CPL might change in between IBS tag
> + * and the NMI delivery. Below checks can identify and filter
> + * out invalid samples when the CPL changes are within the
> + * guest boundary. However, these checks fail to handle cases
> + * where the CPU performs a guest entry or exit in between
> + * the IBS tag and the NMI delivery.
> + */
> + if (event->attr.exclude_kernel && !(guest_state & PERF_GUEST_USER)) {
> + throttle = perf_event_account_interrupt(event);
> + goto out;
> + }
> + if (event->attr.exclude_user && guest_state & PERF_GUEST_USER) {
> + throttle = perf_event_account_interrupt(event);
> + goto out;
> + }
> +
> + /*
> + * Assign the IBS RIP value directly in the perf sample here
> + * to prevent perf_prepare_sample() from retrieving it from
> + * the vCPU save-area. With this, rest of the perf sample
> + * fields (raw data, data_src, weight, phy_addr, etc.) will
> + * remain in sync with sample IP. However, privilege level
> + * captured as part of perf sample "misc" field could now
> + * go out of sync since privilege level is fetched from the
> + * vCPU save area.
> + */
> + if (regs.flags & PERF_EFLAGS_EXACT) {
> + data.ip = regs.ip;
> + data.sample_flags |= PERF_SAMPLE_IP;
> + }
> + }
> +
> if (((ibs_caps & IBS_CAPS_BIT63_FILTER) ||
> (event->attr.config2 & IBS_SW_FILTER_MASK)) &&
> perf_ibs_discard_sample(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
> --
> 2.43.0
>
>
On Thu, Feb 26, 2026 at 1:20 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> Hi Ian,
>
> >> IBS on future hardware adds the ability to filter IBS events by examining
> >> RIP bit 63. Because Linux kernel addresses always have bit 63 set while
> >> user-space addresses never do, this capability can be used as a privilege
> >> filter.
> >
> > Since x86's top-byte-ignore/linear-address-masking leaves bit 63 could
> > this break in the future if the kernel later ignores all bits,
> > including bit 63, and user space wants to use bit 63 of the pointer
> > for metadata?
>
> The AMD equivalent feature is called Upper Address Ignore (UAI).
>
>  o Identifying whether an address is in kernel or user space by examining
>    bit 63 is so fundamental in Linux that the UAI design was revisited in
>    UAIv2 to restore bit 63 as canonical:
>    https://lore.kernel.org/lkml/6a5076ad-405e-4e5e-af55-fe2a6b01467d@www.fastmail.com

Fwiw, throwing in my 2 cents. IIRC bit 63 wasn't canonical in the
original ARM64 top byte ignore work, it seemed like a legacy thing
added to keep older x86 drivers working. Imo, this feels crufty like
passing the number of float registers used for varargs in '%al' and
the lack of non-GPR callee saves on x86. Given APX means we'll be
recompiling all x86 binaries for the extra registers, it would be nice
to have made these issues history, given runtimes an extra bit (2x the
encoding space for metadata), etc.

Thanks,
Ian

>  o UAI applies only to data addresses; instruction addresses must remain
>    canonical.
>
> So I assume this should not be an issue, at least for now.
>
> > Does the bit 63 assumption hold for guest operating systems?
>
> Yes, this seems to be an issue, even with current swfilt approach. Let
> me inspect the code and get back.
>
> Thanks for the review,
> Ravi
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 8c63c4af92ac5f041ce437c1f2a31ce3ef03c585
Gitweb: https://git.kernel.org/tip/8c63c4af92ac5f041ce437c1f2a31ce3ef03c585
Author: Ravi Bangoria <ravi.bangoria@amd.com>
AuthorDate: Mon, 16 Feb 2026 04:25:28
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Sat, 28 Feb 2026 12:03:29 +01:00
perf/amd/ibs: Enable RIP bit63 hardware filtering
IBS on future hardware adds the ability to filter IBS events by examining
RIP bit 63. Because Linux kernel addresses always have bit 63 set while
user-space addresses never do, this capability can be used as a privilege
filter.
So far, IBS supports privilege filtering in software (swfilt=1), where
samples are dropped in the NMI handler. The RIP bit63 hardware filter
enables IBS to be usable by unprivileged users without passing swfilt
flag. So, swfilt flag will silently be ignored when the hardware
filtering capability is present.
Example (non-root user):
$ perf record -e ibs_op//u -- <workload>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260216042530.1546-6-ravi.bangoria@amd.com
---
arch/x86/events/amd/ibs.c | 46 +++++++++++++++++++++++++++++++-------
1 file changed, 38 insertions(+), 8 deletions(-)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index cb3ae4e..13ecc8d 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -321,11 +321,6 @@ static int perf_ibs_init(struct perf_event *event)
event->attr.exclude_idle)
return -EINVAL;
- if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
- (event->attr.exclude_kernel || event->attr.exclude_user ||
- event->attr.exclude_hv))
- return -EINVAL;
-
ret = validate_group(event);
if (ret)
return ret;
@@ -338,6 +333,32 @@ static int perf_ibs_init(struct perf_event *event)
hwc->extra_reg.reg = perf_ibs->msr2;
}
+ if (ibs_caps & IBS_CAPS_BIT63_FILTER) {
+ if (perf_ibs == &perf_ibs_fetch) {
+ if (event->attr.exclude_kernel) {
+ hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_1;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ if (event->attr.exclude_user) {
+ hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_0;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ } else {
+ if (event->attr.exclude_kernel) {
+ hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_1;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ if (event->attr.exclude_user) {
+ hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_0;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ }
+ } else if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
+ (event->attr.exclude_kernel || event->attr.exclude_user ||
+ event->attr.exclude_hv)) {
+ return -EINVAL;
+ }
+
if (hwc->sample_period) {
if (config & perf_ibs->cnt_mask)
/* raw max_cnt may not be set */
@@ -1280,7 +1301,7 @@ static bool perf_ibs_is_kernel_br_target(struct perf_event *event,
op_data.op_brn_ret && kernel_ip(br_target));
}
-static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event,
+static bool perf_ibs_discard_sample(struct perf_ibs *perf_ibs, struct perf_event *event,
struct pt_regs *regs, struct perf_ibs_data *ibs_data,
int br_target_idx)
{
@@ -1435,8 +1456,9 @@ fail:
regs.flags |= PERF_EFLAGS_EXACT;
}
- if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
- perf_ibs_swfilt_discard(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
+ if (((ibs_caps & IBS_CAPS_BIT63_FILTER) ||
+ (event->attr.config2 & IBS_SW_FILTER_MASK)) &&
+ perf_ibs_discard_sample(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
throttle = perf_event_account_interrupt(event);
goto out;
}
@@ -1899,6 +1921,14 @@ static __init int amd_ibs_init(void)
perf_ibs_pm_init();
+#ifdef CONFIG_X86_32
+ /*
+ * IBS_CAPS_BIT63_FILTER is used for exclude_kernel/user filtering,
+ * which obviously won't work for 32 bit kernel.
+ */
+ caps &= ~IBS_CAPS_BIT63_FILTER;
+#endif
+
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
smp_mb();
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 62ed6df2c8078725ba5bfb6ca2d06acc8d15bd36
Gitweb: https://git.kernel.org/tip/62ed6df2c8078725ba5bfb6ca2d06acc8d15bd36
Author: Ravi Bangoria <ravi.bangoria@amd.com>
AuthorDate: Mon, 16 Feb 2026 04:25:28
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 27 Feb 2026 16:40:25 +01:00
perf/amd/ibs: Enable RIP bit63 hardware filtering
IBS on future hardware adds the ability to filter IBS events by examining
RIP bit 63. Because Linux kernel addresses always have bit 63 set while
user-space addresses never do, this capability can be used as a privilege
filter.
So far, IBS supports privilege filtering in software (swfilt=1), where
samples are dropped in the NMI handler. The RIP bit63 hardware filter
enables IBS to be usable by unprivileged users without passing swfilt
flag. So, swfilt flag will silently be ignored when the hardware
filtering capability is present.
Example (non-root user):
$ perf record -e ibs_op//u -- <workload>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260216042530.1546-6-ravi.bangoria@amd.com
---
arch/x86/events/amd/ibs.c | 46 +++++++++++++++++++++++++++++++-------
1 file changed, 38 insertions(+), 8 deletions(-)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index cb3ae4e..13ecc8d 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -321,11 +321,6 @@ static int perf_ibs_init(struct perf_event *event)
event->attr.exclude_idle)
return -EINVAL;
- if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
- (event->attr.exclude_kernel || event->attr.exclude_user ||
- event->attr.exclude_hv))
- return -EINVAL;
-
ret = validate_group(event);
if (ret)
return ret;
@@ -338,6 +333,32 @@ static int perf_ibs_init(struct perf_event *event)
hwc->extra_reg.reg = perf_ibs->msr2;
}
+ if (ibs_caps & IBS_CAPS_BIT63_FILTER) {
+ if (perf_ibs == &perf_ibs_fetch) {
+ if (event->attr.exclude_kernel) {
+ hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_1;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ if (event->attr.exclude_user) {
+ hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_0;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ } else {
+ if (event->attr.exclude_kernel) {
+ hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_1;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ if (event->attr.exclude_user) {
+ hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_0;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+ }
+ } else if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
+ (event->attr.exclude_kernel || event->attr.exclude_user ||
+ event->attr.exclude_hv)) {
+ return -EINVAL;
+ }
+
if (hwc->sample_period) {
if (config & perf_ibs->cnt_mask)
/* raw max_cnt may not be set */
@@ -1280,7 +1301,7 @@ static bool perf_ibs_is_kernel_br_target(struct perf_event *event,
op_data.op_brn_ret && kernel_ip(br_target));
}
-static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event,
+static bool perf_ibs_discard_sample(struct perf_ibs *perf_ibs, struct perf_event *event,
struct pt_regs *regs, struct perf_ibs_data *ibs_data,
int br_target_idx)
{
@@ -1435,8 +1456,9 @@ fail:
regs.flags |= PERF_EFLAGS_EXACT;
}
- if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
- perf_ibs_swfilt_discard(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
+ if (((ibs_caps & IBS_CAPS_BIT63_FILTER) ||
+ (event->attr.config2 & IBS_SW_FILTER_MASK)) &&
+ perf_ibs_discard_sample(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
throttle = perf_event_account_interrupt(event);
goto out;
}
@@ -1899,6 +1921,14 @@ static __init int amd_ibs_init(void)
perf_ibs_pm_init();
+#ifdef CONFIG_X86_32
+ /*
+ * IBS_CAPS_BIT63_FILTER is used for exclude_kernel/user filtering,
+ * which obviously won't work for 32 bit kernel.
+ */
+ caps &= ~IBS_CAPS_BIT63_FILTER;
+#endif
+
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
smp_mb();
© 2016 - 2026 Red Hat, Inc.