The existing IBS_{FETCH|OP}_CTL MSRs combine control and status bits,
which leads to a read-modify-write (RMW) race between HW and SW:
             HW                                          SW
  ------------------------      ------------------------------
                                config = rdmsr(IBS_OP_CTL);
                                config &= ~EN;
  Set IBS_OP_CTL[Val] to 1
  trigger NMI
                                wrmsr(IBS_OP_CTL, config);
                                // Val is accidentally cleared
Future hardware adds a control-only MSR, IBS_{FETCH|OP}_CTL2, which
provides a second-level "disable" bit (Dis). IBS is now:
Enabled: IBS_{FETCH|OP}_CTL[En] = 1 && IBS_{FETCH|OP}_CTL2[Dis] = 0
Disabled: IBS_{FETCH|OP}_CTL[En] = 0 || IBS_{FETCH|OP}_CTL2[Dis] = 1
The separate "Dis" bit lets software disable IBS without touching any
status fields, eliminating the hardware/software race.
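For illustration, a minimal sketch of the two sequences on the op side under
the new scheme (assuming Dis is bit 0 of IBS_OP_CTL2, and reusing the
IBS_OP_ENABLE and IBS_OP_2_DIS bit definitions used elsewhere in this series):

        /* Enable: program CTL as before, then clear the second-level disable. */
        wrmsrq(MSR_AMD64_IBSOPCTL, config | IBS_OP_ENABLE);
        wrmsrq(MSR_AMD64_IBSOPCTL2, 0);                 /* Dis = 0 */

        /*
         * Disable: a single write, no read-modify-write of CTL, so status
         * bits that hardware may have set in the meantime stay intact.
         */
        wrmsrq(MSR_AMD64_IBSOPCTL2, IBS_OP_2_DIS);      /* Dis = 1 */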
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
arch/x86/events/amd/ibs.c | 45 +++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 02e7bffe1208..d8216048be84 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -86,9 +86,11 @@ struct cpu_perf_ibs {
struct perf_ibs {
struct pmu pmu;
unsigned int msr;
+ unsigned int msr2;
u64 config_mask;
u64 cnt_mask;
u64 enable_mask;
+ u64 disable_mask;
u64 valid_mask;
u16 min_period;
u64 max_period;
@@ -292,6 +294,8 @@ static int perf_ibs_init(struct perf_event *event)
return -ENOENT;
config = event->attr.config;
+ hwc->extra_reg.config = 0;
+ hwc->extra_reg.reg = 0;
if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
@@ -316,6 +320,11 @@ static int perf_ibs_init(struct perf_event *event)
if (ret)
return ret;
+ if (ibs_caps & IBS_CAPS_DIS) {
+ hwc->extra_reg.config &= ~perf_ibs->disable_mask;
+ hwc->extra_reg.reg = perf_ibs->msr2;
+ }
+
if (hwc->sample_period) {
if (config & perf_ibs->cnt_mask)
/* raw max_cnt may not be set */
@@ -445,6 +454,9 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask);
wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask);
+
+ if (hwc->extra_reg.reg)
+ wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config);
}
/*
@@ -457,6 +469,11 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
+ if (ibs_caps & IBS_CAPS_DIS) {
+ wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask);
+ return;
+ }
+
config &= ~perf_ibs->cnt_mask;
if (boot_cpu_data.x86 == 0x10)
wrmsrq(hwc->config_base, config);
@@ -809,6 +826,7 @@ static struct perf_ibs perf_ibs_fetch = {
.check_period = perf_ibs_check_period,
},
.msr = MSR_AMD64_IBSFETCHCTL,
+ .msr2 = MSR_AMD64_IBSFETCHCTL2,
.config_mask = IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN,
.cnt_mask = IBS_FETCH_MAX_CNT,
.enable_mask = IBS_FETCH_ENABLE,
@@ -834,6 +852,7 @@ static struct perf_ibs perf_ibs_op = {
.check_period = perf_ibs_check_period,
},
.msr = MSR_AMD64_IBSOPCTL,
+ .msr2 = MSR_AMD64_IBSOPCTL2,
.config_mask = IBS_OP_MAX_CNT,
.cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
IBS_OP_CUR_CNT_RAND,
@@ -1389,6 +1408,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
out:
if (!throttle) {
+ if (ibs_caps & IBS_CAPS_DIS)
+ wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask);
+
if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
@@ -1460,6 +1482,9 @@ static __init int perf_ibs_fetch_init(void)
if (ibs_caps & IBS_CAPS_ZEN4)
perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;
+ if (ibs_caps & IBS_CAPS_DIS)
+ perf_ibs_fetch.disable_mask = IBS_FETCH_2_DIS;
+
perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
perf_ibs_fetch.pmu.attr_update = fetch_attr_update;
@@ -1481,6 +1506,9 @@ static __init int perf_ibs_op_init(void)
if (ibs_caps & IBS_CAPS_ZEN4)
perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
+ if (ibs_caps & IBS_CAPS_DIS)
+ perf_ibs_op.disable_mask = IBS_OP_2_DIS;
+
perf_ibs_op.pmu.attr_groups = op_attr_groups;
perf_ibs_op.pmu.attr_update = op_attr_update;
@@ -1727,6 +1755,23 @@ static void clear_APIC_ibs(void)
static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
{
setup_APIC_ibs();
+
+ if (ibs_caps & IBS_CAPS_DIS) {
+ /*
+ * IBS enable sequence:
+ * CTL[En] = 1;
+ * CTL2[Dis] = 0;
+ *
+ * IBS disable sequence:
+ * CTL2[Dis] = 1;
+ *
+ * Set CTL2[Dis] when CPU comes up. This is needed to make
+ * enable sequence effective.
+ */
+ wrmsrq(MSR_AMD64_IBSFETCHCTL2, 1);
+ wrmsrq(MSR_AMD64_IBSOPCTL2, 1);
+ }
+
return 0;
}
--
2.43.0
On 1/16/2026 11:34 AM, Ravi Bangoria wrote:
> @@ -1727,6 +1755,23 @@ static void clear_APIC_ibs(void)
> static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
> {
> setup_APIC_ibs();
> +
> + if (ibs_caps & IBS_CAPS_DIS) {
> + /*
> + * IBS enable sequence:
> + * CTL[En] = 1;
> + * CTL2[Dis] = 0;
> + *
> + * IBS disable sequence:
> + * CTL2[Dis] = 1;
> + *
> + * Set CTL2[Dis] when CPU comes up. This is needed to make
> + * enable sequence effective.
> + */
> + wrmsrq(MSR_AMD64_IBSFETCHCTL2, 1);
> + wrmsrq(MSR_AMD64_IBSOPCTL2, 1);
What does bit 0 of these two MSRs mean? Disable? Better to define a macro
instead of using the magic number "1".
> + }
> +
> return 0;
> }
>
Hi Dapeng,
>> static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
>> {
>> setup_APIC_ibs();
>> +
>> + if (ibs_caps & IBS_CAPS_DIS) {
>> + /*
>> + * IBS enable sequence:
>> + * CTL[En] = 1;
>> + * CTL2[Dis] = 0;
>> + *
>> + * IBS disable sequence:
>> + * CTL2[Dis] = 1;
>> + *
>> + * Set CTL2[Dis] when CPU comes up. This is needed to make
>> + * enable sequence effective.
>> + */
>> + wrmsrq(MSR_AMD64_IBSFETCHCTL2, 1);
>> + wrmsrq(MSR_AMD64_IBSOPCTL2, 1);
>
> What does bit 0 of these two MSRs mean? Disable? Better to define a macro
> instead of using the magic number "1".
Right, those are disable bits. I'll replace those magic numbers with
IBS_FETCH_2_DIS and IBS_OP_2_DIS.
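Something like this for that hunk, assuming the two macros are the bit-0 Dis
definitions this patch already uses for disable_mask:

        if (ibs_caps & IBS_CAPS_DIS) {
                /* Park both IBS units in the second-level disabled state. */
                wrmsrq(MSR_AMD64_IBSFETCHCTL2, IBS_FETCH_2_DIS);
                wrmsrq(MSR_AMD64_IBSOPCTL2, IBS_OP_2_DIS);
        }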
Thanks,
Ravi