[PATCH 07/11] perf/amd/ibs: Support IBS_{FETCH|OP}_CTL2[Dis] to eliminate RMW race

Ravi Bangoria posted 11 patches 3 weeks, 3 days ago
[PATCH 07/11] perf/amd/ibs: Support IBS_{FETCH|OP}_CTL2[Dis] to eliminate RMW race
Posted by Ravi Bangoria 3 weeks, 3 days ago
The existing IBS_{FETCH|OP}_CTL MSRs combine control and status bits,
which leads to an RMW race between HW and SW:

  HW                               SW
  ------------------------         ------------------------------
                                   config = rdmsr(IBS_OP_CTL);
                                   config &= ~EN;
  Set IBS_OP_CTL[Val] to 1
  trigger NMI
                                   wrmsr(IBS_OP_CTL, config);
                                   // Val is accidentally cleared

Future hardware adds a control-only MSR, IBS_{FETCH|OP}_CTL2, which
provides a second-level "disable" bit (Dis). IBS is now:

  Enabled:  IBS_{FETCH|OP}_CTL[En] = 1 && IBS_{FETCH|OP}_CTL2[Dis] = 0
  Disabled: IBS_{FETCH|OP}_CTL[En] = 0 || IBS_{FETCH|OP}_CTL2[Dis] = 1

The separate "Dis" bit lets software disable IBS without touching any
status fields, eliminating the hardware/software race.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
 arch/x86/events/amd/ibs.c | 45 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 02e7bffe1208..d8216048be84 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -86,9 +86,11 @@ struct cpu_perf_ibs {
 struct perf_ibs {
 	struct pmu			pmu;
 	unsigned int			msr;
+	unsigned int			msr2;
 	u64				config_mask;
 	u64				cnt_mask;
 	u64				enable_mask;
+	u64				disable_mask;
 	u64				valid_mask;
 	u16				min_period;
 	u64				max_period;
@@ -292,6 +294,8 @@ static int perf_ibs_init(struct perf_event *event)
 		return -ENOENT;
 
 	config = event->attr.config;
+	hwc->extra_reg.config = 0;
+	hwc->extra_reg.reg = 0;
 
 	if (event->pmu != &perf_ibs->pmu)
 		return -ENOENT;
@@ -316,6 +320,11 @@ static int perf_ibs_init(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	if (ibs_caps & IBS_CAPS_DIS) {
+		hwc->extra_reg.config &= ~perf_ibs->disable_mask;
+		hwc->extra_reg.reg = perf_ibs->msr2;
+	}
+
 	if (hwc->sample_period) {
 		if (config & perf_ibs->cnt_mask)
 			/* raw max_cnt may not be set */
@@ -445,6 +454,9 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
 		wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask);
 
 	wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask);
+
+	if (hwc->extra_reg.reg)
+		wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config);
 }
 
 /*
@@ -457,6 +469,11 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
 static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
 					  struct hw_perf_event *hwc, u64 config)
 {
+	if (ibs_caps & IBS_CAPS_DIS) {
+		wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask);
+		return;
+	}
+
 	config &= ~perf_ibs->cnt_mask;
 	if (boot_cpu_data.x86 == 0x10)
 		wrmsrq(hwc->config_base, config);
@@ -809,6 +826,7 @@ static struct perf_ibs perf_ibs_fetch = {
 		.check_period	= perf_ibs_check_period,
 	},
 	.msr			= MSR_AMD64_IBSFETCHCTL,
+	.msr2			= MSR_AMD64_IBSFETCHCTL2,
 	.config_mask		= IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN,
 	.cnt_mask		= IBS_FETCH_MAX_CNT,
 	.enable_mask		= IBS_FETCH_ENABLE,
@@ -834,6 +852,7 @@ static struct perf_ibs perf_ibs_op = {
 		.check_period	= perf_ibs_check_period,
 	},
 	.msr			= MSR_AMD64_IBSOPCTL,
+	.msr2			= MSR_AMD64_IBSOPCTL2,
 	.config_mask		= IBS_OP_MAX_CNT,
 	.cnt_mask		= IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
 				  IBS_OP_CUR_CNT_RAND,
@@ -1389,6 +1408,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 
 out:
 	if (!throttle) {
+		if (ibs_caps & IBS_CAPS_DIS)
+			wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask);
+
 		if (perf_ibs == &perf_ibs_op) {
 			if (ibs_caps & IBS_CAPS_OPCNTEXT) {
 				new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
@@ -1460,6 +1482,9 @@ static __init int perf_ibs_fetch_init(void)
 	if (ibs_caps & IBS_CAPS_ZEN4)
 		perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;
 
+	if (ibs_caps & IBS_CAPS_DIS)
+		perf_ibs_fetch.disable_mask = IBS_FETCH_2_DIS;
+
 	perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
 	perf_ibs_fetch.pmu.attr_update = fetch_attr_update;
 
@@ -1481,6 +1506,9 @@ static __init int perf_ibs_op_init(void)
 	if (ibs_caps & IBS_CAPS_ZEN4)
 		perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
 
+	if (ibs_caps & IBS_CAPS_DIS)
+		perf_ibs_op.disable_mask = IBS_OP_2_DIS;
+
 	perf_ibs_op.pmu.attr_groups = op_attr_groups;
 	perf_ibs_op.pmu.attr_update = op_attr_update;
 
@@ -1727,6 +1755,23 @@ static void clear_APIC_ibs(void)
 static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
 {
 	setup_APIC_ibs();
+
+	if (ibs_caps & IBS_CAPS_DIS) {
+		/*
+		 * IBS enable sequence:
+		 *   CTL[En] = 1;
+		 *   CTL2[Dis] = 0;
+		 *
+		 * IBS disable sequence:
+		 *   CTL2[Dis] = 1;
+		 *
+		 * Set CTL2[Dis] when the CPU comes up. This is needed to
+		 * make the enable sequence effective.
+		 */
+		wrmsrq(MSR_AMD64_IBSFETCHCTL2, IBS_FETCH_2_DIS);
+		wrmsrq(MSR_AMD64_IBSOPCTL2, IBS_OP_2_DIS);
+	}
+
 	return 0;
 }
 
-- 
2.43.0
Re: [PATCH 07/11] perf/amd/ibs: Support IBS_{FETCH|OP}_CTL2[Dis] to eliminate RMW race
Posted by Mi, Dapeng 3 weeks ago
On 1/16/2026 11:34 AM, Ravi Bangoria wrote:
> The existing IBS_{FETCH|OP}_CTL MSRs combine control and status bits
> which leads to RMW race between HW and SW:
>
>   HW                               SW
>   ------------------------         ------------------------------
>                                    config = rdmsr(IBS_OP_CTL);
>                                    config &= ~EN;
>   Set IBS_OP_CTL[Val] to 1
>   trigger NMI
>                                    wrmsr(IBS_OP_CTL, config);
>                                    // Val is accidentally cleared
>
> Future hardware adds a control-only MSR, IBS_{FETCH|OP}_CTL2, which
> provides a second-level "disable" bit (Dis). IBS is now:
>
>   Enabled:  IBS_{FETCH|OP}_CTL[En] = 1 && IBS_{FETCH|OP}_CTL2[Dis] = 0
>   Disabled: IBS_{FETCH|OP}_CTL[En] = 0 || IBS_{FETCH|OP}_CTL2[Dis] = 1
>
> The separate "Dis" bit lets software disable IBS without touching any
> status fields, eliminating the hardware/software race.
>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
> ---
>  arch/x86/events/amd/ibs.c | 45 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 45 insertions(+)
>
> diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
> index 02e7bffe1208..d8216048be84 100644
> --- a/arch/x86/events/amd/ibs.c
> +++ b/arch/x86/events/amd/ibs.c
> @@ -86,9 +86,11 @@ struct cpu_perf_ibs {
>  struct perf_ibs {
>  	struct pmu			pmu;
>  	unsigned int			msr;
> +	unsigned int			msr2;
>  	u64				config_mask;
>  	u64				cnt_mask;
>  	u64				enable_mask;
> +	u64				disable_mask;
>  	u64				valid_mask;
>  	u16				min_period;
>  	u64				max_period;
> @@ -292,6 +294,8 @@ static int perf_ibs_init(struct perf_event *event)
>  		return -ENOENT;
>  
>  	config = event->attr.config;
> +	hwc->extra_reg.config = 0;
> +	hwc->extra_reg.reg = 0;
>  
>  	if (event->pmu != &perf_ibs->pmu)
>  		return -ENOENT;
> @@ -316,6 +320,11 @@ static int perf_ibs_init(struct perf_event *event)
>  	if (ret)
>  		return ret;
>  
> +	if (ibs_caps & IBS_CAPS_DIS) {
> +		hwc->extra_reg.config &= ~perf_ibs->disable_mask;
> +		hwc->extra_reg.reg = perf_ibs->msr2;
> +	}
> +
>  	if (hwc->sample_period) {
>  		if (config & perf_ibs->cnt_mask)
>  			/* raw max_cnt may not be set */
> @@ -445,6 +454,9 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
>  		wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask);
>  
>  	wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask);
> +
> +	if (hwc->extra_reg.reg)
> +		wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config);
>  }
>  
>  /*
> @@ -457,6 +469,11 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
>  static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
>  					  struct hw_perf_event *hwc, u64 config)
>  {
> +	if (ibs_caps & IBS_CAPS_DIS) {
> +		wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask);
> +		return;
> +	}
> +
>  	config &= ~perf_ibs->cnt_mask;
>  	if (boot_cpu_data.x86 == 0x10)
>  		wrmsrq(hwc->config_base, config);
> @@ -809,6 +826,7 @@ static struct perf_ibs perf_ibs_fetch = {
>  		.check_period	= perf_ibs_check_period,
>  	},
>  	.msr			= MSR_AMD64_IBSFETCHCTL,
> +	.msr2			= MSR_AMD64_IBSFETCHCTL2,
>  	.config_mask		= IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN,
>  	.cnt_mask		= IBS_FETCH_MAX_CNT,
>  	.enable_mask		= IBS_FETCH_ENABLE,
> @@ -834,6 +852,7 @@ static struct perf_ibs perf_ibs_op = {
>  		.check_period	= perf_ibs_check_period,
>  	},
>  	.msr			= MSR_AMD64_IBSOPCTL,
> +	.msr2			= MSR_AMD64_IBSOPCTL2,
>  	.config_mask		= IBS_OP_MAX_CNT,
>  	.cnt_mask		= IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
>  				  IBS_OP_CUR_CNT_RAND,
> @@ -1389,6 +1408,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
>  
>  out:
>  	if (!throttle) {
> +		if (ibs_caps & IBS_CAPS_DIS)
> +			wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask);
> +
>  		if (perf_ibs == &perf_ibs_op) {
>  			if (ibs_caps & IBS_CAPS_OPCNTEXT) {
>  				new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
> @@ -1460,6 +1482,9 @@ static __init int perf_ibs_fetch_init(void)
>  	if (ibs_caps & IBS_CAPS_ZEN4)
>  		perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;
>  
> +	if (ibs_caps & IBS_CAPS_DIS)
> +		perf_ibs_fetch.disable_mask = IBS_FETCH_2_DIS;
> +
>  	perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
>  	perf_ibs_fetch.pmu.attr_update = fetch_attr_update;
>  
> @@ -1481,6 +1506,9 @@ static __init int perf_ibs_op_init(void)
>  	if (ibs_caps & IBS_CAPS_ZEN4)
>  		perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
>  
> +	if (ibs_caps & IBS_CAPS_DIS)
> +		perf_ibs_op.disable_mask = IBS_OP_2_DIS;
> +
>  	perf_ibs_op.pmu.attr_groups = op_attr_groups;
>  	perf_ibs_op.pmu.attr_update = op_attr_update;
>  
> @@ -1727,6 +1755,23 @@ static void clear_APIC_ibs(void)
>  static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
>  {
>  	setup_APIC_ibs();
> +
> +	if (ibs_caps & IBS_CAPS_DIS) {
> +		/*
> +		 * IBS enable sequence:
> +		 *   CTL[En] = 1;
> +		 *   CTL2[Dis] = 0;
> +		 *
> +		 * IBS disable sequence:
> +		 *   CTL2[Dis] = 1;
> +		 *
> +		 * Set CTL2[Dis] when CPU comes up. This is needed to make
> +		 * enable sequence effective.
> +		 */
> +		wrmsrq(MSR_AMD64_IBSFETCHCTL2, 1);
> +		wrmsrq(MSR_AMD64_IBSOPCTL2, 1);

What does the BIT 0 of these 2 MSRs mean? Disable? Better define a macro
instead of using the magic number "1". 


> +	}
> +
>  	return 0;
>  }
>  
Re: [PATCH 07/11] perf/amd/ibs: Support IBS_{FETCH|OP}_CTL2[Dis] to eliminate RMW race
Posted by Ravi Bangoria 3 weeks ago
Hi Dapeng,

>>  static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
>>  {
>>  	setup_APIC_ibs();
>> +
>> +	if (ibs_caps & IBS_CAPS_DIS) {
>> +		/*
>> +		 * IBS enable sequence:
>> +		 *   CTL[En] = 1;
>> +		 *   CTL2[Dis] = 0;
>> +		 *
>> +		 * IBS disable sequence:
>> +		 *   CTL2[Dis] = 1;
>> +		 *
>> +		 * Set CTL2[Dis] when CPU comes up. This is needed to make
>> +		 * enable sequence effective.
>> +		 */
>> +		wrmsrq(MSR_AMD64_IBSFETCHCTL2, 1);
>> +		wrmsrq(MSR_AMD64_IBSOPCTL2, 1);
> 
> What does the BIT 0 of these 2 MSRs mean? Disable? Better define a macro
> instead of using the magic number "1". 

Right, those are disable bits. I'll replace those magic numbers with
IBS_FETCH_2_DIS and IBS_OP_2_DIS.

Thanks,
Ravi