[PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
Posted by Chang S. Bae 2 weeks ago
From: David Kaplan <david.kaplan@amd.com>

stop_machine_nmi() is a variant of stop_machine() that runs the specified
function in NMI context.  This is useful for flows that cannot tolerate any
risk of interruption, even from an NMI.  Arch-specific code must handle
sending the actual NMI and invoking stop_machine_nmi_handler().
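
For illustration only, an architecture's NMI path could dispatch to the
handler roughly as below (an x86-flavored sketch with made-up names; the
actual wiring is arch-specific and not part of this patch):

	static int stop_machine_nmi_cb(unsigned int cmd, struct pt_regs *regs)
	{
		/* Static-key check keeps the common NMI path cheap */
		if (!stop_machine_nmi_handler_enabled())
			return NMI_DONE;

		/* Runs the queued stop_machine callback on this CPU */
		return stop_machine_nmi_handler() ? NMI_HANDLED : NMI_DONE;
	}

arch_send_self_nmi() would then just send an NMI IPI to the local CPU.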

Signed-off-by: David Kaplan <david.kaplan@amd.com>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
Update from the original version:
* Move static key handling into stop_machine_cpuslocked_nmi() to support
  core-code users that already hold cpu hotplug locks
* Tweak the subject to better reflect the new interface, and adjust the
  changelog a bit as well
---
 include/linux/stop_machine.h | 50 +++++++++++++++++++++
 kernel/stop_machine.c        | 84 ++++++++++++++++++++++++++++++++++--
 2 files changed, 130 insertions(+), 4 deletions(-)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 72820503514c..86113084456a 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -141,6 +141,29 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
  */
 int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
 
+/**
+ * stop_machine_nmi: freeze the machine and run this function in NMI context
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+ *
+ * Like stop_machine() but runs the function in NMI context to avoid any risk of
+ * interruption due to NMIs.
+ *
+ * Protects against CPU hotplug.
+ */
+int stop_machine_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
+
+/**
+ * stop_machine_cpuslocked_nmi: freeze and run this function in NMI context
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+ *
+ * Same as above. Must be called from within a cpus_read_lock() protected
+ * region. Avoids nested calls to cpus_read_lock().
+ */
+int stop_machine_cpuslocked_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
 /**
  * stop_core_cpuslocked: - stop all threads on just one core
  * @cpu: any cpu in the targeted core
@@ -160,6 +183,14 @@ int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data);
 
 int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 				   const struct cpumask *cpus);
+
+bool noinstr stop_machine_nmi_handler(void);
+DECLARE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
+static __always_inline bool stop_machine_nmi_handler_enabled(void)
+{
+	return static_branch_unlikely(&stop_machine_nmi_handler_enable);
+}
+
 #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
 
 static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
@@ -186,5 +217,24 @@ stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 	return stop_machine(fn, data, cpus);
 }
 
+/* stop_machine_nmi() is only supported in SMP systems. */
+static __always_inline int stop_machine_nmi(cpu_stop_fn_t fn, void *data,
+					const struct cpumask *cpus)
+{
+	return -EINVAL;
+}
+
+static __always_inline bool stop_machine_nmi_handler_enabled(void)
+{
+	return false;
+}
+
+static __always_inline bool stop_machine_nmi_handler(void)
+{
+	return false;
+}
+
 #endif	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
+
+void arch_send_self_nmi(void);
 #endif	/* _LINUX_STOP_MACHINE */
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 3fe6b0c99f3d..189b4b108d13 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -174,6 +174,8 @@ struct multi_stop_data {
 
 	enum multi_stop_state	state;
 	atomic_t		thread_ack;
+
+	bool			use_nmi;
 };
 
 static void set_state(struct multi_stop_data *msdata,
@@ -197,6 +199,42 @@ notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
 	cpu_relax();
 }
 
+struct stop_machine_nmi_ctrl {
+	bool nmi_enabled;
+	struct multi_stop_data *msdata;
+	int err;
+};
+
+DEFINE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
+static DEFINE_PER_CPU(struct stop_machine_nmi_ctrl, stop_machine_nmi_ctrl);
+
+static void enable_nmi_handler(struct multi_stop_data *msdata)
+{
+	this_cpu_write(stop_machine_nmi_ctrl.msdata, msdata);
+	this_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, true);
+}
+
+void __weak arch_send_self_nmi(void)
+{
+	/* Arch code must implement this to support stop_machine_nmi() */
+}
+
+bool noinstr stop_machine_nmi_handler(void)
+{
+	struct multi_stop_data *msdata;
+	int err;
+
+	if (!raw_cpu_read(stop_machine_nmi_ctrl.nmi_enabled))
+		return false;
+
+	raw_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, false);
+
+	msdata = raw_cpu_read(stop_machine_nmi_ctrl.msdata);
+	err = msdata->fn(msdata->data);
+	raw_cpu_write(stop_machine_nmi_ctrl.err, err);
+	return true;
+}
+
 /* This is the cpu_stop function which stops the CPU. */
 static int multi_cpu_stop(void *data)
 {
@@ -234,8 +272,15 @@ static int multi_cpu_stop(void *data)
 				hard_irq_disable();
 				break;
 			case MULTI_STOP_RUN:
-				if (is_active)
-					err = msdata->fn(msdata->data);
+				if (is_active) {
+					if (msdata->use_nmi) {
+						enable_nmi_handler(msdata);
+						arch_send_self_nmi();
+						err = raw_cpu_read(stop_machine_nmi_ctrl.err);
+					} else {
+						err = msdata->fn(msdata->data);
+					}
+				}
 				break;
 			default:
 				break;
@@ -584,14 +629,15 @@ static int __init cpu_stop_init(void)
 }
 early_initcall(cpu_stop_init);
 
-int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
-			    const struct cpumask *cpus)
+static int __stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+			    const struct cpumask *cpus, bool use_nmi)
 {
 	struct multi_stop_data msdata = {
 		.fn = fn,
 		.data = data,
 		.num_threads = num_online_cpus(),
 		.active_cpus = cpus,
+		.use_nmi = use_nmi,
 	};
 
 	lockdep_assert_cpus_held();
@@ -620,6 +666,24 @@ int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
 	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
 }
 
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+			    const struct cpumask *cpus)
+{
+	return __stop_machine_cpuslocked(fn, data, cpus, false);
+}
+
+int stop_machine_cpuslocked_nmi(cpu_stop_fn_t fn, void *data,
+				const struct cpumask *cpus)
+{
+	int ret;
+
+	static_branch_enable_cpuslocked(&stop_machine_nmi_handler_enable);
+	ret = __stop_machine_cpuslocked(fn, data, cpus, true);
+	static_branch_disable_cpuslocked(&stop_machine_nmi_handler_enable);
+
+	return ret;
+}
+
 int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 {
 	int ret;
@@ -632,6 +696,18 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 }
 EXPORT_SYMBOL_GPL(stop_machine);
 
+int stop_machine_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
+{
+	int ret;
+
+	cpus_read_lock();
+	ret = stop_machine_cpuslocked_nmi(fn, data, cpus);
+	cpus_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(stop_machine_nmi);
+
 #ifdef CONFIG_SCHED_SMT
 int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data)
 {
-- 
2.51.0
Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
Posted by Thomas Gleixner 1 week, 4 days ago
On Sun, Jan 25 2026 at 01:42, Chang S. Bae wrote:
> +/**
> + * stop_machine_nmi: freeze the machine and run this function in NMI context
> + * @fn: the function to run
> + * @data: the data ptr for the @fn()
> + * @cpus: the cpus to run the @fn() on (NULL = any online cpu)

Please format these tabular, use uppercase letters to start the
explanation, use CPU[s] all over the place and write out words instead
of using made up abbreviations. This is documentation not twitter.

 * @fn:		The function to invoke
 * @data:	The data pointer for @fn()
 * @cpus:	A cpumask containing the CPUs to run fn() on

Also this NULL == any online CPU is just made up. What's wrong with
cpu_online_mask?

> +
> +bool noinstr stop_machine_nmi_handler(void);
> +DECLARE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
> +static __always_inline bool stop_machine_nmi_handler_enabled(void)

Can you please separate the declarations from the inline with an empty
new line? This glued together way to write it is unreadable.

> +{
> +	return static_branch_unlikely(&stop_machine_nmi_handler_enable);
> +}
> +
>  #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
>  
>  static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
> @@ -186,5 +217,24 @@ stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
>  	return stop_machine(fn, data, cpus);
>  }
>  
> +/* stop_machine_nmi() is only supported in SMP systems. */
> +static __always_inline int stop_machine_nmi(cpu_stop_fn_t fn, void *data,
> +					const struct cpumask *cpus)

Align the second line argument with the first argument above.

See https://www.kernel.org/doc/html/latest/process/maintainer-tip.html

> +{
> +	return -EINVAL;
> +}
> +

> +
> +void arch_send_self_nmi(void);
>  #endif	/* _LINUX_STOP_MACHINE */
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> index 3fe6b0c99f3d..189b4b108d13 100644
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -174,6 +174,8 @@ struct multi_stop_data {
>  
>  	enum multi_stop_state	state;
>  	atomic_t		thread_ack;
> +
> +	bool			use_nmi;
>  };
>  
>  static void set_state(struct multi_stop_data *msdata,
> @@ -197,6 +199,42 @@ notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
>  	cpu_relax();
>  }
>  
> +struct stop_machine_nmi_ctrl {
> +	bool nmi_enabled;
> +	struct multi_stop_data *msdata;
> +	int err;

Please align the struct member names tabular. See documentation.

> +};
> +
> +DEFINE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
> +static DEFINE_PER_CPU(struct stop_machine_nmi_ctrl, stop_machine_nmi_ctrl);
> +
> +static void enable_nmi_handler(struct multi_stop_data *msdata)
> +{
> +	this_cpu_write(stop_machine_nmi_ctrl.msdata, msdata);
> +	this_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, true);
> +}
> +
> +void __weak arch_send_self_nmi(void)
> +{
> +	/* Arch code must implement this to support stop_machine_nmi() */

Architecture

> +}

Also this weak function is wrong.

All of this NMI mode needs to be guarded with a config option as it
otherwise is compiled in unconditionally and any accidental usage on an
architecture which does not support this will result in an undecodable
malfunction. There is a world outside of x86.

With that arch_send_self_nmi() becomes a plain declaration in a header.
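
Roughly (a sketch; the option name here is made up):

  config STOP_MACHINE_NMI
          bool

selected by architectures which implement arch_send_self_nmi(), and in
the header simply:

  #ifdef CONFIG_STOP_MACHINE_NMI
  void arch_send_self_nmi(void);
  #endif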

> +
> +bool noinstr stop_machine_nmi_handler(void)
> +{
> +	struct multi_stop_data *msdata;
> +	int err;
> +
> +	if (!raw_cpu_read(stop_machine_nmi_ctrl.nmi_enabled))
> +		return false;
> +
> +	raw_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, false);
> +
> +	msdata = raw_cpu_read(stop_machine_nmi_ctrl.msdata);
> +	err = msdata->fn(msdata->data);
> +	raw_cpu_write(stop_machine_nmi_ctrl.err, err);
> +	return true;
> +}
> +
>  /* This is the cpu_stop function which stops the CPU. */
>  static int multi_cpu_stop(void *data)
>  {
> @@ -234,8 +272,15 @@ static int multi_cpu_stop(void *data)
>  				hard_irq_disable();
>  				break;
>  			case MULTI_STOP_RUN:
> -				if (is_active)
> -					err = msdata->fn(msdata->data);
> +				if (is_active) {
> +					if (msdata->use_nmi) {
> +						enable_nmi_handler(msdata);
> +						arch_send_self_nmi();
> +						err = raw_cpu_read(stop_machine_nmi_ctrl.err);
> +					} else {
> +						err = msdata->fn(msdata->data);
> +					}

And this wants to become

    if (IS_ENABLED(CONFIG_STOMP_MACHINE_NMI) && msdata->use_nmi)
            err = stop_this_cpu_nmi(msdata);
    else
            err = msdata->fn(msdata->data);

> +				}
>  				break;
>  			default:
>  				break;
> @@ -584,14 +629,15 @@ static int __init cpu_stop_init(void)
>  }
>  early_initcall(cpu_stop_init);
>  
> -int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
> -			    const struct cpumask *cpus)
> +static int __stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
> +			    const struct cpumask *cpus, bool use_nmi)

The argument alignment was correct before....

>  {
>  	struct multi_stop_data msdata = {
>  		.fn = fn,
>  		.data = data,
>  		.num_threads = num_online_cpus(),
>  		.active_cpus = cpus,
> +		.use_nmi = use_nmi,
>  	};
>  
>  	lockdep_assert_cpus_held();
> @@ -620,6 +666,24 @@ int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
>  	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
>  }

> +int stop_machine_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
> +{
> +	int ret;
> +
> +	cpus_read_lock();
> +	ret = stop_machine_cpuslocked_nmi(fn, data, cpus);
> +	cpus_read_unlock();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(stop_machine_nmi);

Why needs this to be exported? No module has any business with stomp
machine.

Thanks

        tglx
Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
Posted by Chang S. Bae 1 week, 3 days ago
On 1/28/2026 12:02 AM, Thomas Gleixner wrote:
> On Sun, Jan 25 2026 at 01:42, Chang S. Bae wrote:
>> +/**
>> + * stop_machine_nmi: freeze the machine and run this function in NMI context
>> + * @fn: the function to run
>> + * @data: the data ptr for the @fn()
>> + * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
> 
> Please format these tabular, use uppercase letters to start the
> explanation, use CPU[s] all over the place and write out words instead
> of using made up abbreviations. This is documentation not twitter.
> 
>   * @fn:		The function to invoke
>   * @data:	The data pointer for @fn()
>   * @cpus:	A cpumask containing the CPUs to run fn() on
> 
> Also this NULL == any online CPU is just made up. What's wrong with
> cpu_online_mask?
> 
>> +
>> +bool noinstr stop_machine_nmi_handler(void);
>> +DECLARE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
>> +static __always_inline bool stop_machine_nmi_handler_enabled(void)
> 
> Can you please separate the declarations from the inline with an empty
> new line? This glued together way to write it is unreadable.

Yes, I have just fixed them all in my local tree.

> 
>> +{
>> +	return static_branch_unlikely(&stop_machine_nmi_handler_enable);
>> +}
>> +
>>   #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
>>   
>>   static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
>> @@ -186,5 +217,24 @@ stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
>>   	return stop_machine(fn, data, cpus);
>>   }
>>   
>> +/* stop_machine_nmi() is only supported in SMP systems. */
>> +static __always_inline int stop_machine_nmi(cpu_stop_fn_t fn, void *data,
>> +					const struct cpumask *cpus)
> 
> Align the second line argument with the first argument above.
> 
> See https://www.kernel.org/doc/html/latest/process/maintainer-tip.html

Sorry for the many alignment issues in this change that I missed.

>> +{
>> +	return -EINVAL;
>> +}
>> +
> 
>> +
>> +void arch_send_self_nmi(void);
>>   #endif	/* _LINUX_STOP_MACHINE */
>> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
>> index 3fe6b0c99f3d..189b4b108d13 100644
>> --- a/kernel/stop_machine.c
>> +++ b/kernel/stop_machine.c
>> @@ -174,6 +174,8 @@ struct multi_stop_data {
>>   
>>   	enum multi_stop_state	state;
>>   	atomic_t		thread_ack;
>> +
>> +	bool			use_nmi;
>>   };
>>   
>>   static void set_state(struct multi_stop_data *msdata,
>> @@ -197,6 +199,42 @@ notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
>>   	cpu_relax();
>>   }
>>   
>> +struct stop_machine_nmi_ctrl {
>> +	bool nmi_enabled;
>> +	struct multi_stop_data *msdata;
>> +	int err;
> 
> Please align the struct member names tabular. See documentation.

Fixed.

> 
>> +};
>> +
>> +DEFINE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
>> +static DEFINE_PER_CPU(struct stop_machine_nmi_ctrl, stop_machine_nmi_ctrl);
>> +
>> +static void enable_nmi_handler(struct multi_stop_data *msdata)
>> +{
>> +	this_cpu_write(stop_machine_nmi_ctrl.msdata, msdata);
>> +	this_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, true);
>> +}
>> +
>> +void __weak arch_send_self_nmi(void)
>> +{
>> +	/* Arch code must implement this to support stop_machine_nmi() */
> 
> Architecture

Fixed.

> 
>> +}
> 
> Also this weak function is wrong.
> 
> All of this NMI mode needs to be guarded with a config option as it
> otherwise is compiled in unconditionally and any accidental usage on an
> architecture which does not support this will result in an undecodable
> malfunction. There is a world outside of x86.
> 
> With that arch_send_self_nmi() becomes a plain declaration in a header.

I see.

> 
>> +
>> +bool noinstr stop_machine_nmi_handler(void)
>> +{
>> +	struct multi_stop_data *msdata;
>> +	int err;
>> +
>> +	if (!raw_cpu_read(stop_machine_nmi_ctrl.nmi_enabled))
>> +		return false;
>> +
>> +	raw_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, false);
>> +
>> +	msdata = raw_cpu_read(stop_machine_nmi_ctrl.msdata);
>> +	err = msdata->fn(msdata->data);
>> +	raw_cpu_write(stop_machine_nmi_ctrl.err, err);
>> +	return true;
>> +}
>> +
>>   /* This is the cpu_stop function which stops the CPU. */
>>   static int multi_cpu_stop(void *data)
>>   {
>> @@ -234,8 +272,15 @@ static int multi_cpu_stop(void *data)
>>   				hard_irq_disable();
>>   				break;
>>   			case MULTI_STOP_RUN:
>> -				if (is_active)
>> -					err = msdata->fn(msdata->data);
>> +				if (is_active) {
>> +					if (msdata->use_nmi) {
>> +						enable_nmi_handler(msdata);
>> +						arch_send_self_nmi();
>> +						err = raw_cpu_read(stop_machine_nmi_ctrl.err);
>> +					} else {
>> +						err = msdata->fn(msdata->data);
>> +					}
> 
> And this wants to become
> 
>      if (IS_ENABLED(CONFIG_STOMP_MACHINE_NMI) && msdata->use_nmi)
>              err = stop_this_cpu_nmi(msdata);
>      else
>              err = msdata->fn(msdata->data);

Although that config option is very clear and makes tons of sense, the
else branch reads like a (silent) fallback path for a stop_machine_nmi()
invocation with CONFIG_STOMP_MACHINE_NMI=n.

Maybe it would be clearer to reject the NMI option right away with
something like:

stop_machine_cpuslocked_nmi(...)
{
	if (!IS_ENABLED(CONFIG_STOMP_MACHINE_NMI))
		return -EOPNOTSUPP;
	...
};

>>   
>> -int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
>> -			    const struct cpumask *cpus)
>> +static int __stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
>> +			    const struct cpumask *cpus, bool use_nmi)
> 
> The argument alignment was correct before....

Sigh... fixed, again.

>> +int stop_machine_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
>> +{
>> +	int ret;
>> +
>> +	cpus_read_lock();
>> +	ret = stop_machine_cpuslocked_nmi(fn, data, cpus);
>> +	cpus_read_unlock();
>> +
>> +	return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(stop_machine_nmi);
> 
> Why needs this to be exported? No module has any business with stomp
> machine.

Not at all. Removed.

I really appreciate your time and effort in reviewing this!
Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
Posted by Thomas Gleixner 1 week, 2 days ago
On Thu, Jan 29 2026 at 09:07, Chang S. Bae wrote:
> On 1/28/2026 12:02 AM, Thomas Gleixner wrote:
>> And this wants to become
>> 
>>      if (IS_ENABLED(CONFIG_STOMP_MACHINE_NMI) && msdata->use_nmi)
>>              err = stop_this_cpu_nmi(msdata);
>>      else
>>              err = msdata->fn(msdata->data);
>
> Although that config option is very clear and makes tons of sense, the 
> latter reads like a (silent) fallback path for a stop_machine_nmi() 
> invocation with CONFIG_STOMP_MACHINE_NMI=n.
>
> Maybe this might be clear to reject the NMI option right away with 
> something like:
>
> stop_machine_cpuslocked_nmi(...)
> {
> 	if (!IS_ENABLED(CONFIG_STOMP_MACHINE_NMI))
> 		return -EOPNOTSUPP;
> 	...
> };

That function should not be exposed at all when the config switch is
off. Hide it behind #ifdef CONFIG...

It really should not be used in generic code at all.
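
IOW, only provide the declarations when the option is enabled, e.g.
(sketch; option name illustrative):

  #ifdef CONFIG_STOP_MACHINE_NMI
  int stop_machine_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
  int stop_machine_cpuslocked_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
  #endif

with deliberately no stubs for the disabled case, so an accidental user
on an unsupported architecture fails to build instead of silently
falling back at runtime.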

Thanks,

        tglx
Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
Posted by Borislav Petkov 1 week, 5 days ago
On Sun, Jan 25, 2026 at 01:42:16AM +0000, Chang S. Bae wrote:
> +/**
> + * stop_machine_nmi: freeze the machine and run this function in NMI context
> + * @fn: the function to run
> + * @data: the data ptr for the @fn()
> + * @cpus: the cpus to run the @fn() on (NULL = any online cpu)

s/cpu/CPU/g in all text.

> + *
> + * Like stop_machine() but runs the function in NMI context to avoid any risk of
> + * interruption due to NMIs.
> + *
> + * Protects against CPU hotplug.
> + */
> +int stop_machine_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
> +
> +/**
> + * stop_machine_cpuslocked_nmi: freeze and run this function in NMI context
> + * @fn: the function to run
> + * @data: the data ptr for the @fn()
> + * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
> + *
> + * Same as above. Must be called from within a cpus_read_lock() protected
> + * region. Avoids nested calls to cpus_read_lock().
> + */
> +int stop_machine_cpuslocked_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
>  /**
>   * stop_core_cpuslocked: - stop all threads on just one core
>   * @cpu: any cpu in the targeted core
> @@ -160,6 +183,14 @@ int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data);
>  
>  int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
>  				   const struct cpumask *cpus);
> +
> +bool noinstr stop_machine_nmi_handler(void);
> +DECLARE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);

Why is the static key in the header if you have an accessor below?

> +static __always_inline bool stop_machine_nmi_handler_enabled(void)
> +{
> +	return static_branch_unlikely(&stop_machine_nmi_handler_enable);
> +}

Just make the accessor the only thing that external code calls.

...

> +DEFINE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
> +static DEFINE_PER_CPU(struct stop_machine_nmi_ctrl, stop_machine_nmi_ctrl);
> +
> +static void enable_nmi_handler(struct multi_stop_data *msdata)
> +{
> +	this_cpu_write(stop_machine_nmi_ctrl.msdata, msdata);
> +	this_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, true);
> +}

Why do we have to enable the NMI handler?

> +void __weak arch_send_self_nmi(void)
> +{
> +	/* Arch code must implement this to support stop_machine_nmi() */
> +}

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
Posted by kernel test robot 1 week, 6 days ago
Hi Chang,

kernel test robot noticed the following build errors:

[auto build test ERROR on 24d479d26b25bce5faea3ddd9fa8f3a6c3129ea7]

url:    https://github.com/intel-lab-lkp/linux/commits/Chang-S-Bae/stop_machine-Introduce-stop_machine_nmi/20260125-101013
base:   24d479d26b25bce5faea3ddd9fa8f3a6c3129ea7
patch link:    https://lore.kernel.org/r/20260125014224.249901-2-chang.seok.bae%40intel.com
patch subject: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
config: x86_64-randconfig-r133-20260126 (https://download.01.org/0day-ci/archive/20260126/202601261901.Yj2kmvgI-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260126/202601261901.Yj2kmvgI-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601261901.Yj2kmvgI-lkp@intel.com/

All errors (new ones prefixed by >>):

>> vmlinux.o: error: objtool: stop_machine_nmi_handler+0x23: call to {dynamic}() leaves .noinstr.text section

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
Posted by Borislav Petkov 1 week, 5 days ago
On Mon, Jan 26, 2026 at 07:51:11PM +0800, kernel test robot wrote:
> Hi Chang,
> 
> kernel test robot noticed the following build errors:
> 
> [auto build test ERROR on 24d479d26b25bce5faea3ddd9fa8f3a6c3129ea7]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Chang-S-Bae/stop_machine-Introduce-stop_machine_nmi/20260125-101013
> base:   24d479d26b25bce5faea3ddd9fa8f3a6c3129ea7
> patch link:    https://lore.kernel.org/r/20260125014224.249901-2-chang.seok.bae%40intel.com
> patch subject: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
> config: x86_64-randconfig-r133-20260126 (https://download.01.org/0day-ci/archive/20260126/202601261901.Yj2kmvgI-lkp@intel.com/config)
> compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260126/202601261901.Yj2kmvgI-lkp@intel.com/reproduce)
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202601261901.Yj2kmvgI-lkp@intel.com/
> 
> All errors (new ones prefixed by >>):
> 
> >> vmlinux.o: error: objtool: stop_machine_nmi_handler+0x23: call to {dynamic}() leaves .noinstr.text section

This one would be hard to fix because that's the

	err = msdata->fn(msdata->data);

function pointer which objtool can't see through.

So my suggestion would be to do:

struct nmi_multi_stop_data {
	cpu_stop_safe_fn_t           fn;

note the different function pointer type; the idea is that during review
we will hopefully catch that only *safe* functions are passed to the
NMI-specific stomp_machine.
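
That type does not exist yet - presumably just a new typedef with the
same shape as cpu_stop_fn_t, e.g.:

	/* Same signature; the distinct name marks noinstr/NMI-safe callbacks */
	typedef int (*cpu_stop_safe_fn_t)(void *arg);

purely a type-level marker for review, not a functional change.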

Along with:

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 189b4b108d13..55a350048d4c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -230,7 +230,9 @@ bool noinstr stop_machine_nmi_handler(void)
 	raw_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, false);
 
 	msdata = raw_cpu_read(stop_machine_nmi_ctrl.msdata);
+	instrumentation_begin();
 	err = msdata->fn(msdata->data);
+	instrumentation_end();
 	raw_cpu_write(stop_machine_nmi_ctrl.err, err);
 	return true;
 }

ofc.

Unless someone has a better idea ofc...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
Posted by Chang S. Bae 1 week, 5 days ago
On 1/27/2026 6:49 AM, Borislav Petkov wrote:
>>
>>>> vmlinux.o: error: objtool: stop_machine_nmi_handler+0x23: call to {dynamic}() leaves .noinstr.text section
> 
> This one would be hard to fix because that's the
> 
> 	err = msdata->fn(msdata->data);
> 
> function pointer which objtool can't see through.

Yes. Under certain configs, the 0-day bot triggers this with
CONFIG_NOINSTR_VALIDATION=y.

Objtool ends up with the following path:

tools/objtool/check.c::validate_call()
-> noinstr_call_dest()
    {
	/*
	 * We can't deal with indirect function calls at present;
	 * assume they're instrumented.
	 */
	if (!func) {
		if (file->pv_ops)
			return pv_call_dest(file, insn);

		return false;
	}
     ...
    }

So every indirect call under noinstr is conservatively treated as
invalid, except for pv_ops which gets inspected further. In this
series, however, the call destination itself is in .noinstr.text.

Given this, I considered adjusting something on the tool side, e.g. by
whitelisting the case along the lines of

   if (!func) {
     ...

     if (!strncmp(insn_func(insn)->name, "stop_machine_nmi_handler", 24))
       return true;

     return false;
   }

The existing pv_ops handling also looks somewhat misaligned with large 
objects like vmlinux.o.

> 
> So my suggestion would be to do:
> 
> struct nmi_multi_stop_data {
> 	cpu_stop_safe_fn_t           fn;
> 
> note the different function pointer type which we will hopefully catch during
> review in the sense that only *safe* functions should be callable by the
> NMI-specific stomp_machine.
> 
> Along with:
> 
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> index 189b4b108d13..55a350048d4c 100644
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -230,7 +230,9 @@ bool noinstr stop_machine_nmi_handler(void)
>   	raw_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, false);
>   
>   	msdata = raw_cpu_read(stop_machine_nmi_ctrl.msdata);
> +	instrumentation_begin();
>   	err = msdata->fn(msdata->data);
> +	instrumentation_end();
>   	raw_cpu_write(stop_machine_nmi_ctrl.err, err);
>   	return true;
>   }

I also considered this change, but I was a bit hesitant as it could be
seen as lifting the no-instrumentation guarantee.

Now I suppose the net effect will be the same as with the other
workaround, e.g. tool-side whitelisting. So I think I can go with this
as you suggested (with a clear note about it).

> 
> ofc.
> 
> Unless someone has a better idea ofc...

Sure.

Thanks,
Chang