[PATCH 2/6] x86/irq: Extend NMI handler registration interface to include source

Jacob Pan posted 6 patches 1 year, 6 months ago
There is a newer version of this series
[PATCH 2/6] x86/irq: Extend NMI handler registration interface to include source
Posted by Jacob Pan 1 year, 6 months ago
Add a source vector argument to register_nmi_handler() such that designated
NMI originators can leverage NMI source reporting feature. For those who
do not use NMI source reporting, 0 (unknown) is used as the source vector. NMI
source vectors (up to 16) are pre-defined.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
---
 arch/x86/events/amd/ibs.c          |  2 +-
 arch/x86/events/core.c             |  3 ++-
 arch/x86/include/asm/irq_vectors.h | 21 +++++++++++++++++++++
 arch/x86/include/asm/nmi.h         |  4 +++-
 arch/x86/kernel/apic/hw_nmi.c      |  2 +-
 arch/x86/kernel/cpu/mce/inject.c   |  2 +-
 arch/x86/kernel/cpu/mshyperv.c     |  2 +-
 arch/x86/kernel/kgdb.c             |  4 ++--
 arch/x86/kernel/nmi.c              | 16 ++++++++++++++++
 arch/x86/kernel/nmi_selftest.c     |  5 +++--
 arch/x86/kernel/reboot.c           |  2 +-
 arch/x86/kernel/smp.c              |  2 +-
 arch/x86/platform/uv/uv_nmi.c      |  4 ++--
 drivers/acpi/apei/ghes.c           |  2 +-
 drivers/char/ipmi/ipmi_watchdog.c  |  2 +-
 drivers/edac/igen6_edac.c          |  2 +-
 drivers/watchdog/hpwdt.c           |  6 +++---
 17 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index e91970b01d62..20989071f59a 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -1246,7 +1246,7 @@ static __init int perf_event_ibs_init(void)
 	if (ret)
 		goto err_op;
 
-	ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
+	ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs", 0);
 	if (ret)
 		goto err_nmi;
 
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5b0dd07b1ef1..1ef2201e48ac 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2100,7 +2100,8 @@ static int __init init_hw_perf_events(void)
 		x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
 
 	perf_events_lapic_init();
-	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
+
+	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI", NMI_SOURCE_VEC_PMI);
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 13aea8fc3d45..b8388bc00cde 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -105,6 +105,27 @@
 
 #define NR_VECTORS			 256
 
+/*
+ * The NMI senders specify the NMI source vector as an 8bit integer in their
+ * vector field with NMI delivery mode. A local APIC receiving an NMI will
+ * set the corresponding bit in a 16bit bitmask, which is accumulated until
+ * the NMI is delivered.
+ * When a sender didn't specify an NMI source vector the source vector will
+ * be 0, which will result in bit 0 of the bitmask being set. For out of
+ * bounds vectors >= 16 bit 0 will also be set.
+ * When bit 0 is set, system software must invoke all registered NMI handlers
+ * as if NMI source feature is not enabled.
+ */
+#define NMI_SOURCE_VEC_UNKNOWN		0
+#define NMI_SOURCE_VEC_PMI		1	/* PerfMon counters */
+#define NMI_SOURCE_VEC_IPI_BT		2	/* CPU backtrace */
+#define NMI_SOURCE_VEC_IPI_MCE		3	/* MCE injection */
+#define NMI_SOURCE_VEC_IPI_KGDB		4
+#define NMI_SOURCE_VEC_IPI_REBOOT	5	/* Crash reboot */
+#define NMI_SOURCE_VEC_IPI_SMP_STOP	6	/* Panic stop CPU */
+#define NMI_SOURCE_VEC_IPI_TEST		7	/* For remote and local IPIs*/
+#define NR_NMI_SOURCE_VECTORS		8
+
 #ifdef CONFIG_X86_LOCAL_APIC
 #define FIRST_SYSTEM_VECTOR		POSTED_MSI_NOTIFICATION_VECTOR
 #else
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 41a0ebb699ec..6fe26fea30eb 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -39,15 +39,17 @@ struct nmiaction {
 	u64			max_duration;
 	unsigned long		flags;
 	const char		*name;
+	unsigned int		source_vec;
 };
 
-#define register_nmi_handler(t, fn, fg, n, init...)	\
+#define register_nmi_handler(t, fn, fg, n, src, init...)	\
 ({							\
 	static struct nmiaction init fn##_na = {	\
 		.list = LIST_HEAD_INIT(fn##_na.list),	\
 		.handler = (fn),			\
 		.name = (n),				\
 		.flags = (fg),				\
+		.source_vec = (src),			\
 	};						\
 	__register_nmi_handler((t), &fn##_na);		\
 })
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 45af535c44a0..9f0125d3b8b0 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -54,7 +54,7 @@ NOKPROBE_SYMBOL(nmi_cpu_backtrace_handler);
 static int __init register_nmi_cpu_backtrace_handler(void)
 {
 	register_nmi_handler(NMI_LOCAL, nmi_cpu_backtrace_handler,
-				0, "arch_bt");
+				0, "arch_bt", NMI_SOURCE_VEC_IPI_BT);
 	return 0;
 }
 early_initcall(register_nmi_cpu_backtrace_handler);
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 94953d749475..365a03f11d06 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -769,7 +769,7 @@ static int __init inject_init(void)
 
 	debugfs_init();
 
-	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
+	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify", NMI_SOURCE_VEC_IPI_MCE);
 	mce_register_injector_chain(&inject_nb);
 
 	setup_inj_struct(&i_mce);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e0fd57a8ba84..2fb9408a8ba9 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -486,7 +486,7 @@ static void __init ms_hyperv_init_platform(void)
 	}
 
 	register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
-			     "hv_nmi_unknown");
+			     "hv_nmi_unknown", 0);
 #endif
 
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 9c9faa1634fb..d167eb23cf13 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -603,12 +603,12 @@ int kgdb_arch_init(void)
 		goto out;
 
 	retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler,
-					0, "kgdb");
+					0, "kgdb", NMI_SOURCE_VEC_IPI_KGDB);
 	if (retval)
 		goto out1;
 
 	retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler,
-					0, "kgdb");
+					0, "kgdb", 0);
 
 	if (retval)
 		goto out2;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ed163c8c8604..1ff4f7c9f182 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -86,6 +86,12 @@ static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
 
 static int ignore_nmis __read_mostly;
 
+/*
+ * Contains all actions registered by originators with source vector,
+ * excluding UNKNOWN NMI source vector 0.
+ */
+static struct nmiaction *nmiaction_src_table[NR_NMI_SOURCE_VECTORS - 1];
+
 int unknown_nmi_panic;
 /*
  * Prevent NMI reason port (0x61) being accessed simultaneously, can
@@ -163,6 +169,12 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(nmi_handle);
 
+static inline bool use_nmi_source(unsigned int type, struct nmiaction *a)
+{
+	return (cpu_feature_enabled(X86_FEATURE_NMI_SOURCE) &&
+		type == NMI_LOCAL && a->source_vec);
+}
+
 int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
@@ -173,6 +185,8 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
 
+	if (use_nmi_source(type, action))
+		rcu_assign_pointer(nmiaction_src_table[action->source_vec], action);
 	/*
 	 * Indicate if there are multiple registrations on the
 	 * internal NMI handler call chains (SERR and IO_CHECK).
@@ -210,6 +224,8 @@ void unregister_nmi_handler(unsigned int type, const char *name)
 		if (!strcmp(n->name, name)) {
 			WARN(in_nmi(),
 				"Trying to free NMI (%s) from NMI context!\n", n->name);
+			if (use_nmi_source(type, n))
+				rcu_assign_pointer(nmiaction_src_table[n->source_vec], NULL);
 			list_del_rcu(&n->list);
 			found = n;
 			break;
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index e93a8545c74d..f014c8a66b0c 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -44,7 +44,7 @@ static void __init init_nmi_testsuite(void)
 {
 	/* trap all the unknown NMIs we may generate */
 	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk",
-			__initdata);
+			0, __initdata);
 }
 
 static void __init cleanup_nmi_testsuite(void)
@@ -67,7 +67,8 @@ static void __init test_nmi_ipi(struct cpumask *mask)
 	unsigned long timeout;
 
 	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
-				 NMI_FLAG_FIRST, "nmi_selftest", __initdata)) {
+				 NMI_FLAG_FIRST, "nmi_selftest", NMI_SOURCE_VEC_IPI_TEST,
+				 __initdata)) {
 		nmi_fail = FAILURE;
 		return;
 	}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f3130f762784..acc19c1d3b4f 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -910,7 +910,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
 	/* Would it be better to replace the trap vector here? */
 	if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
-				 NMI_FLAG_FIRST, "crash"))
+				 NMI_FLAG_FIRST, "crash", NMI_SOURCE_VEC_IPI_REBOOT))
 		return;		/* Return what? */
 	/*
 	 * Ensure the new callback function is set before sending
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18266cc3d98c..f27469e40141 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -143,7 +143,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
 static int register_stop_handler(void)
 {
 	return register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
-				    NMI_FLAG_FIRST, "smp_stop");
+				    NMI_FLAG_FIRST, "smp_stop", NMI_SOURCE_VEC_IPI_SMP_STOP);
 }
 
 static void native_stop_other_cpus(int wait)
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 5c50e550ab63..473c34eb264c 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -1029,10 +1029,10 @@ static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs)
 
 static void uv_register_nmi_notifier(void)
 {
-	if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
+	if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv", 0))
 		pr_warn("UV: NMI handler failed to register\n");
 
-	if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping"))
+	if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping", 0))
 		pr_warn("UV: PING NMI handler failed to register\n");
 }
 
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 623cc0cb4a65..393dca95d2b3 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1318,7 +1318,7 @@ static void ghes_nmi_add(struct ghes *ghes)
 {
 	mutex_lock(&ghes_list_mutex);
 	if (list_empty(&ghes_nmi))
-		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
+		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes", 0);
 	list_add_rcu(&ghes->list, &ghes_nmi);
 	mutex_unlock(&ghes_list_mutex);
 }
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 9a459257489f..61bb5dcade5a 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -1272,7 +1272,7 @@ static void check_parms(void)
 	}
 	if (do_nmi && !nmi_handler_registered) {
 		rv = register_nmi_handler(NMI_UNKNOWN, ipmi_nmi, 0,
-						"ipmi");
+						"ipmi", 0);
 		if (rv) {
 			pr_warn("Can't register nmi handler\n");
 			return;
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index cdd8480e7368..e672b38f2c2b 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -1321,7 +1321,7 @@ static int register_err_handler(void)
 	}
 
 	rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
-				  0, IGEN6_NMI_NAME);
+				  0, IGEN6_NMI_NAME, 0);
 	if (rc) {
 		igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
 		return rc;
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index ae30e394d176..5246706afcf6 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -242,13 +242,13 @@ static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
 	/*
 	 * Only one function can register for NMI_UNKNOWN
 	 */
-	retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, 0, "hpwdt");
+	retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, 0, "hpwdt", 0);
 	if (retval)
 		goto error;
-	retval = register_nmi_handler(NMI_SERR, hpwdt_pretimeout, 0, "hpwdt");
+	retval = register_nmi_handler(NMI_SERR, hpwdt_pretimeout, 0, "hpwdt", 0);
 	if (retval)
 		goto error1;
-	retval = register_nmi_handler(NMI_IO_CHECK, hpwdt_pretimeout, 0, "hpwdt");
+	retval = register_nmi_handler(NMI_IO_CHECK, hpwdt_pretimeout, 0, "hpwdt", 0);
 	if (retval)
 		goto error2;
 
-- 
2.25.1
Re: [PATCH 2/6] x86/irq: Extend NMI handler registration interface to include source
Posted by H. Peter Anvin 1 year, 6 months ago
On 5/29/24 13:33, Jacob Pan wrote:
> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
> index 13aea8fc3d45..b8388bc00cde 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -105,6 +105,27 @@
>   
>   #define NR_VECTORS			 256
>   
> +/*
> + * The NMI senders specify the NMI source vector as an 8bit integer in their
> + * vector field with NMI delivery mode. A local APIC receiving an NMI will
> + * set the corresponding bit in a 16bit bitmask, which is accumulated until
> + * the NMI is delivered.
> + * When a sender didn't specify an NMI source vector the source vector will
> + * be 0, which will result in bit 0 of the bitmask being set. For out of
> + * bounds vectors >= 16 bit 0 will also be set.
> + * When bit 0 is set, system software must invoke all registered NMI handlers
> + * as if NMI source feature is not enabled.
> + */
> +#define NMI_SOURCE_VEC_UNKNOWN		0
> +#define NMI_SOURCE_VEC_PMI		1	/* PerfMon counters */
> +#define NMI_SOURCE_VEC_IPI_BT		2	/* CPU backtrace */
> +#define NMI_SOURCE_VEC_IPI_MCE		3	/* MCE injection */
> +#define NMI_SOURCE_VEC_IPI_KGDB		4
> +#define NMI_SOURCE_VEC_IPI_REBOOT	5	/* Crash reboot */
> +#define NMI_SOURCE_VEC_IPI_SMP_STOP	6	/* Panic stop CPU */
> +#define NMI_SOURCE_VEC_IPI_TEST		7	/* For remote and local IPIs*/
> +#define NR_NMI_SOURCE_VECTORS		8
> +

I would avoid using vector 2; it is at least remotely possible that some 
third-party chipset sends NMI messages with a hardcoded vector of 2. As 
long as you don't actively need that slot, it is better to avoid it.

Even better is to set the LINT1 (= external NMI) vector to 2 and treat 
bit 2 as "other"; use vector 2 for anything that you don't have an 
explicit vector for. You can treat a received bit 2 the same as bit 0, 
except that you can explicitly trust that any event assigned an explicit 
vector number is *NOT* triggered and so do not need to be polled.

I would also recommend sorting these in order of decreasing importance 
(other than 0 and 2). Although the current intent is there to be 16 
vectors indefinitely, defensive design would be to consider that number 
to be potentially variable (up to 64, obviously). Since any out-of-range 
vector will set bit 0, the code will still Just Work[TM].

	-hpa
Re: [PATCH 2/6] x86/irq: Extend NMI handler registration interface to include source
Posted by Jacob Pan 1 year, 6 months ago
Hi H.,

On Wed, 29 May 2024 13:59:51 -0700, "H. Peter Anvin" <hpa@zytor.com> wrote:

> On 5/29/24 13:33, Jacob Pan wrote:
> > diff --git a/arch/x86/include/asm/irq_vectors.h
> > b/arch/x86/include/asm/irq_vectors.h index 13aea8fc3d45..b8388bc00cde
> > 100644 --- a/arch/x86/include/asm/irq_vectors.h
> > +++ b/arch/x86/include/asm/irq_vectors.h
> > @@ -105,6 +105,27 @@
> >   
> >   #define NR_VECTORS			 256
> >   
> > +/*
> > + * The NMI senders specify the NMI source vector as an 8bit integer in
> > their
> > + * vector field with NMI delivery mode. A local APIC receiving an NMI
> > will
> > + * set the corresponding bit in a 16bit bitmask, which is accumulated
> > until
> > + * the NMI is delivered.
> > + * When a sender didn't specify an NMI source vector the source vector
> > will
> > + * be 0, which will result in bit 0 of the bitmask being set. For out
> > of
> > + * bounds vectors >= 16 bit 0 will also be set.
> > + * When bit 0 is set, system software must invoke all registered NMI
> > handlers
> > + * as if NMI source feature is not enabled.
> > + */
> > +#define NMI_SOURCE_VEC_UNKNOWN		0
> > +#define NMI_SOURCE_VEC_PMI		1	/* PerfMon counters
> > */ +#define NMI_SOURCE_VEC_IPI_BT		2	/* CPU
> > backtrace */ +#define NMI_SOURCE_VEC_IPI_MCE		3	/*
> > MCE injection */ +#define NMI_SOURCE_VEC_IPI_KGDB		4
> > +#define NMI_SOURCE_VEC_IPI_REBOOT	5	/* Crash reboot */
> > +#define NMI_SOURCE_VEC_IPI_SMP_STOP	6	/* Panic stop CPU
> > */ +#define NMI_SOURCE_VEC_IPI_TEST		7	/* For
> > remote and local IPIs*/ +#define NR_NMI_SOURCE_VECTORS		8
> > +  
> 
> I would avoid using vector 2; it is at least remotely possible that some 
> third-party chipset sends NMI messages with a hardcoded vector of 2. As 
> long as you don't actively need that slot, it is better to avoid it.
> 
I thought about that as well, then I dropped that when I limit NMIS to
local interrupts. Not aware the hardcoded case as you mentioned.

I will reserve vector #2 to avoid future headache.

> Even better is to set the LINT1 (= external NMI) vector to 2 and treat 
> bit 2 as "other"; use vector 2 for anything that you don't have an 
> explicit vector for. You can treat a received bit 2 the same as bit 0, 
> except that you can explicitly trust that any event assigned an explicit 
> vector number is *NOT* triggered and so do not need to be polled.
> 
Make sense, will set LINT1 vector to 2 and avoid polling known sources.
That is another optimization.

> I would also recommend sorting these in order of decreasing importance 
> (other than 0 and 2). Although the current intent is there to be 16 
> vectors indefinitely, defensive design would be to consider that number 
> to be potentially variable (up to 64, obviously). Since any out-of-range 
> vector will set bit 0, the code will still Just Work[TM].
Will sort them.

Thanks,

Jacob