[GIT pull] x86/apic for v6.12-rc1

Linus,

please pull the latest x86/apic branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-apic-2024-09-17

up to:  a1fab3e69d9d: x86/irq: Fix comment on IRQ vector layout

Updates for the x86 APIC code:

  - Handle an allocation failure in the IO/APIC code gracefully instead of
    crashing the machine (a condensed sketch of the new pattern follows
    this list).

  - Remove support for APIC logical destination mode on 64-bit

    Logical destination mode of the local APIC is used for systems with up
    to 8 CPUs. It has an advantage over physical destination mode as it
    allows targeting multiple CPUs at once with IPIs (a short illustration
    of the underlying 8-bit limit follows this list). That advantage was
    definitely worth it when systems with up to 8 CPUs were state of the
    art for servers and workstations, but that's history.

    In the recent past there have been quite a few reports of new laptops
    failing to boot with logical destination mode, but they work fine with
    physical destination mode. That's not a surprise because physical
    destination mode is guaranteed to work as it's the only way to get a
    CPU up and running via the INIT/INIT/STARTUP sequence. Some of the
    affected systems were cured by BIOS updates, but not all OEMs provide
    them.

    As the number of CPUs keeps increasing, logical destination mode is
    used less and less, and the benefit for small systems, like laptops, is
    not really worth the trouble. So just remove logical destination mode
    support for 64-bit and be done with it.

  - Code and comment cleanups in the APIC area.
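
For reference, a condensed sketch of the allocation failure handling
(mirroring the add_pin_to_irq_node() hunk in the io_apic.c diff below;
kernel context such as kzalloc_node(), pr_err() and struct irq_pin_list is
assumed): the failure is reported and propagated as an error instead of
panicking the machine.

	static bool add_pin_to_irq_node(struct mp_chip_data *data, int node, int apic, int pin)
	{
		struct irq_pin_list *entry;

		/* Don't allow duplicates */
		for_each_irq_pin(entry, data->irq_2_pin) {
			if (entry->apic == apic && entry->pin == pin)
				return true;
		}

		entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node);
		if (!entry) {
			/* Previously a panic("IO-APIC: failed to add irq-pin") */
			pr_err("Cannot allocate irq_pin_list (%d,%d,%d)\n", node, apic, pin);
			return false;
		}

		entry->apic = apic;
		entry->pin = pin;
		list_add_tail(&entry->list, &data->irq_2_pin);
		return true;
	}

	/* Callers such as alloc_isa_irq_from_domain() turn 'false' into -ENOMEM */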
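
To illustrate the 8 CPU limit mentioned above: in flat logical destination
mode every CPU owns one bit of the 8-bit logical destination field, and an
IPI destination is simply the OR of the target bits (which is what the
removed _flat_send_IPI_mask() did with the first word of the cpumask). A
minimal userspace sketch of that arithmetic - purely illustrative, not
kernel code:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* One bit per CPU in the 8-bit flat logical destination field */
		uint8_t dest = 0;
		int targets[] = { 0, 2, 5 };

		for (unsigned int i = 0; i < sizeof(targets) / sizeof(targets[0]); i++)
			dest |= 1u << targets[i];

		/* A single IPI reaches CPUs 0, 2 and 5: dest == 0x25 */
		printf("logical destination field: 0x%02x\n", dest);

		/* CPU 8 has no bit left in an 8-bit field, hence the 8 CPU limit */
		return 0;
	}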

Thanks,

	tglx

------------------>
Sohil Mehta (1):
      x86/irq: Fix comment on IRQ vector layout

Thomas Gleixner (16):
      x86/ioapic: Handle allocation failures gracefully
      x86/ioapic: Mark mp_alloc_timer_irq() __init
      x86/ioapic: Cleanup structs
      x86/ioapic: Use guard() for locking where applicable
      x86/apic: Provide apic_printk() helpers
      x86/apic: Cleanup apic_printk()s
      x86/ioapic: Cleanup apic_printk()s
      x86/ioapic: Cleanup guarded debug printk()s
      x86/mpparse: Cleanup apic_printk()s
      iommu/vt-d: Cleanup apic_printk()
      x86/ioapic: Move replace_pin_at_irq_node() to the call site
      x86/ioapic: Cleanup comments
      x86/ioapic: Cleanup bracket usage
      x86/ioapic: Cleanup line breaks
      x86/ioapic: Cleanup remaining coding style issues
      x86/apic: Remove logical destination mode for 64-bit

Yue Haibing (2):
      x86/apic: Remove unused inline function apic_set_eoi_cb()
      x86/apic: Remove unused extern declarations


 arch/x86/include/asm/apic.h         |  48 +--
 arch/x86/include/asm/irq_vectors.h  |   4 +-
 arch/x86/kernel/apic/apic.c         |  81 ++--
 arch/x86/kernel/apic/apic_flat_64.c | 119 +-----
 arch/x86/kernel/apic/io_apic.c      | 749 +++++++++++++++---------------------
 arch/x86/kernel/mpparse.c           |  13 +-
 drivers/iommu/intel/irq_remapping.c |  11 +-
 7 files changed, 378 insertions(+), 647 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 9327eb00e96d..f21ff1932699 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -18,6 +18,11 @@
 
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
+/* Macros for apic_extnmi which controls external NMI masking */
+#define APIC_EXTNMI_BSP		0 /* Default */
+#define APIC_EXTNMI_ALL		1
+#define APIC_EXTNMI_NONE	2
+
 /*
  * Debugging macros
  */
@@ -25,22 +30,22 @@
 #define APIC_VERBOSE 1
 #define APIC_DEBUG   2
 
-/* Macros for apic_extnmi which controls external NMI masking */
-#define APIC_EXTNMI_BSP		0 /* Default */
-#define APIC_EXTNMI_ALL		1
-#define APIC_EXTNMI_NONE	2
-
 /*
- * Define the default level of output to be very little
- * This can be turned up by using apic=verbose for more
- * information and apic=debug for _lots_ of information.
- * apic_verbosity is defined in apic.c
+ * Define the default level of output to be very little. This can be turned
+ * up by using apic=verbose for more information and apic=debug for _lots_
+ * of information.  apic_verbosity is defined in apic.c
  */
-#define apic_printk(v, s, a...) do {       \
-		if ((v) <= apic_verbosity) \
-			printk(s, ##a);    \
-	} while (0)
-
+#define apic_printk(v, s, a...)			\
+do {						\
+	if ((v) <= apic_verbosity)		\
+		printk(s, ##a);			\
+} while (0)
+
+#define apic_pr_verbose(s, a...)	apic_printk(APIC_VERBOSE, KERN_INFO s, ##a)
+#define apic_pr_debug(s, a...)		apic_printk(APIC_DEBUG, KERN_DEBUG s, ##a)
+#define apic_pr_debug_cont(s, a...)	apic_printk(APIC_DEBUG, KERN_CONT s, ##a)
+/* Unconditional debug prints for code which is guarded by apic_verbosity already */
+#define apic_dbg(s, a...)		printk(KERN_DEBUG s, ##a)
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
 extern void x86_32_probe_apic(void);
@@ -122,8 +127,6 @@ static inline bool apic_is_x2apic_enabled(void)
 
 extern void enable_IR_x2apic(void);
 
-extern int get_physical_broadcast(void);
-
 extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
 extern void disconnect_bsp_APIC(int virt_wire_setup);
@@ -345,20 +348,12 @@ extern struct apic *apic;
  * APIC drivers are probed based on how they are listed in the .apicdrivers
  * section. So the order is important and enforced by the ordering
  * of different apic driver files in the Makefile.
- *
- * For the files having two apic drivers, we use apic_drivers()
- * to enforce the order with in them.
  */
 #define apic_driver(sym)					\
 	static const struct apic *__apicdrivers_##sym __used		\
 	__aligned(sizeof(struct apic *))			\
 	__section(".apicdrivers") = { &sym }
 
-#define apic_drivers(sym1, sym2)					\
-	static struct apic *__apicdrivers_##sym1##sym2[2] __used	\
-	__aligned(sizeof(struct apic *))				\
-	__section(".apicdrivers") = { &sym1, &sym2 }
-
 extern struct apic *__apicdrivers[], *__apicdrivers_end[];
 
 /*
@@ -484,7 +479,6 @@ static inline u64 apic_icr_read(void) { return 0; }
 static inline void apic_icr_write(u32 low, u32 high) { }
 static inline void apic_wait_icr_idle(void) { }
 static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
-static inline void apic_set_eoi_cb(void (*eoi)(void)) {}
 static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); }
 static inline void apic_setup_apic_calls(void) { }
 
@@ -512,8 +506,6 @@ static inline bool is_vector_pending(unsigned int vector)
 #define TRAMPOLINE_PHYS_LOW		0x467
 #define TRAMPOLINE_PHYS_HIGH		0x469
 
-extern void generic_bigsmp_probe(void);
-
 #ifdef CONFIG_X86_LOCAL_APIC
 
 #include <asm/smp.h>
@@ -536,8 +528,6 @@ static inline int default_acpi_madt_oem_check(char *a, char *b) { return 0; }
 static inline void x86_64_probe_apic(void) { }
 #endif
 
-extern int default_apic_id_valid(u32 apicid);
-
 extern u32 apic_default_calc_apicid(unsigned int cpu);
 extern u32 apic_flat_calc_apicid(unsigned int cpu);
 
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 13aea8fc3d45..47051871b436 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -18,8 +18,8 @@
  *  Vectors   0 ...  31 : system traps and exceptions - hardcoded events
  *  Vectors  32 ... 127 : device interrupts
  *  Vector  128         : legacy int80 syscall interface
- *  Vectors 129 ... LOCAL_TIMER_VECTOR-1
- *  Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
+ *  Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts
+ *  Vectors FIRST_SYSTEM_VECTOR ... 255   : special interrupts
  *
  * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
  *
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 66fd4b2a37a3..1838a73b0950 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -677,7 +677,7 @@ calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
 	return -1;
 #endif
 
-	apic_printk(APIC_VERBOSE, "... PM-Timer delta = %u\n", deltapm);
+	apic_pr_verbose("... PM-Timer delta = %u\n", deltapm);
 
 	/* Check, if the PM timer is available */
 	if (!deltapm)
@@ -687,14 +687,14 @@ calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
 
 	if (deltapm > (pm_100ms - pm_thresh) &&
 	    deltapm < (pm_100ms + pm_thresh)) {
-		apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
+		apic_pr_verbose("... PM-Timer result ok\n");
 		return 0;
 	}
 
 	res = (((u64)deltapm) *  mult) >> 22;
 	do_div(res, 1000000);
-	pr_warn("APIC calibration not consistent "
-		"with PM-Timer: %ldms instead of 100ms\n", (long)res);
+	pr_warn("APIC calibration not consistent with PM-Timer: %ldms instead of 100ms\n",
+		(long)res);
 
 	/* Correct the lapic counter value */
 	res = (((u64)(*delta)) * pm_100ms);
@@ -707,9 +707,8 @@ calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
 	if (boot_cpu_has(X86_FEATURE_TSC)) {
 		res = (((u64)(*deltatsc)) * pm_100ms);
 		do_div(res, deltapm);
-		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
-					  "PM-Timer: %lu (%ld)\n",
-					(unsigned long)res, *deltatsc);
+		apic_pr_verbose("TSC delta adjusted to PM-Timer: %lu (%ld)\n",
+				(unsigned long)res, *deltatsc);
 		*deltatsc = (long)res;
 	}
 
@@ -792,8 +791,7 @@ static int __init calibrate_APIC_clock(void)
 	 * in the clockevent structure and return.
 	 */
 	if (!lapic_init_clockevent()) {
-		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
-			    lapic_timer_period);
+		apic_pr_verbose("lapic timer already calibrated %d\n", lapic_timer_period);
 		/*
 		 * Direct calibration methods must have an always running
 		 * local APIC timer, no need for broadcast timer.
@@ -802,8 +800,7 @@ static int __init calibrate_APIC_clock(void)
 		return 0;
 	}
 
-	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-		    "calibrating APIC timer ...\n");
+	apic_pr_verbose("Using local APIC timer interrupts. Calibrating APIC timer ...\n");
 
 	/*
 	 * There are platforms w/o global clockevent devices. Instead of
@@ -866,7 +863,7 @@ static int __init calibrate_APIC_clock(void)
 
 	/* Build delta t1-t2 as apic timer counts down */
 	delta = lapic_cal_t1 - lapic_cal_t2;
-	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+	apic_pr_verbose("... lapic delta = %ld\n", delta);
 
 	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
 
@@ -877,22 +874,19 @@ static int __init calibrate_APIC_clock(void)
 	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 	lapic_init_clockevent();
 
-	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
-	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
-	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
-		    lapic_timer_period);
+	apic_pr_verbose("..... delta %ld\n", delta);
+	apic_pr_verbose("..... mult: %u\n", lapic_clockevent.mult);
+	apic_pr_verbose("..... calibration result: %u\n", lapic_timer_period);
 
 	if (boot_cpu_has(X86_FEATURE_TSC)) {
-		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
-			    "%ld.%04ld MHz.\n",
-			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
-			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+		apic_pr_verbose("..... CPU clock speed is %ld.%04ld MHz.\n",
+				(deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
+				(deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
 	}
 
-	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-		    "%u.%04u MHz.\n",
-		    lapic_timer_period / (1000000 / HZ),
-		    lapic_timer_period % (1000000 / HZ));
+	apic_pr_verbose("..... host bus clock speed is %u.%04u MHz.\n",
+			lapic_timer_period / (1000000 / HZ),
+			lapic_timer_period % (1000000 / HZ));
 
 	/*
 	 * Do a sanity check on the APIC calibration result
@@ -911,7 +905,7 @@ static int __init calibrate_APIC_clock(void)
 	 * available.
 	 */
 	if (!pm_referenced && global_clock_event) {
-		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+		apic_pr_verbose("... verify APIC timer\n");
 
 		/*
 		 * Setup the apic timer manually
@@ -932,11 +926,11 @@ static int __init calibrate_APIC_clock(void)
 
 		/* Jiffies delta */
 		deltaj = lapic_cal_j2 - lapic_cal_j1;
-		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+		apic_pr_verbose("... jiffies delta = %lu\n", deltaj);
 
 		/* Check, if the jiffies result is consistent */
 		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
-			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+			apic_pr_verbose("... jiffies result ok\n");
 		else
 			levt->features |= CLOCK_EVT_FEAT_DUMMY;
 	}
@@ -1221,9 +1215,8 @@ void __init sync_Arb_IDs(void)
 	 */
 	apic_wait_icr_idle();
 
-	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-	apic_write(APIC_ICR, APIC_DEST_ALLINC |
-			APIC_INT_LEVELTRIG | APIC_DM_INIT);
+	apic_pr_debug("Synchronizing Arb IDs.\n");
+	apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
 }
 
 enum apic_intr_mode_id apic_intr_mode __ro_after_init;
@@ -1409,10 +1402,10 @@ static void lapic_setup_esr(void)
 	if (maxlvt > 3)
 		apic_write(APIC_ESR, 0);
 	value = apic_read(APIC_ESR);
-	if (value != oldvalue)
-		apic_printk(APIC_VERBOSE, "ESR value before enabling "
-			"vector: 0x%08x  after: 0x%08x\n",
-			oldvalue, value);
+	if (value != oldvalue) {
+		apic_pr_verbose("ESR value before enabling vector: 0x%08x  after: 0x%08x\n",
+				oldvalue, value);
+	}
 }
 
 #define APIC_IR_REGS		APIC_ISR_NR
@@ -1599,10 +1592,10 @@ static void setup_local_APIC(void)
 	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
 	if (!cpu && (pic_mode || !value || ioapic_is_disabled)) {
 		value = APIC_DM_EXTINT;
-		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
+		apic_pr_verbose("Enabled ExtINT on CPU#%d\n", cpu);
 	} else {
 		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
+		apic_pr_verbose("Masked ExtINT on CPU#%d\n", cpu);
 	}
 	apic_write(APIC_LVT0, value);
 
@@ -2066,8 +2059,7 @@ static __init void apic_set_fixmap(bool read_apic)
 {
 	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
 	apic_mmio_base = APIC_BASE;
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-		    apic_mmio_base, mp_lapic_addr);
+	apic_pr_verbose("Mapped APIC to %16lx (%16lx)\n", apic_mmio_base, mp_lapic_addr);
 	if (read_apic)
 		apic_read_boot_cpu_id(false);
 }
@@ -2170,18 +2162,17 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
 	apic_eoi();
 	atomic_inc(&irq_err_count);
 
-	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
-		    smp_processor_id(), v);
+	apic_pr_debug("APIC error on CPU%d: %02x", smp_processor_id(), v);
 
 	v &= 0xff;
 	while (v) {
 		if (v & 0x1)
-			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
+			apic_pr_debug_cont(" : %s", error_interrupt_reason[i]);
 		i++;
 		v >>= 1;
 	}
 
-	apic_printk(APIC_DEBUG, KERN_CONT "\n");
+	apic_pr_debug_cont("\n");
 
 	trace_error_apic_exit(ERROR_APIC_VECTOR);
 }
@@ -2201,8 +2192,7 @@ static void __init connect_bsp_APIC(void)
 		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
 		 * local APIC to INT and NMI lines.
 		 */
-		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-				"enabling APIC mode.\n");
+		apic_pr_verbose("Leaving PIC mode, enabling APIC mode.\n");
 		imcr_pic_to_apic();
 	}
 #endif
@@ -2227,8 +2217,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 		 * IPIs, won't work beyond this point!  The only exception are
 		 * INIT IPIs.
 		 */
-		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-				"entering PIC mode.\n");
+		apic_pr_verbose("Disabling APIC mode, entering PIC mode.\n");
 		imcr_apic_to_pic();
 		return;
 	}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f37ad3392fec..e0308d8c4e6c 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -8,129 +8,25 @@
  * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
  * James Cleverdon.
  */
-#include <linux/cpumask.h>
 #include <linux/export.h>
-#include <linux/acpi.h>
 
-#include <asm/jailhouse_para.h>
 #include <asm/apic.h>
 
 #include "local.h"
 
-static struct apic apic_physflat;
-static struct apic apic_flat;
-
-struct apic *apic __ro_after_init = &apic_flat;
-EXPORT_SYMBOL_GPL(apic);
-
-static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-	return 1;
-}
-
-static void _flat_send_IPI_mask(unsigned long mask, int vector)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
-	local_irq_restore(flags);
-}
-
-static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
-	unsigned long mask = cpumask_bits(cpumask)[0];
-
-	_flat_send_IPI_mask(mask, vector);
-}
-
-static void
-flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
-{
-	unsigned long mask = cpumask_bits(cpumask)[0];
-	int cpu = smp_processor_id();
-
-	if (cpu < BITS_PER_LONG)
-		__clear_bit(cpu, &mask);
-
-	_flat_send_IPI_mask(mask, vector);
-}
-
-static u32 flat_get_apic_id(u32 x)
+static u32 physflat_get_apic_id(u32 x)
 {
 	return (x >> 24) & 0xFF;
 }
 
-static int flat_probe(void)
+static int physflat_probe(void)
 {
 	return 1;
 }
 
-static struct apic apic_flat __ro_after_init = {
-	.name				= "flat",
-	.probe				= flat_probe,
-	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
-
-	.dest_mode_logical		= true,
-
-	.disable_esr			= 0,
-
-	.init_apic_ldr			= default_init_apic_ldr,
-	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
-
-	.max_apic_id			= 0xFE,
-	.get_apic_id			= flat_get_apic_id,
-
-	.calc_dest_apicid		= apic_flat_calc_apicid,
-
-	.send_IPI			= default_send_IPI_single,
-	.send_IPI_mask			= flat_send_IPI_mask,
-	.send_IPI_mask_allbutself	= flat_send_IPI_mask_allbutself,
-	.send_IPI_allbutself		= default_send_IPI_allbutself,
-	.send_IPI_all			= default_send_IPI_all,
-	.send_IPI_self			= default_send_IPI_self,
-	.nmi_to_offline_cpu		= true,
-
-	.read				= native_apic_mem_read,
-	.write				= native_apic_mem_write,
-	.eoi				= native_apic_mem_eoi,
-	.icr_read			= native_apic_icr_read,
-	.icr_write			= native_apic_icr_write,
-	.wait_icr_idle			= apic_mem_wait_icr_idle,
-	.safe_wait_icr_idle		= apic_mem_wait_icr_idle_timeout,
-};
-
-/*
- * Physflat mode is used when there are more than 8 CPUs on a system.
- * We cannot use logical delivery in this case because the mask
- * overflows, so use physical mode.
- */
 static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-#ifdef CONFIG_ACPI
-	/*
-	 * Quirk: some x86_64 machines can only use physical APIC mode
-	 * regardless of how many processors are present (x86_64 ES7000
-	 * is an example).
-	 */
-	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
-		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
-		printk(KERN_DEBUG "system APIC only can use physical flat");
-		return 1;
-	}
-
-	if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) {
-		printk(KERN_DEBUG "IBM Summit detected, will use apic physical");
-		return 1;
-	}
-#endif
-
-	return 0;
-}
-
-static int physflat_probe(void)
-{
-	return apic == &apic_physflat || num_possible_cpus() > 8 || jailhouse_paravirt();
+	return 1;
 }
 
 static struct apic apic_physflat __ro_after_init = {
@@ -146,7 +42,7 @@ static struct apic apic_physflat __ro_after_init = {
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 
 	.max_apic_id			= 0xFE,
-	.get_apic_id			= flat_get_apic_id,
+	.get_apic_id			= physflat_get_apic_id,
 
 	.calc_dest_apicid		= apic_default_calc_apicid,
 
@@ -166,8 +62,7 @@ static struct apic apic_physflat __ro_after_init = {
 	.wait_icr_idle			= apic_mem_wait_icr_idle,
 	.safe_wait_icr_idle		= apic_mem_wait_icr_idle_timeout,
 };
+apic_driver(apic_physflat);
 
-/*
- * We need to check for physflat first, so this order is important.
- */
-apic_drivers(apic_physflat, apic_flat);
+struct apic *apic __ro_after_init = &apic_physflat;
+EXPORT_SYMBOL_GPL(apic);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 477b740b2f26..1029ea4ac8ba 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -86,8 +86,8 @@ static unsigned int ioapic_dynirq_base;
 static int ioapic_initialized;
 
 struct irq_pin_list {
-	struct list_head list;
-	int apic, pin;
+	struct list_head	list;
+	int			apic, pin;
 };
 
 struct mp_chip_data {
@@ -96,7 +96,7 @@ struct mp_chip_data {
 	bool				is_level;
 	bool				active_low;
 	bool				isa_irq;
-	u32 count;
+	u32				count;
 };
 
 struct mp_ioapic_gsi {
@@ -105,21 +105,17 @@ struct mp_ioapic_gsi {
 };
 
 static struct ioapic {
-	/*
-	 * # of IRQ routing registers
-	 */
-	int nr_registers;
-	/*
-	 * Saved state during suspend/resume, or while enabling intr-remap.
-	 */
-	struct IO_APIC_route_entry *saved_registers;
+	/* # of IRQ routing registers */
+	int				nr_registers;
+	/* Saved state during suspend/resume, or while enabling intr-remap. */
+	struct IO_APIC_route_entry	*saved_registers;
 	/* I/O APIC config */
-	struct mpc_ioapic mp_config;
+	struct mpc_ioapic		mp_config;
 	/* IO APIC gsi routing info */
-	struct mp_ioapic_gsi  gsi_config;
-	struct ioapic_domain_cfg irqdomain_cfg;
-	struct irq_domain *irqdomain;
-	struct resource *iomem_res;
+	struct mp_ioapic_gsi		gsi_config;
+	struct ioapic_domain_cfg	irqdomain_cfg;
+	struct irq_domain		*irqdomain;
+	struct resource			*iomem_res;
 } ioapics[MAX_IO_APICS];
 
 #define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver
@@ -205,10 +201,9 @@ void mp_save_irq(struct mpc_intsrc *m)
 {
 	int i;
 
-	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
-		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
-		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
-		m->srcbusirq, m->dstapic, m->dstirq);
+	apic_pr_verbose("Int: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC INT %02x\n",
+			m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
+			m->srcbusirq, m->dstapic, m->dstirq);
 
 	for (i = 0; i < mp_irq_entries; i++) {
 		if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
@@ -269,12 +264,14 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
+
 	writel(vector, &io_apic->eoi);
 }
 
 unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
+
 	writel(reg, &io_apic->index);
 	return readl(&io_apic->data);
 }
@@ -300,14 +297,8 @@ static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
 
 static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 {
-	struct IO_APIC_route_entry entry;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	entry = __ioapic_read_entry(apic, pin);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return entry;
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
+	return __ioapic_read_entry(apic, pin);
 }
 
 /*
@@ -324,11 +315,8 @@ static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e
 
 static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	__ioapic_write_entry(apic, pin, e);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 /*
@@ -339,12 +327,10 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 static void ioapic_mask_entry(int apic, int pin)
 {
 	struct IO_APIC_route_entry e = { .masked = true };
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	io_apic_write(apic, 0x10 + 2*pin, e.w1);
 	io_apic_write(apic, 0x11 + 2*pin, e.w2);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 /*
@@ -352,68 +338,39 @@ static void ioapic_mask_entry(int apic, int pin)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static int __add_pin_to_irq_node(struct mp_chip_data *data,
-				 int node, int apic, int pin)
+static bool add_pin_to_irq_node(struct mp_chip_data *data, int node, int apic, int pin)
 {
 	struct irq_pin_list *entry;
 
-	/* don't allow duplicates */
-	for_each_irq_pin(entry, data->irq_2_pin)
+	/* Don't allow duplicates */
+	for_each_irq_pin(entry, data->irq_2_pin) {
 		if (entry->apic == apic && entry->pin == pin)
-			return 0;
+			return true;
+	}
 
 	entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node);
 	if (!entry) {
-		pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
-		       node, apic, pin);
-		return -ENOMEM;
+		pr_err("Cannot allocate irq_pin_list (%d,%d,%d)\n", node, apic, pin);
+		return false;
 	}
+
 	entry->apic = apic;
 	entry->pin = pin;
 	list_add_tail(&entry->list, &data->irq_2_pin);
-
-	return 0;
+	return true;
 }
 
 static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin)
 {
 	struct irq_pin_list *tmp, *entry;
 
-	list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list)
+	list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) {
 		if (entry->apic == apic && entry->pin == pin) {
 			list_del(&entry->list);
 			kfree(entry);
 			return;
 		}
-}
-
-static void add_pin_to_irq_node(struct mp_chip_data *data,
-				int node, int apic, int pin)
-{
-	if (__add_pin_to_irq_node(data, node, apic, pin))
-		panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
-					   int oldapic, int oldpin,
-					   int newapic, int newpin)
-{
-	struct irq_pin_list *entry;
-
-	for_each_irq_pin(entry, data->irq_2_pin) {
-		if (entry->apic == oldapic && entry->pin == oldpin) {
-			entry->apic = newapic;
-			entry->pin = newpin;
-			/* every one is different, right? */
-			return;
-		}
 	}
-
-	/* old apic/pin didn't exist, so just add new ones */
-	add_pin_to_irq_node(data, node, newapic, newpin);
 }
 
 static void io_apic_modify_irq(struct mp_chip_data *data, bool masked,
@@ -430,12 +387,12 @@ static void io_apic_modify_irq(struct mp_chip_data *data, bool masked,
 	}
 }
 
+/*
+ * Synchronize the IO-APIC and the CPU by doing a dummy read from the
+ * IO-APIC
+ */
 static void io_apic_sync(struct irq_pin_list *entry)
 {
-	/*
-	 * Synchronize the IO-APIC and the CPU by doing
-	 * a dummy read from the IO-APIC
-	 */
 	struct io_apic __iomem *io_apic;
 
 	io_apic = io_apic_base(entry->apic);
@@ -445,11 +402,9 @@ static void io_apic_sync(struct irq_pin_list *entry)
 static void mask_ioapic_irq(struct irq_data *irq_data)
 {
 	struct mp_chip_data *data = irq_data->chip_data;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	io_apic_modify_irq(data, true, &io_apic_sync);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void __unmask_ioapic(struct mp_chip_data *data)
@@ -460,11 +415,9 @@ static void __unmask_ioapic(struct mp_chip_data *data)
 static void unmask_ioapic_irq(struct irq_data *irq_data)
 {
 	struct mp_chip_data *data = irq_data->chip_data;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	__unmask_ioapic(data);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 /*
@@ -492,30 +445,24 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector)
 
 		entry = entry1 = __ioapic_read_entry(apic, pin);
 
-		/*
-		 * Mask the entry and change the trigger mode to edge.
-		 */
+		/* Mask the entry and change the trigger mode to edge. */
 		entry1.masked = true;
 		entry1.is_level = false;
 
 		__ioapic_write_entry(apic, pin, entry1);
 
-		/*
-		 * Restore the previous level triggered entry.
-		 */
+		/* Restore the previous level triggered entry. */
 		__ioapic_write_entry(apic, pin, entry);
 	}
 }
 
 static void eoi_ioapic_pin(int vector, struct mp_chip_data *data)
 {
-	unsigned long flags;
 	struct irq_pin_list *entry;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	for_each_irq_pin(entry, data->irq_2_pin)
 		__eoi_ioapic_pin(entry->apic, entry->pin, vector);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
@@ -538,8 +485,6 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 	}
 
 	if (entry.irr) {
-		unsigned long flags;
-
 		/*
 		 * Make sure the trigger mode is set to level. Explicit EOI
 		 * doesn't clear the remote-IRR if the trigger mode is not
@@ -549,9 +494,8 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 			entry.is_level = true;
 			ioapic_write_entry(apic, pin, entry);
 		}
-		raw_spin_lock_irqsave(&ioapic_lock, flags);
+		guard(raw_spinlock_irqsave)(&ioapic_lock);
 		__eoi_ioapic_pin(apic, pin, entry.vector);
-		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 
 	/*
@@ -586,28 +530,23 @@ static int pirq_entries[MAX_PIRQS] = {
 
 static int __init ioapic_pirq_setup(char *str)
 {
-	int i, max;
-	int ints[MAX_PIRQS+1];
+	int i, max, ints[MAX_PIRQS+1];
 
 	get_options(str, ARRAY_SIZE(ints), ints);
 
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"PIRQ redirection, working around broken MP-BIOS.\n");
+	apic_pr_verbose("PIRQ redirection, working around broken MP-BIOS.\n");
+
 	max = MAX_PIRQS;
 	if (ints[0] < MAX_PIRQS)
 		max = ints[0];
 
 	for (i = 0; i < max; i++) {
-		apic_printk(APIC_VERBOSE, KERN_DEBUG
-				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
-		/*
-		 * PIRQs are mapped upside down, usually.
-		 */
+		apic_pr_verbose("... PIRQ%d -> IRQ %d\n", i, ints[i + 1]);
+		/* PIRQs are mapped upside down, usually */
 		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
 	}
 	return 1;
 }
-
 __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
@@ -626,8 +565,7 @@ int save_ioapic_entries(void)
 		}
 
 		for_each_pin(apic, pin)
-			ioapics[apic].saved_registers[pin] =
-				ioapic_read_entry(apic, pin);
+			ioapics[apic].saved_registers[pin] = ioapic_read_entry(apic, pin);
 	}
 
 	return err;
@@ -668,8 +606,7 @@ int restore_ioapic_entries(void)
 			continue;
 
 		for_each_pin(apic, pin)
-			ioapic_write_entry(apic, pin,
-					   ioapics[apic].saved_registers[pin]);
+			ioapic_write_entry(apic, pin, ioapics[apic].saved_registers[pin]);
 	}
 	return 0;
 }
@@ -681,12 +618,13 @@ static int find_irq_entry(int ioapic_idx, int pin, int type)
 {
 	int i;
 
-	for (i = 0; i < mp_irq_entries; i++)
+	for (i = 0; i < mp_irq_entries; i++) {
 		if (mp_irqs[i].irqtype == type &&
 		    (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
 		     mp_irqs[i].dstapic == MP_APIC_ALL) &&
 		    mp_irqs[i].dstirq == pin)
 			return i;
+	}
 
 	return -1;
 }
@@ -701,10 +639,8 @@ static int __init find_isa_irq_pin(int irq, int type)
 	for (i = 0; i < mp_irq_entries; i++) {
 		int lbus = mp_irqs[i].srcbus;
 
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].irqtype == type) &&
+		if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].irqtype == type) &&
 		    (mp_irqs[i].srcbusirq == irq))
-
 			return mp_irqs[i].dstirq;
 	}
 	return -1;
@@ -717,8 +653,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 	for (i = 0; i < mp_irq_entries; i++) {
 		int lbus = mp_irqs[i].srcbus;
 
-		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].irqtype == type) &&
+		if (test_bit(lbus, mp_bus_not_pci) && (mp_irqs[i].irqtype == type) &&
 		    (mp_irqs[i].srcbusirq == irq))
 			break;
 	}
@@ -726,9 +661,10 @@ static int __init find_isa_irq_apic(int irq, int type)
 	if (i < mp_irq_entries) {
 		int ioapic_idx;
 
-		for_each_ioapic(ioapic_idx)
+		for_each_ioapic(ioapic_idx) {
 			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
 				return ioapic_idx;
+		}
 	}
 
 	return -1;
@@ -769,8 +705,7 @@ static bool EISA_ELCR(unsigned int irq)
 		unsigned int port = PIC_ELCR1 + (irq >> 3);
 		return (inb(port) >> (irq & 7)) & 1;
 	}
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"Broken MPtable reports ISA irq %d\n", irq);
+	apic_pr_verbose("Broken MPtable reports ISA irq %d\n", irq);
 	return false;
 }
 
@@ -947,9 +882,9 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)
 static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
 				 struct irq_alloc_info *info)
 {
+	int type = ioapics[ioapic].irqdomain_cfg.type;
 	bool legacy = false;
 	int irq = -1;
-	int type = ioapics[ioapic].irqdomain_cfg.type;
 
 	switch (type) {
 	case IOAPIC_DOMAIN_LEGACY:
@@ -971,8 +906,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
 		return -1;
 	}
 
-	return __irq_domain_alloc_irqs(domain, irq, 1,
-				       ioapic_alloc_attr_node(info),
+	return __irq_domain_alloc_irqs(domain, irq, 1, ioapic_alloc_attr_node(info),
 				       info, legacy, NULL);
 }
 
@@ -986,13 +920,12 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
  * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be
  * multiple pins sharing the same legacy IRQ number when ACPI is disabled.
  */
-static int alloc_isa_irq_from_domain(struct irq_domain *domain,
-				     int irq, int ioapic, int pin,
+static int alloc_isa_irq_from_domain(struct irq_domain *domain, int irq, int ioapic, int pin,
 				     struct irq_alloc_info *info)
 {
-	struct mp_chip_data *data;
 	struct irq_data *irq_data = irq_get_irq_data(irq);
 	int node = ioapic_alloc_attr_node(info);
+	struct mp_chip_data *data;
 
 	/*
 	 * Legacy ISA IRQ has already been allocated, just add pin to
@@ -1002,13 +935,11 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
 	if (irq_data && irq_data->parent_data) {
 		if (!mp_check_pin_attr(irq, info))
 			return -EBUSY;
-		if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic,
-					  info->ioapic.pin))
+		if (!add_pin_to_irq_node(irq_data->chip_data, node, ioapic, info->ioapic.pin))
 			return -ENOMEM;
 	} else {
 		info->flags |= X86_IRQ_ALLOC_LEGACY;
-		irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true,
-					      NULL);
+		irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, NULL);
 		if (irq >= 0) {
 			irq_data = irq_domain_get_irq_data(domain, irq);
 			data = irq_data->chip_data;
@@ -1022,11 +953,11 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
 static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
 			     unsigned int flags, struct irq_alloc_info *info)
 {
-	int irq;
-	bool legacy = false;
+	struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
 	struct irq_alloc_info tmp;
 	struct mp_chip_data *data;
-	struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+	bool legacy = false;
+	int irq;
 
 	if (!domain)
 		return -ENOSYS;
@@ -1046,7 +977,7 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
 			return -EINVAL;
 	}
 
-	mutex_lock(&ioapic_mutex);
+	guard(mutex)(&ioapic_mutex);
 	if (!(flags & IOAPIC_MAP_ALLOC)) {
 		if (!legacy) {
 			irq = irq_find_mapping(domain, pin);
@@ -1067,8 +998,6 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
 			data->count++;
 		}
 	}
-	mutex_unlock(&ioapic_mutex);
-
 	return irq;
 }
 
@@ -1076,26 +1005,20 @@ static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
 {
 	u32 gsi = mp_pin_to_gsi(ioapic, pin);
 
-	/*
-	 * Debugging check, we are in big trouble if this message pops up!
-	 */
+	/* Debugging check, we are in big trouble if this message pops up! */
 	if (mp_irqs[idx].dstirq != pin)
 		pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
 
 #ifdef CONFIG_X86_32
-	/*
-	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
-	 */
+	/* PCI IRQ command line redirection. Yes, limits are hardcoded. */
 	if ((pin >= 16) && (pin <= 23)) {
-		if (pirq_entries[pin-16] != -1) {
-			if (!pirq_entries[pin-16]) {
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"disabling PIRQ%d\n", pin-16);
+		if (pirq_entries[pin - 16] != -1) {
+			if (!pirq_entries[pin - 16]) {
+				apic_pr_verbose("Disabling PIRQ%d\n", pin - 16);
 			} else {
 				int irq = pirq_entries[pin-16];
-				apic_printk(APIC_VERBOSE, KERN_DEBUG
-						"using PIRQ%d -> IRQ %d\n",
-						pin-16, irq);
+
+				apic_pr_verbose("Using PIRQ%d -> IRQ %d\n", pin - 16, irq);
 				return irq;
 			}
 		}
@@ -1133,10 +1056,9 @@ void mp_unmap_irq(int irq)
 	if (!data || data->isa_irq)
 		return;
 
-	mutex_lock(&ioapic_mutex);
+	guard(mutex)(&ioapic_mutex);
 	if (--data->count == 0)
 		irq_domain_free_irqs(irq, 1);
-	mutex_unlock(&ioapic_mutex);
 }
 
 /*
@@ -1147,12 +1069,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 {
 	int irq, i, best_ioapic = -1, best_idx = -1;
 
-	apic_printk(APIC_DEBUG,
-		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-		    bus, slot, pin);
+	apic_pr_debug("Querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+		      bus, slot, pin);
 	if (test_bit(bus, mp_bus_not_pci)) {
-		apic_printk(APIC_VERBOSE,
-			    "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		apic_pr_verbose("PCI BIOS passed nonexistent PCI bus %d!\n", bus);
 		return -1;
 	}
 
@@ -1197,8 +1117,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 		return -1;
 
 out:
-	return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
-			 IOAPIC_MAP_ALLOC);
+	return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, IOAPIC_MAP_ALLOC);
 }
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
@@ -1209,17 +1128,16 @@ static void __init setup_IO_APIC_irqs(void)
 	unsigned int ioapic, pin;
 	int idx;
 
-	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+	apic_pr_verbose("Init IO_APIC IRQs\n");
 
 	for_each_ioapic_pin(ioapic, pin) {
 		idx = find_irq_entry(ioapic, pin, mp_INT);
-		if (idx < 0)
-			apic_printk(APIC_VERBOSE,
-				    KERN_DEBUG " apic %d pin %d not connected\n",
-				    mpc_ioapic_id(ioapic), pin);
-		else
-			pin_2_irq(idx, ioapic, pin,
-				  ioapic ? 0 : IOAPIC_MAP_ALLOC);
+		if (idx < 0) {
+			apic_pr_verbose("apic %d pin %d not connected\n",
+					mpc_ioapic_id(ioapic), pin);
+		} else {
+			pin_2_irq(idx, ioapic, pin, ioapic ? 0 : IOAPIC_MAP_ALLOC);
+		}
 	}
 }
 
@@ -1234,26 +1152,21 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
 	char buf[256];
 	int i;
 
-	printk(KERN_DEBUG "IOAPIC %d:\n", apic);
+	apic_dbg("IOAPIC %d:\n", apic);
 	for (i = 0; i <= nr_entries; i++) {
 		entry = ioapic_read_entry(apic, i);
-		snprintf(buf, sizeof(buf),
-			 " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)",
-			 i,
-			 entry.masked ? "disabled" : "enabled ",
+		snprintf(buf, sizeof(buf), " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)",
+			 i, entry.masked ? "disabled" : "enabled ",
 			 entry.is_level ? "level" : "edge ",
 			 entry.active_low ? "low " : "high",
 			 entry.vector, entry.irr, entry.delivery_status);
 		if (entry.ir_format) {
-			printk(KERN_DEBUG "%s, remapped, I(%04X),  Z(%X)\n",
-			       buf,
-			       (entry.ir_index_15 << 15) | entry.ir_index_0_14,
-				entry.ir_zero);
+			apic_dbg("%s, remapped, I(%04X),  Z(%X)\n", buf,
+				 (entry.ir_index_15 << 15) | entry.ir_index_0_14, entry.ir_zero);
 		} else {
-			printk(KERN_DEBUG "%s, %s, D(%02X%02X), M(%1d)\n", buf,
-			       entry.dest_mode_logical ? "logical " : "physical",
-			       entry.virt_destid_8_14, entry.destid_0_7,
-			       entry.delivery_mode);
+			apic_dbg("%s, %s, D(%02X%02X), M(%1d)\n", buf,
+				 entry.dest_mode_logical ? "logical " : "physical",
+				 entry.virt_destid_8_14, entry.destid_0_7, entry.delivery_mode);
 		}
 	}
 }
@@ -1264,30 +1177,25 @@ static void __init print_IO_APIC(int ioapic_idx)
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
 	union IO_APIC_reg_03 reg_03;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic_idx, 0);
-	reg_01.raw = io_apic_read(ioapic_idx, 1);
-	if (reg_01.bits.version >= 0x10)
-		reg_02.raw = io_apic_read(ioapic_idx, 2);
-	if (reg_01.bits.version >= 0x20)
-		reg_03.raw = io_apic_read(ioapic_idx, 3);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
-	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
-	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
-	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
-	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-
-	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
-	printk(KERN_DEBUG ".......     : max redirection entries: %02X\n",
-		reg_01.bits.entries);
-
-	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
-	printk(KERN_DEBUG ".......     : IO APIC version: %02X\n",
-		reg_01.bits.version);
+	scoped_guard (raw_spinlock_irqsave, &ioapic_lock) {
+		reg_00.raw = io_apic_read(ioapic_idx, 0);
+		reg_01.raw = io_apic_read(ioapic_idx, 1);
+		if (reg_01.bits.version >= 0x10)
+			reg_02.raw = io_apic_read(ioapic_idx, 2);
+		if (reg_01.bits.version >= 0x20)
+			reg_03.raw = io_apic_read(ioapic_idx, 3);
+	}
+
+	apic_dbg("IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
+	apic_dbg(".... register #00: %08X\n", reg_00.raw);
+	apic_dbg(".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+	apic_dbg(".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+	apic_dbg(".......    : LTS          : %X\n", reg_00.bits.LTS);
+	apic_dbg(".... register #01: %08X\n", *(int *)&reg_01);
+	apic_dbg(".......     : max redirection entries: %02X\n", reg_01.bits.entries);
+	apic_dbg(".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
+	apic_dbg(".......     : IO APIC version: %02X\n", reg_01.bits.version);
 
 	/*
 	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
@@ -1295,8 +1203,8 @@ static void __init print_IO_APIC(int ioapic_idx)
 	 * value, so ignore it if reg_02 == reg_01.
 	 */
 	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
-		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
+		apic_dbg(".... register #02: %08X\n", reg_02.raw);
+		apic_dbg(".......     : arbitration: %02X\n", reg_02.bits.arbitration);
 	}
 
 	/*
@@ -1306,11 +1214,11 @@ static void __init print_IO_APIC(int ioapic_idx)
 	 */
 	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
 	    reg_03.raw != reg_01.raw) {
-		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
-		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+		apic_dbg(".... register #03: %08X\n", reg_03.raw);
+		apic_dbg(".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
 	}
 
-	printk(KERN_DEBUG ".... IRQ redirection table:\n");
+	apic_dbg(".... IRQ redirection table:\n");
 	io_apic_print_entries(ioapic_idx, reg_01.bits.entries);
 }
 
@@ -1319,11 +1227,11 @@ void __init print_IO_APICs(void)
 	int ioapic_idx;
 	unsigned int irq;
 
-	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-	for_each_ioapic(ioapic_idx)
-		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mpc_ioapic_id(ioapic_idx),
-		       ioapics[ioapic_idx].nr_registers);
+	apic_dbg("number of MP IRQ sources: %d.\n", mp_irq_entries);
+	for_each_ioapic(ioapic_idx) {
+		apic_dbg("number of IO-APIC #%d registers: %d.\n",
+			 mpc_ioapic_id(ioapic_idx), ioapics[ioapic_idx].nr_registers);
+	}
 
 	/*
 	 * We are a bit conservative about what we expect.  We have to
@@ -1334,7 +1242,7 @@ void __init print_IO_APICs(void)
 	for_each_ioapic(ioapic_idx)
 		print_IO_APIC(ioapic_idx);
 
-	printk(KERN_DEBUG "IRQ to pin mappings:\n");
+	apic_dbg("IRQ to pin mappings:\n");
 	for_each_active_irq(irq) {
 		struct irq_pin_list *entry;
 		struct irq_chip *chip;
@@ -1349,7 +1257,7 @@ void __init print_IO_APICs(void)
 		if (list_empty(&data->irq_2_pin))
 			continue;
 
-		printk(KERN_DEBUG "IRQ%d ", irq);
+		apic_dbg("IRQ%d ", irq);
 		for_each_irq_pin(entry, data->irq_2_pin)
 			pr_cont("-> %d:%d", entry->apic, entry->pin);
 		pr_cont("\n");
@@ -1363,8 +1271,7 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 void __init enable_IO_APIC(void)
 {
-	int i8259_apic, i8259_pin;
-	int apic, pin;
+	int i8259_apic, i8259_pin, apic, pin;
 
 	if (ioapic_is_disabled)
 		nr_ioapics = 0;
@@ -1376,19 +1283,21 @@ void __init enable_IO_APIC(void)
 		/* See if any of the pins is in ExtINT mode */
 		struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin);
 
-		/* If the interrupt line is enabled and in ExtInt mode
-		 * I have found the pin where the i8259 is connected.
+		/*
+		 * If the interrupt line is enabled and in ExtInt mode I
+		 * have found the pin where the i8259 is connected.
 		 */
-		if (!entry.masked &&
-		    entry.delivery_mode == APIC_DELIVERY_MODE_EXTINT) {
+		if (!entry.masked && entry.delivery_mode == APIC_DELIVERY_MODE_EXTINT) {
 			ioapic_i8259.apic = apic;
 			ioapic_i8259.pin  = pin;
-			goto found_i8259;
+			break;
 		}
 	}
- found_i8259:
-	/* Look to see what if the MP table has reported the ExtINT */
-	/* If we could not find the appropriate pin by looking at the ioapic
+
+	/*
+	 * Look to see what if the MP table has reported the ExtINT
+	 *
+	 * If we could not find the appropriate pin by looking at the ioapic
 	 * the i8259 probably is not connected the ioapic but give the
 	 * mptable a chance anyway.
 	 */
@@ -1396,29 +1305,24 @@ void __init enable_IO_APIC(void)
 	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
 	/* Trust the MP table if nothing is setup in the hardware */
 	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
-		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+		pr_warn("ExtINT not setup in hardware but reported by MP table\n");
 		ioapic_i8259.pin  = i8259_pin;
 		ioapic_i8259.apic = i8259_apic;
 	}
 	/* Complain if the MP table and the hardware disagree */
 	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
-		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
-	{
-		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
-	}
+	    (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+		pr_warn("ExtINT in hardware and MP table differ\n");
 
-	/*
-	 * Do not trust the IO-APIC being empty at bootup
-	 */
+	/* Do not trust the IO-APIC being empty at bootup */
 	clear_IO_APIC();
 }
 
 void native_restore_boot_irq_mode(void)
 {
 	/*
-	 * If the i8259 is routed through an IOAPIC
-	 * Put that IOAPIC in virtual wire mode
-	 * so legacy interrupts can be delivered.
+	 * If the i8259 is routed through an IOAPIC, put that IOAPIC in
+	 * virtual wire mode so legacy interrupts can be delivered.
 	 */
 	if (ioapic_i8259.pin != -1) {
 		struct IO_APIC_route_entry entry;
@@ -1433,9 +1337,7 @@ void native_restore_boot_irq_mode(void)
 		entry.destid_0_7	= apic_id & 0xFF;
 		entry.virt_destid_8_14	= apic_id >> 8;
 
-		/*
-		 * Add it to the IO-APIC irq-routing table:
-		 */
+		/* Add it to the IO-APIC irq-routing table */
 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
 
@@ -1464,7 +1366,6 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void)
 	const u32 broadcast_id = 0xF;
 	union IO_APIC_reg_00 reg_00;
 	unsigned char old_id;
-	unsigned long flags;
 	int ioapic_idx, i;
 
 	/*
@@ -1478,9 +1379,8 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void)
 	 */
 	for_each_ioapic(ioapic_idx) {
 		/* Read the register 0 value */
-		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(ioapic_idx, 0);
-		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+		scoped_guard (raw_spinlock_irqsave, &ioapic_lock)
+			reg_00.raw = io_apic_read(ioapic_idx, 0);
 
 		old_id = mpc_ioapic_id(ioapic_idx);
 
@@ -1508,47 +1408,42 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void)
 			set_bit(i, phys_id_present_map);
 			ioapics[ioapic_idx].mp_config.apicid = i;
 		} else {
-			apic_printk(APIC_VERBOSE, "Setting %d in the phys_id_present_map\n",
-				    mpc_ioapic_id(ioapic_idx));
+			apic_pr_verbose("Setting %d in the phys_id_present_map\n",
+					mpc_ioapic_id(ioapic_idx));
 			set_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map);
 		}
 
 		/*
-		 * We need to adjust the IRQ routing table
-		 * if the ID changed.
+		 * We need to adjust the IRQ routing table if the ID
+		 * changed.
 		 */
-		if (old_id != mpc_ioapic_id(ioapic_idx))
-			for (i = 0; i < mp_irq_entries; i++)
+		if (old_id != mpc_ioapic_id(ioapic_idx)) {
+			for (i = 0; i < mp_irq_entries; i++) {
 				if (mp_irqs[i].dstapic == old_id)
-					mp_irqs[i].dstapic
-						= mpc_ioapic_id(ioapic_idx);
+					mp_irqs[i].dstapic = mpc_ioapic_id(ioapic_idx);
+			}
+		}
 
 		/*
-		 * Update the ID register according to the right value
-		 * from the MPC table if they are different.
+		 * Update the ID register according to the right value from
+		 * the MPC table if they are different.
 		 */
 		if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
 			continue;
 
-		apic_printk(APIC_VERBOSE, KERN_INFO
-			"...changing IO-APIC physical APIC ID to %d ...",
-			mpc_ioapic_id(ioapic_idx));
+		apic_pr_verbose("...changing IO-APIC physical APIC ID to %d ...",
+				mpc_ioapic_id(ioapic_idx));
 
 		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
-		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(ioapic_idx, 0, reg_00.raw);
-		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		/*
-		 * Sanity check
-		 */
-		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(ioapic_idx, 0);
-		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+		scoped_guard (raw_spinlock_irqsave, &ioapic_lock) {
+			io_apic_write(ioapic_idx, 0, reg_00.raw);
+			reg_00.raw = io_apic_read(ioapic_idx, 0);
+		}
+		/* Sanity check */
 		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
 			pr_cont("could not set ID!\n");
 		else
-			apic_printk(APIC_VERBOSE, " ok.\n");
+			apic_pr_verbose(" ok.\n");
 	}
 }
 
@@ -1593,8 +1488,7 @@ static void __init delay_with_tsc(void)
 	do {
 		rep_nop();
 		now = rdtsc();
-	} while ((now - start) < 40000000000ULL / HZ &&
-		time_before_eq(jiffies, end));
+	} while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end));
 }
 
 static void __init delay_without_tsc(void)
@@ -1655,36 +1549,29 @@ static int __init timer_irq_works(void)
  * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
  * better to do it this way as thus we do not have to be aware of
  * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
  *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
+ *
+ * Edge triggered needs to resend any interrupt that was delayed but this
+ * is now handled in the device independent code.
+ *
+ * Starting up an edge-triggered IO-APIC interrupt is nasty - we need to
+ * make sure that we get the edge.  If it is already asserted for some
+ * reason, we need to return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake an edge even if it
+ * isn't on the 8259A...
  */
 static unsigned int startup_ioapic_irq(struct irq_data *data)
 {
 	int was_pending = 0, irq = data->irq;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	if (irq < nr_legacy_irqs()) {
 		legacy_pic->mask(irq);
 		if (legacy_pic->irq_pending(irq))
 			was_pending = 1;
 	}
 	__unmask_ioapic(data->chip_data);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
 	return was_pending;
 }
 
@@ -1694,9 +1581,8 @@ atomic_t irq_mis_count;
 static bool io_apic_level_ack_pending(struct mp_chip_data *data)
 {
 	struct irq_pin_list *entry;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	for_each_irq_pin(entry, data->irq_2_pin) {
 		struct IO_APIC_route_entry e;
 		int pin;
@@ -1704,13 +1590,9 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)
 		pin = entry->pin;
 		e.w1 = io_apic_read(entry->apic, 0x10 + pin*2);
 		/* Is the remote IRR bit set? */
-		if (e.irr) {
-			raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+		if (e.irr)
 			return true;
-		}
 	}
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
 	return false;
 }
 
@@ -1728,7 +1610,8 @@ static inline bool ioapic_prepare_move(struct irq_data *data)
 static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
 {
 	if (unlikely(moveit)) {
-		/* Only migrate the irq if the ack has been received.
+		/*
+		 * Only migrate the irq if the ack has been received.
 		 *
 		 * On rare occasions the broadcast level triggered ack gets
 		 * delayed going to ioapics, and if we reprogram the
@@ -1911,18 +1794,16 @@ static void ioapic_configure_entry(struct irq_data *irqd)
 		__ioapic_write_entry(entry->apic, entry->pin, mpd->entry);
 }
 
-static int ioapic_set_affinity(struct irq_data *irq_data,
-			       const struct cpumask *mask, bool force)
+static int ioapic_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force)
 {
 	struct irq_data *parent = irq_data->parent_data;
-	unsigned long flags;
 	int ret;
 
 	ret = parent->chip->irq_set_affinity(parent, mask, force);
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE)
 		ioapic_configure_entry(irq_data);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return ret;
 }
@@ -1941,9 +1822,8 @@ static int ioapic_set_affinity(struct irq_data *irq_data,
  *
  * Verify that the corresponding Remote-IRR bits are clear.
  */
-static int ioapic_irq_get_chip_state(struct irq_data *irqd,
-				   enum irqchip_irq_state which,
-				   bool *state)
+static int ioapic_irq_get_chip_state(struct irq_data *irqd, enum irqchip_irq_state which,
+				     bool *state)
 {
 	struct mp_chip_data *mcd = irqd->chip_data;
 	struct IO_APIC_route_entry rentry;
@@ -1953,7 +1833,8 @@ static int ioapic_irq_get_chip_state(struct irq_data *irqd,
 		return -EINVAL;
 
 	*state = false;
-	raw_spin_lock(&ioapic_lock);
+
+	guard(raw_spinlock)(&ioapic_lock);
 	for_each_irq_pin(p, mcd->irq_2_pin) {
 		rentry = __ioapic_read_entry(p->apic, p->pin);
 		/*
@@ -1967,7 +1848,6 @@ static int ioapic_irq_get_chip_state(struct irq_data *irqd,
 			break;
 		}
 	}
-	raw_spin_unlock(&ioapic_lock);
 	return 0;
 }
 
@@ -2008,14 +1888,13 @@ static inline void init_IO_APIC_traps(void)
 		cfg = irq_cfg(irq);
 		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
-			 * Hmm.. We don't have an entry for this,
-			 * so default to an old-fashioned 8259
-			 * interrupt if we can..
+			 * Hmm.. We don't have an entry for this, so
+			 * default to an old-fashioned 8259 interrupt if we
+			 * can. Otherwise set the dummy interrupt chip.
 			 */
 			if (irq < nr_legacy_irqs())
 				legacy_pic->make_irq(irq);
 			else
-				/* Strange. Oh, well.. */
 				irq_set_chip(irq, &no_irq_chip);
 		}
 	}
@@ -2024,20 +1903,17 @@ static inline void init_IO_APIC_traps(void)
 /*
  * The local APIC irq-chip implementation:
  */
-
 static void mask_lapic_irq(struct irq_data *data)
 {
-	unsigned long v;
+	unsigned long v = apic_read(APIC_LVT0);
 
-	v = apic_read(APIC_LVT0);
 	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
 static void unmask_lapic_irq(struct irq_data *data)
 {
-	unsigned long v;
+	unsigned long v = apic_read(APIC_LVT0);
 
-	v = apic_read(APIC_LVT0);
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
@@ -2056,8 +1932,7 @@ static struct irq_chip lapic_chip __read_mostly = {
 static void lapic_register_intr(int irq)
 {
 	irq_clear_status_flags(irq, IRQ_LEVEL);
-	irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
-				      "edge");
+	irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge");
 }
 
 /*
@@ -2069,9 +1944,9 @@ static void lapic_register_intr(int irq)
  */
 static inline void __init unlock_ExtINT_logic(void)
 {
-	int apic, pin, i;
-	struct IO_APIC_route_entry entry0, entry1;
 	unsigned char save_control, save_freq_select;
+	struct IO_APIC_route_entry entry0, entry1;
+	int apic, pin, i;
 	u32 apic_id;
 
 	pin  = find_isa_irq_pin(8, mp_INT);
@@ -2131,10 +2006,10 @@ static int __init disable_timer_pin_setup(char *arg)
 }
 early_param("disable_timer_pin_1", disable_timer_pin_setup);
 
-static int mp_alloc_timer_irq(int ioapic, int pin)
+static int __init mp_alloc_timer_irq(int ioapic, int pin)
 {
-	int irq = -1;
 	struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+	int irq = -1;
 
 	if (domain) {
 		struct irq_alloc_info info;
@@ -2142,21 +2017,36 @@ static int mp_alloc_timer_irq(int ioapic, int pin)
 		ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0);
 		info.devid = mpc_ioapic_id(ioapic);
 		info.ioapic.pin = pin;
-		mutex_lock(&ioapic_mutex);
+		guard(mutex)(&ioapic_mutex);
 		irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info);
-		mutex_unlock(&ioapic_mutex);
 	}
 
 	return irq;
 }
 
+static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
+					   int oldapic, int oldpin,
+					   int newapic, int newpin)
+{
+	struct irq_pin_list *entry;
+
+	for_each_irq_pin(entry, data->irq_2_pin) {
+		if (entry->apic == oldapic && entry->pin == oldpin) {
+			entry->apic = newapic;
+			entry->pin = newpin;
+			return;
+		}
+	}
+
+	/* Old apic/pin didn't exist, so just add a new one */
+	add_pin_to_irq_node(data, node, newapic, newpin);
+}
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for all platforms.
  */
 static inline void __init check_timer(void)
 {
@@ -2194,9 +2084,8 @@ static inline void __init check_timer(void)
 	pin2  = ioapic_i8259.pin;
 	apic2 = ioapic_i8259.apic;
 
-	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
-		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
-		    cfg->vector, apic1, pin1, apic2, pin2);
+	pr_info("..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+		cfg->vector, apic1, pin1, apic2, pin2);
 
 	/*
 	 * Some BIOS writers are clueless and report the ExtINTA
@@ -2240,13 +2129,10 @@ static inline void __init check_timer(void)
 		panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
-			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
-				    "8254 timer not connected to IO-APIC\n");
+			pr_err("..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
 
-		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
-			    "(IRQ0) through the 8259A ...\n");
-		apic_printk(APIC_QUIET, KERN_INFO
-			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
+		pr_info("...trying to set up timer (IRQ0) through the 8259A ...\n");
+		pr_info("..... (found apic %d pin %d) ...\n", apic2, pin2);
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
@@ -2255,7 +2141,7 @@ static inline void __init check_timer(void)
 		irq_domain_activate_irq(irq_data, false);
 		legacy_pic->unmask(0);
 		if (timer_irq_works()) {
-			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
+			pr_info("....... works.\n");
 			goto out;
 		}
 		/*
@@ -2263,26 +2149,24 @@ static inline void __init check_timer(void)
 		 */
 		legacy_pic->mask(0);
 		clear_IO_APIC_pin(apic2, pin2);
-		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
+		pr_info("....... failed.\n");
 	}
 
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as Virtual Wire IRQ...\n");
+	pr_info("...trying to set up timer as Virtual Wire IRQ...\n");
 
 	lapic_register_intr(0);
 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	legacy_pic->unmask(0);
 
 	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+		pr_info("..... works.\n");
 		goto out;
 	}
 	legacy_pic->mask(0);
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
+	pr_info("..... failed.\n");
 
-	apic_printk(APIC_QUIET, KERN_INFO
-		    "...trying to set up timer as ExtINT IRQ...\n");
+	pr_info("...trying to set up timer as ExtINT IRQ...\n");
 
 	legacy_pic->init(0);
 	legacy_pic->make_irq(0);
@@ -2292,14 +2176,15 @@ static inline void __init check_timer(void)
 	unlock_ExtINT_logic();
 
 	if (timer_irq_works()) {
-		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+		pr_info("..... works.\n");
 		goto out;
 	}
-	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
-	if (apic_is_x2apic_enabled())
-		apic_printk(APIC_QUIET, KERN_INFO
-			    "Perhaps problem with the pre-enabled x2apic mode\n"
-			    "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
+
+	pr_info("..... failed :(.\n");
+	if (apic_is_x2apic_enabled()) {
+		pr_info("Perhaps problem with the pre-enabled x2apic mode\n"
+			"Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
+	}
 	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 		"report.  Then try booting with the 'noapic' option.\n");
 out:
@@ -2327,11 +2212,11 @@ static inline void __init check_timer(void)
 
 static int mp_irqdomain_create(int ioapic)
 {
-	struct irq_domain *parent;
+	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
 	int hwirqs = mp_ioapic_pin_count(ioapic);
 	struct ioapic *ip = &ioapics[ioapic];
 	struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
-	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+	struct irq_domain *parent;
 	struct fwnode_handle *fn;
 	struct irq_fwspec fwspec;
 
@@ -2367,10 +2252,8 @@ static int mp_irqdomain_create(int ioapic)
 		return -ENOMEM;
 	}
 
-	if (cfg->type == IOAPIC_DOMAIN_LEGACY ||
-	    cfg->type == IOAPIC_DOMAIN_STRICT)
-		ioapic_dynirq_base = max(ioapic_dynirq_base,
-					 gsi_cfg->gsi_end + 1);
+	if (cfg->type == IOAPIC_DOMAIN_LEGACY || cfg->type == IOAPIC_DOMAIN_STRICT)
+		ioapic_dynirq_base = max(ioapic_dynirq_base, gsi_cfg->gsi_end + 1);
 
 	return 0;
 }
@@ -2397,13 +2280,11 @@ void __init setup_IO_APIC(void)
 
 	io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
 
-	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+	apic_pr_verbose("ENABLING IO-APIC IRQs\n");
 	for_each_ioapic(ioapic)
 		BUG_ON(mp_irqdomain_create(ioapic));
 
-	/*
-         * Set up IO-APIC IRQ routing.
-         */
+	/* Set up IO-APIC IRQ routing. */
 	x86_init.mpparse.setup_ioapic_ids();
 
 	sync_Arb_IDs();
@@ -2417,16 +2298,14 @@ void __init setup_IO_APIC(void)
 
 static void resume_ioapic_id(int ioapic_idx)
 {
-	unsigned long flags;
 	union IO_APIC_reg_00 reg_00;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	reg_00.raw = io_apic_read(ioapic_idx, 0);
 	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
 		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
 		io_apic_write(ioapic_idx, 0, reg_00.raw);
 	}
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void ioapic_resume(void)
@@ -2440,8 +2319,8 @@ static void ioapic_resume(void)
 }
 
 static struct syscore_ops ioapic_syscore_ops = {
-	.suspend = save_ioapic_entries,
-	.resume = ioapic_resume,
+	.suspend	= save_ioapic_entries,
+	.resume		= ioapic_resume,
 };
 
 static int __init ioapic_init_ops(void)
@@ -2456,15 +2335,13 @@ device_initcall(ioapic_init_ops);
 static int io_apic_get_redir_entries(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	reg_01.raw = io_apic_read(ioapic, 1);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
-	/* The register returns the maximum index redir index
-	 * supported, which is one less than the total number of redir
-	 * entries.
+	/*
+	 * The register returns the maximum index redir index supported,
+	 * which is one less than the total number of redir entries.
 	 */
 	return reg_01.bits.entries + 1;
 }
@@ -2494,16 +2371,14 @@ static int io_apic_get_unique_id(int ioapic, int apic_id)
 	static DECLARE_BITMAP(apic_id_map, MAX_LOCAL_APIC);
 	const u32 broadcast_id = 0xF;
 	union IO_APIC_reg_00 reg_00;
-	unsigned long flags;
 	int i = 0;
 
 	/* Initialize the ID map */
 	if (bitmap_empty(apic_id_map, MAX_LOCAL_APIC))
 		copy_phys_cpu_present_map(apic_id_map);
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic, 0);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	scoped_guard (raw_spinlock_irqsave, &ioapic_lock)
+		reg_00.raw = io_apic_read(ioapic, 0);
 
 	if (apic_id >= broadcast_id) {
 		pr_warn("IOAPIC[%d]: Invalid apic_id %d, trying %d\n",
@@ -2530,21 +2405,19 @@ static int io_apic_get_unique_id(int ioapic, int apic_id)
 	if (reg_00.bits.ID != apic_id) {
 		reg_00.bits.ID = apic_id;
 
-		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(ioapic, 0, reg_00.raw);
-		reg_00.raw = io_apic_read(ioapic, 0);
-		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+		scoped_guard (raw_spinlock_irqsave, &ioapic_lock) {
+			io_apic_write(ioapic, 0, reg_00.raw);
+			reg_00.raw = io_apic_read(ioapic, 0);
+		}
 
 		/* Sanity check */
 		if (reg_00.bits.ID != apic_id) {
-			pr_err("IOAPIC[%d]: Unable to change apic_id!\n",
-			       ioapic);
+			pr_err("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
 			return -1;
 		}
 	}
 
-	apic_printk(APIC_VERBOSE, KERN_INFO
-			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+	apic_pr_verbose("IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 
 	return apic_id;
 }
@@ -2560,7 +2433,6 @@ static u8 io_apic_unique_id(int idx, u8 id)
 {
 	union IO_APIC_reg_00 reg_00;
 	DECLARE_BITMAP(used, 256);
-	unsigned long flags;
 	u8 new_id;
 	int i;
 
@@ -2576,26 +2448,23 @@ static u8 io_apic_unique_id(int idx, u8 id)
 	 * Read the current id from the ioapic and keep it if
 	 * available.
 	 */
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(idx, 0);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	scoped_guard (raw_spinlock_irqsave, &ioapic_lock)
+		reg_00.raw = io_apic_read(idx, 0);
+
 	new_id = reg_00.bits.ID;
 	if (!test_bit(new_id, used)) {
-		apic_printk(APIC_VERBOSE, KERN_INFO
-			"IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
-			 idx, new_id, id);
+		apic_pr_verbose("IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
+				idx, new_id, id);
 		return new_id;
 	}
 
-	/*
-	 * Get the next free id and write it to the ioapic.
-	 */
+	/* Get the next free id and write it to the ioapic. */
 	new_id = find_first_zero_bit(used, 256);
 	reg_00.bits.ID = new_id;
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(idx, 0, reg_00.raw);
-	reg_00.raw = io_apic_read(idx, 0);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	scoped_guard (raw_spinlock_irqsave, &ioapic_lock) {
+		io_apic_write(idx, 0, reg_00.raw);
+		reg_00.raw = io_apic_read(idx, 0);
+	}
 	/* Sanity check */
 	BUG_ON(reg_00.bits.ID != new_id);
 
@@ -2605,12 +2474,10 @@ static u8 io_apic_unique_id(int idx, u8 id)
 
 static int io_apic_get_version(int ioapic)
 {
-	union IO_APIC_reg_01	reg_01;
-	unsigned long flags;
+	union IO_APIC_reg_01 reg_01;
 
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	reg_01.raw = io_apic_read(ioapic, 1);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return reg_01.bits.version;
 }
@@ -2625,8 +2492,8 @@ static struct resource *ioapic_resources;
 
 static struct resource * __init ioapic_setup_resources(void)
 {
-	unsigned long n;
 	struct resource *res;
+	unsigned long n;
 	char *mem;
 	int i;
 
@@ -2686,9 +2553,7 @@ void __init io_apic_init_mappings(void)
 			ioapic_phys = mpc_ioapic_addr(i);
 #ifdef CONFIG_X86_32
 			if (!ioapic_phys) {
-				printk(KERN_ERR
-				       "WARNING: bogus zero IO-APIC "
-				       "address found in MPTABLE, "
+				pr_err("WARNING: bogus zero IO-APIC address found in MPTABLE, "
 				       "disabling IO/APIC support!\n");
 				smp_found_config = 0;
 				ioapic_is_disabled = true;
@@ -2707,9 +2572,8 @@ void __init io_apic_init_mappings(void)
 			ioapic_phys = __pa(ioapic_phys);
 		}
 		io_apic_set_fixmap(idx, ioapic_phys);
-		apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
-			__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
-			ioapic_phys);
+		apic_pr_verbose("mapped IOAPIC to %08lx (%08lx)\n",
+				__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), ioapic_phys);
 		idx++;
 
 		ioapic_res->start = ioapic_phys;
@@ -2720,13 +2584,12 @@ void __init io_apic_init_mappings(void)
 
 void __init ioapic_insert_resources(void)
 {
-	int i;
 	struct resource *r = ioapic_resources;
+	int i;
 
 	if (!r) {
 		if (nr_ioapics > 0)
-			printk(KERN_ERR
-				"IO APIC resources couldn't be allocated.\n");
+			pr_err("IO APIC resources couldn't be allocated.\n");
 		return;
 	}
 
@@ -2746,11 +2609,12 @@ int mp_find_ioapic(u32 gsi)
 	/* Find the IOAPIC that manages this GSI. */
 	for_each_ioapic(i) {
 		struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
+
 		if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end)
 			return i;
 	}
 
-	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+	pr_err("ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
 	return -1;
 }
 
@@ -2789,12 +2653,10 @@ static int bad_ioapic_register(int idx)
 
 static int find_free_ioapic_entry(void)
 {
-	int idx;
-
-	for (idx = 0; idx < MAX_IO_APICS; idx++)
+	for (int idx = 0; idx < MAX_IO_APICS; idx++) {
 		if (ioapics[idx].nr_registers == 0)
 			return idx;
-
+	}
 	return MAX_IO_APICS;
 }
 
@@ -2805,8 +2667,7 @@ static int find_free_ioapic_entry(void)
  * @gsi_base:	base of GSI associated with the IOAPIC
  * @cfg:	configuration information for the IOAPIC
  */
-int mp_register_ioapic(int id, u32 address, u32 gsi_base,
-		       struct ioapic_domain_cfg *cfg)
+int mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg)
 {
 	bool hotplug = !!ioapic_initialized;
 	struct mp_ioapic_gsi *gsi_cfg;
@@ -2817,12 +2678,13 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
 		pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");
 		return -EINVAL;
 	}
-	for_each_ioapic(ioapic)
+
+	for_each_ioapic(ioapic) {
 		if (ioapics[ioapic].mp_config.apicaddr == address) {
-			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
-				address, ioapic);
+			pr_warn("address 0x%x conflicts with IOAPIC%d\n", address, ioapic);
 			return -EEXIST;
 		}
+	}
 
 	idx = find_free_ioapic_entry();
 	if (idx >= MAX_IO_APICS) {
@@ -2857,8 +2719,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
 		    (gsi_end >= gsi_cfg->gsi_base &&
 		     gsi_end <= gsi_cfg->gsi_end)) {
 			pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n",
-				gsi_base, gsi_end,
-				gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+				gsi_base, gsi_end, gsi_cfg->gsi_base, gsi_cfg->gsi_end);
 			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
 			return -ENOSPC;
 		}
@@ -2892,8 +2753,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
 	ioapics[idx].nr_registers = entries;
 
 	pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
-		idx, mpc_ioapic_id(idx),
-		mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
+		idx, mpc_ioapic_id(idx), mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
 		gsi_cfg->gsi_base, gsi_cfg->gsi_end);
 
 	return 0;
@@ -2904,11 +2764,13 @@ int mp_unregister_ioapic(u32 gsi_base)
 	int ioapic, pin;
 	int found = 0;
 
-	for_each_ioapic(ioapic)
+	for_each_ioapic(ioapic) {
 		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
 			found = 1;
 			break;
 		}
+	}
+
 	if (!found) {
 		pr_warn("can't find IOAPIC for GSI %d\n", gsi_base);
 		return -ENODEV;
@@ -2922,8 +2784,7 @@ int mp_unregister_ioapic(u32 gsi_base)
 		if (irq >= 0) {
 			data = irq_get_chip_data(irq);
 			if (data && data->count) {
-				pr_warn("pin%d on IOAPIC%d is still in use.\n",
-					pin, ioapic);
+				pr_warn("pin%d on IOAPIC%d is still in use.\n",	pin, ioapic);
 				return -EBUSY;
 			}
 		}
@@ -2958,8 +2819,7 @@ static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data,
 	if (info && info->ioapic.valid) {
 		data->is_level = info->ioapic.is_level;
 		data->active_low = info->ioapic.active_low;
-	} else if (__acpi_get_override_irq(gsi, &data->is_level,
-					   &data->active_low) < 0) {
+	} else if (__acpi_get_override_irq(gsi, &data->is_level, &data->active_low) < 0) {
 		/* PCI interrupts are always active low level triggered. */
 		data->is_level = true;
 		data->active_low = true;
@@ -3017,10 +2877,8 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 		return -ENOMEM;
 
 	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
-	if (ret < 0) {
-		kfree(data);
-		return ret;
-	}
+	if (ret < 0)
+		goto free_data;
 
 	INIT_LIST_HEAD(&data->irq_2_pin);
 	irq_data->hwirq = info->ioapic.pin;
@@ -3029,7 +2887,10 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 	irq_data->chip_data = data;
 	mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info);
 
-	add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+	if (!add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin)) {
+		ret = -ENOMEM;
+		goto free_irqs;
+	}
 
 	mp_preconfigure_entry(data);
 	mp_register_handler(virq, data->is_level);
@@ -3039,11 +2900,15 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 		legacy_pic->mask(virq);
 	local_irq_restore(flags);
 
-	apic_printk(APIC_VERBOSE, KERN_DEBUG
-		    "IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n",
-		    ioapic, mpc_ioapic_id(ioapic), pin, virq,
-		    data->is_level, data->active_low);
+	apic_pr_verbose("IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n",
+			ioapic, mpc_ioapic_id(ioapic), pin, virq, data->is_level, data->active_low);
 	return 0;
+
+free_irqs:
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+free_data:
+	kfree(data);
+	return ret;
 }
 
 void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
@@ -3056,22 +2921,17 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 	irq_data = irq_domain_get_irq_data(domain, virq);
 	if (irq_data && irq_data->chip_data) {
 		data = irq_data->chip_data;
-		__remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain),
-				      (int)irq_data->hwirq);
+		__remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), (int)irq_data->hwirq);
 		WARN_ON(!list_empty(&data->irq_2_pin));
 		kfree(irq_data->chip_data);
 	}
 	irq_domain_free_irqs_top(domain, virq, nr_irqs);
 }
 
-int mp_irqdomain_activate(struct irq_domain *domain,
-			  struct irq_data *irq_data, bool reserve)
+int mp_irqdomain_activate(struct irq_domain *domain, struct irq_data *irq_data, bool reserve)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	guard(raw_spinlock_irqsave)(&ioapic_lock);
 	ioapic_configure_entry(irq_data);
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	return 0;
 }
 
@@ -3079,8 +2939,7 @@ void mp_irqdomain_deactivate(struct irq_domain *domain,
 			     struct irq_data *irq_data)
 {
 	/* It won't be called for IRQ with multiple IOAPIC pins associated */
-	ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain),
-			  (int)irq_data->hwirq);
+	ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), (int)irq_data->hwirq);
 }
 
 int mp_irqdomain_ioapic_idx(struct irq_domain *domain)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index e89171b0347a..4a1b1b28abf9 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -68,7 +68,7 @@ static void __init mpc_oem_bus_info(struct mpc_bus *m, char *str)
 {
 	memcpy(str, m->bustype, 6);
 	str[6] = 0;
-	apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
+	apic_pr_verbose("Bus #%d is %s\n", m->busid, str);
 }
 
 static void __init MP_bus_info(struct mpc_bus *m)
@@ -417,7 +417,7 @@ static unsigned long __init get_mpc_size(unsigned long physptr)
 	mpc = early_memremap(physptr, PAGE_SIZE);
 	size = mpc->length;
 	early_memunmap(mpc, PAGE_SIZE);
-	apic_printk(APIC_VERBOSE, "  mpc: %lx-%lx\n", physptr, physptr + size);
+	apic_pr_verbose("  mpc: %lx-%lx\n", physptr, physptr + size);
 
 	return size;
 }
@@ -560,8 +560,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
 	struct mpf_intel *mpf;
 	int ret = 0;
 
-	apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n",
-		    base, base + length - 1);
+	apic_pr_verbose("Scan for SMP in [mem %#010lx-%#010lx]\n", base, base + length - 1);
 	BUILD_BUG_ON(sizeof(*mpf) != 16);
 
 	while (length > 0) {
@@ -683,13 +682,13 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
 {
 	int i;
 
-	apic_printk(APIC_VERBOSE, "OLD ");
+	apic_pr_verbose("OLD ");
 	print_mp_irq_info(m);
 
 	i = get_MP_intsrc_index(m);
 	if (i > 0) {
 		memcpy(m, &mp_irqs[i], sizeof(*m));
-		apic_printk(APIC_VERBOSE, "NEW ");
+		apic_pr_verbose("NEW ");
 		print_mp_irq_info(&mp_irqs[i]);
 		return;
 	}
@@ -772,7 +771,7 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 			continue;
 
 		if (nr_m_spare > 0) {
-			apic_printk(APIC_VERBOSE, "*NEW* found\n");
+			apic_pr_verbose("*NEW* found\n");
 			nr_m_spare--;
 			memcpy(m_spare[nr_m_spare], &mp_irqs[i], sizeof(mp_irqs[i]));
 			m_spare[nr_m_spare] = NULL;
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index e090ca07364b..7a6d188e3bea 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1352,12 +1352,11 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
 	case X86_IRQ_ALLOC_TYPE_IOAPIC:
 		/* Set source-id of interrupt request */
 		set_ioapic_sid(irte, info->devid);
-		apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n",
-			info->devid, irte->present, irte->fpd,
-			irte->dst_mode, irte->redir_hint,
-			irte->trigger_mode, irte->dlvry_mode,
-			irte->avail, irte->vector, irte->dest_id,
-			irte->sid, irte->sq, irte->svt);
+		apic_pr_verbose("IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n",
+				info->devid, irte->present, irte->fpd, irte->dst_mode,
+				irte->redir_hint, irte->trigger_mode, irte->dlvry_mode,
+				irte->avail, irte->vector, irte->dest_id, irte->sid,
+				irte->sq, irte->svt);
 		sub_handle = info->ioapic.pin;
 		break;
 	case X86_IRQ_ALLOC_TYPE_HPET:
Re: [GIT pull] x86/apic for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:04 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-apic-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/61d1ea914b3556c44f9ca04277ab990a60afb44d

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
[GIT pull] x86/fpu for v6.12-rc1
Posted by Thomas Gleixner 2 months, 1 week ago
Linus,

please pull the latest x86/fpu branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-fpu-2024-09-17

up to:  ba386777a30b: x86/elf: Add a new FPU buffer layout info to x86 core files

Provide FPU buffer layout in core dumps:

  Debuggers have to guess the FPU buffer layout in core dumps, which is
  error prone because the AMD and Intel layouts differ.

  To avoid buggy heuristics, add an ELF note which describes the buffer
  layout and can be retrieved by tools.
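
  As an illustration, a debugger or core-file tool could walk the new note's
  descriptor records roughly like this (a minimal sketch against the
  x86_xfeat_component layout added below; locating the NT_X86_XSAVE_LAYOUT
  note and reading its payload into desc/descsz is assumed to have happened
  already):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Mirrors the uapi struct added in this series. */
	struct x86_xfeat_component {
		uint32_t type;
		uint32_t size;
		uint32_t offset;
		uint32_t flags;
	};

	static void print_xsave_layout(const void *desc, size_t descsz)
	{
		const struct x86_xfeat_component *xc = desc;
		size_t i, nr = descsz / sizeof(*xc);

		for (i = 0; i < nr; i++)
			printf("xfeature %u: size %u offset %u flags %u\n",
			       xc[i].type, xc[i].size, xc[i].offset, xc[i].flags);
	}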

Thanks,

	tglx

------------------>
Vignesh Balasubramanian (1):
      x86/elf: Add a new FPU buffer layout info to x86 core files


 arch/x86/Kconfig                |  1 +
 arch/x86/include/uapi/asm/elf.h | 16 ++++++++
 arch/x86/kernel/fpu/xstate.c    | 89 +++++++++++++++++++++++++++++++++++++++++
 fs/binfmt_elf.c                 |  4 +-
 include/uapi/linux/elf.h        |  1 +
 5 files changed, 109 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/include/uapi/asm/elf.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 007bab9f2a0e..c15b4b3fb328 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,6 +107,7 @@ config X86
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_ZONE_DMA_SET if EXPERT
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_HAVE_EXTRA_ELF_NOTES
 	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/x86/include/uapi/asm/elf.h b/arch/x86/include/uapi/asm/elf.h
new file mode 100644
index 000000000000..468e135fa285
--- /dev/null
+++ b/arch/x86/include/uapi/asm/elf.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_ELF_H
+#define _UAPI_ASM_X86_ELF_H
+
+#include <linux/types.h>
+
+struct x86_xfeat_component {
+	__u32 type;
+	__u32 size;
+	__u32 offset;
+	__u32 flags;
+} __packed;
+
+_Static_assert(sizeof(struct x86_xfeat_component) % 4 == 0, "x86_xfeat_component is not aligned");
+
+#endif /* _UAPI_ASM_X86_ELF_H */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c5a026fee5e0..f3a2e59a28e7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -13,6 +13,7 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/vmalloc.h>
+#include <linux/coredump.h>
 
 #include <asm/fpu/api.h>
 #include <asm/fpu/regset.h>
@@ -23,6 +24,8 @@
 #include <asm/prctl.h>
 #include <asm/elf.h>
 
+#include <uapi/asm/elf.h>
+
 #include "context.h"
 #include "internal.h"
 #include "legacy.h"
@@ -1838,3 +1841,89 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
 	return 0;
 }
 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
+#ifdef CONFIG_COREDUMP
+static const char owner_name[] = "LINUX";
+
+/*
+ * Dump type, size, offset and flag values for every xfeature that is present.
+ */
+static int dump_xsave_layout_desc(struct coredump_params *cprm)
+{
+	int num_records = 0;
+	int i;
+
+	for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
+		struct x86_xfeat_component xc = {
+			.type   = i,
+			.size   = xstate_sizes[i],
+			.offset = xstate_offsets[i],
+			/* reserved for future use */
+			.flags  = 0,
+		};
+
+		if (!dump_emit(cprm, &xc, sizeof(xc)))
+			return 0;
+
+		num_records++;
+	}
+	return num_records;
+}
+
+static u32 get_xsave_desc_size(void)
+{
+	u32 cnt = 0;
+	u32 i;
+
+	for_each_extended_xfeature(i, fpu_user_cfg.max_features)
+		cnt++;
+
+	return cnt * (sizeof(struct x86_xfeat_component));
+}
+
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	int num_records = 0;
+	struct elf_note en;
+
+	if (!fpu_user_cfg.max_features)
+		return 0;
+
+	en.n_namesz = sizeof(owner_name);
+	en.n_descsz = get_xsave_desc_size();
+	en.n_type = NT_X86_XSAVE_LAYOUT;
+
+	if (!dump_emit(cprm, &en, sizeof(en)))
+		return 1;
+	if (!dump_emit(cprm, owner_name, en.n_namesz))
+		return 1;
+	if (!dump_align(cprm, 4))
+		return 1;
+
+	num_records = dump_xsave_layout_desc(cprm);
+	if (!num_records)
+		return 1;
+
+	/* Total size should be equal to the number of records */
+	if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
+		return 1;
+
+	return 0;
+}
+
+int elf_coredump_extra_notes_size(void)
+{
+	int size;
+
+	if (!fpu_user_cfg.max_features)
+		return 0;
+
+	/* .note header */
+	size  = sizeof(struct elf_note);
+	/*  Name plus alignment to 4 bytes */
+	size += roundup(sizeof(owner_name), 4);
+	size += get_xsave_desc_size();
+
+	return size;
+}
+#endif /* CONFIG_COREDUMP */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 19fa49cd9907..01bcbe7fdebd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2039,7 +2039,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	{
 		size_t sz = info.size;
 
-		/* For cell spufs */
+		/* For cell spufs and x86 xstate */
 		sz += elf_coredump_extra_notes_size();
 
 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
@@ -2103,7 +2103,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!write_note_info(&info, cprm))
 		goto end_coredump;
 
-	/* For cell spufs */
+	/* For cell spufs and x86 xstate */
 	if (elf_coredump_extra_notes_write(cprm))
 		goto end_coredump;
 
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b54b313bcf07..e30a9b47dc87 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -411,6 +411,7 @@ typedef struct elf64_shdr {
 #define NT_X86_XSTATE	0x202		/* x86 extended state using xsave */
 /* Old binutils treats 0x203 as a CET state */
 #define NT_X86_SHSTK	0x204		/* x86 SHSTK state */
+#define NT_X86_XSAVE_LAYOUT	0x205	/* XSAVE layout description */
 #define NT_S390_HIGH_GPRS	0x300	/* s390 upper register halves */
 #define NT_S390_TIMER	0x301		/* s390 timer register */
 #define NT_S390_TODCMP	0x302		/* s390 TOD clock comparator register */
Re: [GIT pull] x86/fpu for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:10 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-fpu-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/c3056a7d1494e9b5511e4dba358834c5ef68949a

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
[GIT pull] x86/build for v6.12-rc1
Posted by Thomas Gleixner 2 months, 1 week ago
Linus,

please pull the latest x86/build branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-build-2024-09-17

up to:  ae94b263f5f6: x86: Ignore stack unwinding in KCOV

Updates for KCOV instrumentation on x86:

  - Prevent spurious KCOV coverage in common_interrupt()

  - Fixup the KCOV Makefile directive which got stale due to a source file
    rename

  - Exclude stack unwinding from KCOV as it creates large amounts of
    uninteresting coverage

  - Provide a self test to validate that KCOV coverage of the interrupt
    handling code does not start before the preempt count has been updated
    (for context, a condensed KCOV usage sketch follows this list).
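
  For context, KCOV coverage is collected from user space roughly as below
  (condensed from Documentation/dev-tools/kcov.rst, error handling omitted);
  the changes above ensure the PCs written into this buffer no longer contain
  spurious entries from interrupt handling or stack unwinding:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#define KCOV_INIT_TRACE	_IOR('c', 1, unsigned long)
	#define KCOV_ENABLE	_IO('c', 100)
	#define KCOV_DISABLE	_IO('c', 101)
	#define COVER_SIZE	(64 << 10)
	#define KCOV_TRACE_PC	0

	int main(void)
	{
		unsigned long *cover, n, i;
		int fd = open("/sys/kernel/debug/kcov", O_RDWR);

		/* Set up trace mode and size, map the shared buffer. */
		ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE);
		cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
			     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		ioctl(fd, KCOV_ENABLE, KCOV_TRACE_PC);

		__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
		read(-1, NULL, 0);		/* the traced syscall */
		n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
		for (i = 0; i < n; i++)
			printf("0x%lx\n", cover[i + 1]);

		ioctl(fd, KCOV_DISABLE, 0);
		munmap(cover, COVER_SIZE * sizeof(unsigned long));
		close(fd);
		return 0;
	}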

Thanks,

	tglx

------------------>
Dmitry Vyukov (4):
      x86/entry: Remove unwanted instrumentation in common_interrupt()
      kcov: Add interrupt handling self test
      module: Fix KCOV-ignored file name
      x86: Ignore stack unwinding in KCOV


 arch/x86/include/asm/hardirq.h  |  8 ++++++--
 arch/x86/include/asm/idtentry.h |  6 +++---
 arch/x86/kernel/Makefile        |  8 ++++++++
 kernel/kcov.c                   | 31 +++++++++++++++++++++++++++++++
 kernel/module/Makefile          |  2 +-
 lib/Kconfig.debug               |  8 ++++++++
 6 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index c67fa6ad098a..6ffa8b75f4cd 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -69,7 +69,11 @@ extern u64 arch_irq_stat(void);
 #define local_softirq_pending_ref       pcpu_hot.softirq_pending
 
 #if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
 {
 	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
 }
@@ -84,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
 	return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
 }
 #else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
 #endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
 
 #endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index d4f24499b256..ad5c68f0509d 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs,			\
 	irqentry_state_t state = irqentry_enter(regs);			\
 	u32 vector = (u32)(u8)error_code;				\
 									\
+	kvm_set_cpu_l1tf_flush_l1d();                                   \
 	instrumentation_begin();					\
-	kvm_set_cpu_l1tf_flush_l1d();					\
 	run_irq_on_irqstack_cond(__##func, regs, vector);		\
 	instrumentation_end();						\
 	irqentry_exit(regs, state);					\
@@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs);				\
 									\
 static __always_inline void instr_##func(struct pt_regs *regs)		\
 {									\
-	kvm_set_cpu_l1tf_flush_l1d();					\
 	run_sysvec_on_irqstack_cond(__##func, regs);			\
 }									\
 									\
@@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs)			\
 {									\
 	irqentry_state_t state = irqentry_enter(regs);			\
 									\
+	kvm_set_cpu_l1tf_flush_l1d();                                   \
 	instrumentation_begin();					\
 	instr_##func (regs);						\
 	instrumentation_end();						\
@@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs);		\
 static __always_inline void instr_##func(struct pt_regs *regs)		\
 {									\
 	__irq_enter_raw();						\
-	kvm_set_cpu_l1tf_flush_l1d();					\
 	__##func (regs);						\
 	__irq_exit_raw();						\
 }									\
@@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs)			\
 {									\
 	irqentry_state_t state = irqentry_enter(regs);			\
 									\
+	kvm_set_cpu_l1tf_flush_l1d();                                   \
 	instrumentation_begin();					\
 	instr_##func (regs);						\
 	instrumentation_end();						\
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a847180836e4..f7918980667a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,6 +35,14 @@ KMSAN_SANITIZE_nmi.o					:= n
 # If instrumentation of the following files is enabled, boot hangs during
 # first second.
 KCOV_INSTRUMENT_head$(BITS).o				:= n
+# These are called from save_stack_trace() on debug paths,
+# and produce large amounts of uninteresting coverage.
+KCOV_INSTRUMENT_stacktrace.o				:= n
+KCOV_INSTRUMENT_dumpstack.o				:= n
+KCOV_INSTRUMENT_dumpstack_$(BITS).o			:= n
+KCOV_INSTRUMENT_unwind_orc.o				:= n
+KCOV_INSTRUMENT_unwind_frame.o				:= n
+KCOV_INSTRUMENT_unwind_guess.o				:= n
 
 CFLAGS_irq.o := -I $(src)/../include/asm/trace
 
diff --git a/kernel/kcov.c b/kernel/kcov.c
index f0a69d402066..d9d4a0c04185 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -11,6 +11,7 @@
 #include <linux/fs.h>
 #include <linux/hashtable.h>
 #include <linux/init.h>
+#include <linux/jiffies.h>
 #include <linux/kmsan-checks.h>
 #include <linux/mm.h>
 #include <linux/preempt.h>
@@ -1058,6 +1059,32 @@ u64 kcov_common_handle(void)
 }
 EXPORT_SYMBOL(kcov_common_handle);
 
+#ifdef CONFIG_KCOV_SELFTEST
+static void __init selftest(void)
+{
+	unsigned long start;
+
+	pr_err("running self test\n");
+	/*
+	 * Test that interrupts don't produce spurious coverage.
+	 * The coverage callback filters out interrupt code, but only
+	 * after the handler updates preempt count. Some code periodically
+	 * leaks out of that section and leads to spurious coverage.
+	 * It's hard to call the actual interrupt handler directly,
+	 * so we just loop here for a bit waiting for a timer interrupt.
+	 * We set kcov_mode to enable tracing, but don't setup the area,
+	 * so any attempt to trace will crash. Note: we must not call any
+	 * potentially traced functions in this region.
+	 */
+	start = jiffies;
+	current->kcov_mode = KCOV_MODE_TRACE_PC;
+	while ((jiffies - start) * MSEC_PER_SEC / HZ < 300)
+		;
+	current->kcov_mode = 0;
+	pr_err("done running self test\n");
+}
+#endif
+
 static int __init kcov_init(void)
 {
 	int cpu;
@@ -1077,6 +1104,10 @@ static int __init kcov_init(void)
 	 */
 	debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops);
 
+#ifdef CONFIG_KCOV_SELFTEST
+	selftest();
+#endif
+
 	return 0;
 }
 
diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index a10b2b9a6fdf..50ffcc413b54 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -5,7 +5,7 @@
 
 # These are called from save_stack_trace() on slub debug path,
 # and produce insane amounts of uninteresting coverage.
-KCOV_INSTRUMENT_module.o := n
+KCOV_INSTRUMENT_main.o := n
 
 obj-y += main.o
 obj-y += strict_rwx.o
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a30c03a66172..270e367b3e6f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2173,6 +2173,14 @@ config KCOV_IRQ_AREA_SIZE
 	  soft interrupts. This specifies the size of those areas in the
 	  number of unsigned long words.
 
+config KCOV_SELFTEST
+	bool "Perform short selftests on boot"
+	depends on KCOV
+	help
+	  Run short KCOV coverage collection selftests on boot.
+	  On test failure, causes the kernel to panic. Recommended to be
+	  enabled, ensuring critical functionality works as intended.
+
 menuconfig RUNTIME_TESTING_MENU
 	bool "Runtime Testing"
 	default y
Re: [GIT pull] x86/build for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:05 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-build-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/5ba202a7c986fc58dd2fd1571c99667ab2699995

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
[GIT pull] x86/core for v6.12-rc1
Posted by Thomas Gleixner 2 months, 1 week ago
Linus,

please pull the latest x86/core branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-core-2024-09-17

up to:  7424fc6b86c8: x86/traps: Enable UBSAN traps on x86

Enable UBSAN traps for x86, which provides better reporting through
metadata encoded into the UD1 instruction.
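
A hypothetical example (not part of the pull request) of code that benefits:
with UBSAN bounds checking enabled in trap mode, clang compiles the detected
out-of-bounds access below into a UD1 instruction whose immediate encodes the
check type, and the updated trap handler reports which sanitizer check fired
at which IP instead of a bare "invalid opcode" oops:

	/* Hypothetical demo, not kernel code from this branch. */
	static int table[4];

	int ubsan_demo(int idx)
	{
		if (idx > 4)		/* off-by-one: lets idx == 4 through */
			return -1;

		return table[idx];	/* idx == 4 trips the UBSAN bounds check */
	}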

Thanks,

	tglx

------------------>
Gatlin Newhouse (1):
      x86/traps: Enable UBSAN traps on x86


 arch/x86/include/asm/bug.h | 12 ++++++++++
 arch/x86/kernel/traps.c    | 59 ++++++++++++++++++++++++++++++++++++++++++----
 include/linux/ubsan.h      |  5 ++++
 lib/Kconfig.ubsan          |  4 ++--
 4 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index a3ec87d198ac..806649c7f23d 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -13,6 +13,18 @@
 #define INSN_UD2	0x0b0f
 #define LEN_UD2		2
 
+/*
+ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
+ */
+#define INSN_ASOP		0x67
+#define OPCODE_ESCAPE		0x0f
+#define SECOND_BYTE_OPCODE_UD1	0xb9
+#define SECOND_BYTE_OPCODE_UD2	0x0b
+
+#define BUG_NONE		0xffff
+#define BUG_UD1			0xfffe
+#define BUG_UD2			0xfffd
+
 #ifdef CONFIG_GENERIC_BUG
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4fa0b17e5043..415881607c5d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -42,6 +42,7 @@
 #include <linux/hardirq.h>
 #include <linux/atomic.h>
 #include <linux/iommu.h>
+#include <linux/ubsan.h>
 
 #include <asm/stacktrace.h>
 #include <asm/processor.h>
@@ -91,6 +92,47 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
 	return *(unsigned short *)addr == INSN_UD2;
 }
 
+/*
+ * Check for UD1 or UD2, accounting for Address Size Override Prefixes.
+ * If it's a UD1, get the ModRM byte to pass along to UBSan.
+ */
+__always_inline int decode_bug(unsigned long addr, u32 *imm)
+{
+	u8 v;
+
+	if (addr < TASK_SIZE_MAX)
+		return BUG_NONE;
+
+	v = *(u8 *)(addr++);
+	if (v == INSN_ASOP)
+		v = *(u8 *)(addr++);
+	if (v != OPCODE_ESCAPE)
+		return BUG_NONE;
+
+	v = *(u8 *)(addr++);
+	if (v == SECOND_BYTE_OPCODE_UD2)
+		return BUG_UD2;
+
+	if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1)
+		return BUG_NONE;
+
+	/* Retrieve the immediate (type value) for the UBSAN UD1 */
+	v = *(u8 *)(addr++);
+	if (X86_MODRM_RM(v) == 4)
+		addr++;
+
+	*imm = 0;
+	if (X86_MODRM_MOD(v) == 1)
+		*imm = *(u8 *)addr;
+	else if (X86_MODRM_MOD(v) == 2)
+		*imm = *(u32 *)addr;
+	else
+		WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v));
+
+	return BUG_UD1;
+}
+
+
 static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
 		  struct pt_regs *regs,	long error_code)
@@ -216,6 +258,8 @@ static inline void handle_invalid_op(struct pt_regs *regs)
 static noinstr bool handle_bug(struct pt_regs *regs)
 {
 	bool handled = false;
+	int ud_type;
+	u32 imm;
 
 	/*
 	 * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
@@ -223,7 +267,8 @@ static noinstr bool handle_bug(struct pt_regs *regs)
 	 * irqentry_enter().
 	 */
 	kmsan_unpoison_entry_regs(regs);
-	if (!is_valid_bugaddr(regs->ip))
+	ud_type = decode_bug(regs->ip, &imm);
+	if (ud_type == BUG_NONE)
 		return handled;
 
 	/*
@@ -236,10 +281,14 @@ static noinstr bool handle_bug(struct pt_regs *regs)
 	 */
 	if (regs->flags & X86_EFLAGS_IF)
 		raw_local_irq_enable();
-	if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
-	    handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
-		regs->ip += LEN_UD2;
-		handled = true;
+	if (ud_type == BUG_UD2) {
+		if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
+		    handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
+			regs->ip += LEN_UD2;
+			handled = true;
+		}
+	} else if (IS_ENABLED(CONFIG_UBSAN_TRAP)) {
+		pr_crit("%s at %pS\n", report_ubsan_failure(regs, imm), (void *)regs->ip);
 	}
 	if (regs->flags & X86_EFLAGS_IF)
 		raw_local_irq_disable();
diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h
index bff7445498de..d8219cbe09ff 100644
--- a/include/linux/ubsan.h
+++ b/include/linux/ubsan.h
@@ -4,6 +4,11 @@
 
 #ifdef CONFIG_UBSAN_TRAP
 const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type);
+#else
+static inline const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type)
+{
+	return NULL;
+}
 #endif
 
 #endif
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index bdda600f8dfb..1d4aa7a83b3a 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -29,8 +29,8 @@ config UBSAN_TRAP
 
 	  Also note that selecting Y will cause your kernel to Oops
 	  with an "illegal instruction" error with no further details
-	  when a UBSAN violation occurs. (Except on arm64, which will
-	  report which Sanitizer failed.) This may make it hard to
+	  when a UBSAN violation occurs. (Except on arm64 and x86, which
+	  will report which Sanitizer failed.) This may make it hard to
 	  determine whether an Oops was caused by UBSAN or to figure
 	  out the details of a UBSAN violation. It makes the kernel log
 	  output less useful for bug reports.
Re: [GIT pull] x86/core for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:09 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-core-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/dea435d397ab90d8e682e4162a5b9835d24b1e3a

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
[GIT pull] x86/cleanups for v6.12-rc1
Posted by Thomas Gleixner 2 months, 1 week ago
Linus,

please pull the latest x86/cleanups branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-cleanups-2024-09-17

up to:  a678164aadbf: x86/EISA: Dereference memory directly instead of using readl()

A set of cleanups across x86:

  - Use memremap() for the EISA probe instead of ioremap(). EISA is
    strictly memory and not MMIO

  - Cleanups and enhancements all over the place

Thanks,

	tglx

------------------>
Gaosheng Cui (1):
      x86/mtrr: Remove obsolete declaration for mtrr_bp_restore()

Kai Huang (3):
      x86/sgx: Fix a W=1 build warning in function comment
      x86/kexec: Fix a comment of swap_pages() assembly
      x86/kexec: Add comments around swap_pages() assembly to improve readability

Maciej W. Rozycki (2):
      x86/EISA: Use memremap() to probe for the EISA BIOS signature
      x86/EISA: Dereference memory directly instead of using readl()

Nathan Chancellor (1):
      x86/cpu_entry_area: Annotate percpu_setup_exception_stacks() as __init

Uros Bizjak (1):
      x86/boot/64: Strip percpu address space when setting up GDT descriptors

WangYuli (1):
      x86/cpu: Clarify the error message when BIOS does not support SGX

Yue Haibing (1):
      x86/extable: Remove unused declaration fixup_bug()


 arch/x86/include/asm/extable.h       |  1 -
 arch/x86/include/asm/mtrr.h          |  2 --
 arch/x86/kernel/cpu/feat_ctl.c       |  2 +-
 arch/x86/kernel/cpu/sgx/main.c       |  2 +-
 arch/x86/kernel/eisa.c               |  8 ++++----
 arch/x86/kernel/head64.c             |  3 ++-
 arch/x86/kernel/relocate_kernel_64.S | 10 +++++++---
 arch/x86/mm/cpu_entry_area.c         |  2 +-
 8 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index eeed395c3177..a0e0c6b50155 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -37,7 +37,6 @@ struct pt_regs;
 
 extern int fixup_exception(struct pt_regs *regs, int trapnr,
 			   unsigned long error_code, unsigned long fault_addr);
-extern int fixup_bug(struct pt_regs *regs, int trapnr);
 extern int ex_get_fixup_type(unsigned long ip);
 extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
 
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 090d658a85a6..4218248083d9 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -69,7 +69,6 @@ extern int mtrr_add_page(unsigned long base, unsigned long size,
 			 unsigned int type, bool increment);
 extern int mtrr_del(int reg, unsigned long base, unsigned long size);
 extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
-extern void mtrr_bp_restore(void);
 extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
 extern int amd_special_default_mtrr(void);
 void mtrr_disable(void);
@@ -117,7 +116,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
 	return 0;
 }
 #define mtrr_bp_init() do {} while (0)
-#define mtrr_bp_restore() do {} while (0)
 #define mtrr_disable() do {} while (0)
 #define mtrr_enable() do {} while (0)
 #define mtrr_generic_set_state() do {} while (0)
diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
index 1640ae76548f..4a4118784c13 100644
--- a/arch/x86/kernel/cpu/feat_ctl.c
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -188,7 +188,7 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
 update_sgx:
 	if (!(msr & FEAT_CTL_SGX_ENABLED)) {
 		if (enable_sgx_kvm || enable_sgx_driver)
-			pr_err_once("SGX disabled by BIOS.\n");
+			pr_err_once("SGX disabled or unsupported by BIOS.\n");
 		clear_cpu_cap(c, X86_FEATURE_SGX);
 		return;
 	}
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 27892e57c4ef..1a000acd933a 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -732,7 +732,7 @@ int arch_memory_failure(unsigned long pfn, int flags)
 	return 0;
 }
 
-/**
+/*
  * A section metric is concatenated in a way that @low bits 12-31 define the
  * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
  * metric.
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index 53935b4d62e3..9535a6507db7 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -11,15 +11,15 @@
 
 static __init int eisa_bus_probe(void)
 {
-	void __iomem *p;
+	u32 *p;
 
 	if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
 		return 0;
 
-	p = ioremap(0x0FFFD9, 4);
-	if (p && readl(p) == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24))
+	p = memremap(0x0FFFD9, 4, MEMREMAP_WB);
+	if (p && *p == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24))
 		EISA_bus = 1;
-	iounmap(p);
+	memunmap(p);
 	return 0;
 }
 subsys_initcall(eisa_bus_probe);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index a817ed0724d1..4b9d4557fc94 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -559,10 +559,11 @@ void early_setup_idt(void)
  */
 void __head startup_64_setup_gdt_idt(void)
 {
+	struct desc_struct *gdt = (void *)(__force unsigned long)init_per_cpu_var(gdt_page.gdt);
 	void *handler = NULL;
 
 	struct desc_ptr startup_gdt_descr = {
-		.address = (unsigned long)&RIP_REL_REF(init_per_cpu_var(gdt_page.gdt)),
+		.address = (unsigned long)&RIP_REL_REF(*gdt),
 		.size    = GDT_SIZE - 1,
 	};
 
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 042c9a0334e9..e9e88c342f75 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -170,6 +170,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	wbinvd
 .Lsme_off:
 
+	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
 	movq	%rcx, %r11
 	call	swap_pages
 
@@ -258,7 +259,7 @@ SYM_CODE_END(virtual_mapped)
 	/* Do the copies */
 SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
 	UNWIND_HINT_END_OF_STACK
-	movq	%rdi, %rcx	/* Put the page_list in %rcx */
+	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
 	xorl	%edi, %edi
 	xorl	%esi, %esi
 	jmp	1f
@@ -289,18 +290,21 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
 	movq	%rcx,   %rsi  /* For ever source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
 
-	movq	%rdi, %rdx
-	movq	%rsi, %rax
+	movq	%rdi, %rdx    /* Save destination page to %rdx */
+	movq	%rsi, %rax    /* Save source page to %rax */
 
+	/* copy source page to swap page */
 	movq	%r10, %rdi
 	movl	$512, %ecx
 	rep ; movsq
 
+	/* copy destination page to source page */
 	movq	%rax, %rdi
 	movq	%rdx, %rsi
 	movl	$512, %ecx
 	rep ; movsq
 
+	/* copy swap page to destination page */
 	movq	%rdx, %rdi
 	movq	%r10, %rsi
 	movl	$512, %ecx
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index e91500a80963..575f863f3c75 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -164,7 +164,7 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
 	}
 }
 #else
-static inline void percpu_setup_exception_stacks(unsigned int cpu)
+static void __init percpu_setup_exception_stacks(unsigned int cpu)
 {
 	struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
 
Re: [GIT pull] x86/cleanups for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:07 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-cleanups-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/0279aa780df4362f218b5645c07e5265859937f6

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
[GIT pull] x86/platform for v6.12-rc1
Posted by Thomas Gleixner 2 months, 1 week ago
Linus,

please pull the latest x86/platform branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-platform-2024-09-17

up to:  22f426972655: x86/platform/uv: Remove unused declaration uv_irq_2_mmr_info()

Remove a stale declaration from the UV platform code

Thanks,

	tglx

------------------>
Yue Haibing (1):
      x86/platform/uv: Remove unused declaration uv_irq_2_mmr_info()


 arch/x86/include/asm/uv/uv_irq.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index d6b17c760622..1876b5edd142 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,7 +31,6 @@ enum {
 	UV_AFFINITY_CPU
 };
 
-extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);
 
Re: [GIT pull] x86/platform for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:17 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-platform-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/a3233da6c014b421f16dce27955fa4a803b50474

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
[GIT pull] x86/fred for v6.12-rc1
Posted by Thomas Gleixner 2 months, 1 week ago
Linus,

please pull the latest x86/fred branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-fred-2024-09-17

up to:  fe85ee391966: x86/entry: Set FRED RSP0 on return to userspace instead of context switch

Updates for x86 FRED:

  - Enable FRED right after init_mem_mapping() because at that point the
    early IDT fault handler is replaced by the real fault handler. The real
    fault handler retrieves the faulting address from the stack frame and
    not from CR2 when the FRED feature is set. But that obviously only
    works when FRED is enabled in the CPU as well.

  - Set SS to __KERNEL_DS when enabling FRED to prevent a corner case where
    ERETS can observe an SS mismatch and raise a #GP (a simplified sketch of
    the enable ordering follows below).
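
  A simplified sketch of the resulting enable ordering (assumed pseudo-flow
  based on the description above, not the literal setup_arch()/fred.c code):

	void __init boot_fragment(void)
	{
		/* The early IDT #PF handler is needed up to this point. */
		init_mem_mapping();

		/*
		 * The real fault handler reads the faulting address from the
		 * FRED stack frame rather than CR2, so FRED has to be enabled
		 * right after init_mem_mapping(), when the real handlers take
		 * over.  The enable path also loads __KERNEL_DS into SS so a
		 * later ERETS cannot observe an SS mismatch and raise #GP.
		 */
		if (cpu_feature_enabled(X86_FEATURE_FRED))
			cpu_init_fred_exceptions();
	}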


Thanks,

	tglx

------------------>
Andrew Cooper (1):
      x86/msr: Switch between WRMSRNS and WRMSR with the alternatives mechanism

Xin Li (Intel) (6):
      x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param()
      x86/fred: Move FRED RSP initialization into a separate function
      x86/fred: Enable FRED right after init_mem_mapping()
      x86/fred: Set SS to __KERNEL_DS when enabling FRED
      x86/entry: Test ti_work for zero before processing individual bits
      x86/entry: Set FRED RSP0 on return to userspace instead of context switch


 arch/x86/include/asm/entry-common.h | 13 +++++++++--
 arch/x86/include/asm/fred.h         | 23 ++++++++++++++++++-
 arch/x86/include/asm/msr.h          | 25 +++++++++------------
 arch/x86/include/asm/processor.h    |  3 ++-
 arch/x86/include/asm/switch_to.h    |  6 +----
 arch/x86/kernel/cpu/common.c        | 22 ++++++++++++++++--
 arch/x86/kernel/cpu/cpuid-deps.c    |  1 -
 arch/x86/kernel/fred.c              | 45 +++++++++++++++++++++++++++++--------
 arch/x86/kernel/setup.c             |  7 +++++-
 arch/x86/kernel/smpboot.c           |  2 +-
 arch/x86/kernel/traps.c             | 28 +----------------------
 11 files changed, 111 insertions(+), 64 deletions(-)

diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index fb2809b20b0a..77d20555e04d 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -8,6 +8,7 @@
 #include <asm/nospec-branch.h>
 #include <asm/io_bitmap.h>
 #include <asm/fpu/api.h>
+#include <asm/fred.h>
 
 /* Check that the stack and regs on entry from user mode are sane. */
 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
@@ -44,8 +45,7 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 }
 #define arch_enter_from_user_mode arch_enter_from_user_mode
 
-static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
-						  unsigned long ti_work)
+static inline void arch_exit_work(unsigned long ti_work)
 {
 	if (ti_work & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
@@ -56,6 +56,15 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 	fpregs_assert_state_consistent();
 	if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
 		switch_fpu_return();
+}
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+						  unsigned long ti_work)
+{
+	if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
+		arch_exit_work(ti_work);
+
+	fred_update_rsp0();
 
 #ifdef CONFIG_COMPAT
 	/*
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index e86c7ba32435..25ca00bd70e8 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -36,6 +36,7 @@
 
 #ifdef CONFIG_X86_FRED
 #include <linux/kernel.h>
+#include <linux/sched/task_stack.h>
 
 #include <asm/ptrace.h>
 
@@ -84,13 +85,33 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int
 }
 
 void cpu_init_fred_exceptions(void);
+void cpu_init_fred_rsps(void);
 void fred_complete_exception_setup(void);
 
+DECLARE_PER_CPU(unsigned long, fred_rsp0);
+
+static __always_inline void fred_sync_rsp0(unsigned long rsp0)
+{
+	__this_cpu_write(fred_rsp0, rsp0);
+}
+
+static __always_inline void fred_update_rsp0(void)
+{
+	unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
+
+	if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
+		wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
+		__this_cpu_write(fred_rsp0, rsp0);
+	}
+}
 #else /* CONFIG_X86_FRED */
 static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
 static inline void cpu_init_fred_exceptions(void) { }
+static inline void cpu_init_fred_rsps(void) { }
 static inline void fred_complete_exception_setup(void) { }
-static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_sync_rsp0(unsigned long rsp0) { }
+static inline void fred_update_rsp0(void) { }
 #endif /* CONFIG_X86_FRED */
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index d642037f9ed5..001853541f1e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -99,19 +99,6 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
 		     : : "c" (msr), "a"(low), "d" (high) : "memory");
 }
 
-/*
- * WRMSRNS behaves exactly like WRMSR with the only difference being
- * that it is not a serializing instruction by default.
- */
-static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high)
-{
-	/* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */
-	asm volatile("1: .byte 0x0f,0x01,0xc6\n"
-		     "2:\n"
-		     _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
-		     : : "c" (msr), "a"(low), "d" (high));
-}
-
 #define native_rdmsr(msr, val1, val2)			\
 do {							\
 	u64 __val = __rdmsr((msr));			\
@@ -312,9 +299,19 @@ do {							\
 
 #endif	/* !CONFIG_PARAVIRT_XXL */
 
+/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
+#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
+
+/* Non-serializing WRMSR, when available.  Falls back to a serializing WRMSR. */
 static __always_inline void wrmsrns(u32 msr, u64 val)
 {
-	__wrmsrns(msr, val, val >> 32);
+	/*
+	 * WRMSR is 2 bytes.  WRMSRNS is 3 bytes.  Pad WRMSR with a redundant
+	 * DS prefix to avoid a trailing NOP.
+	 */
+	asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS)
+		     "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+		     : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
 }
 
 /*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a75a07f4931f..399f7d1c4c61 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -582,7 +582,8 @@ extern void switch_gdt_and_percpu_base(int);
 extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void cpu_init(void);
-extern void cpu_init_exception_handling(void);
+extern void cpu_init_exception_handling(bool boot_cpu);
+extern void cpu_init_replace_early_idt(void);
 extern void cr4_init(void);
 
 extern void set_task_blockstep(struct task_struct *task, bool on);
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index c3bd0c0758c9..75248546403d 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -70,13 +70,9 @@ static inline void update_task_stack(struct task_struct *task)
 #ifdef CONFIG_X86_32
 	this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
 #else
-	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
-		/* WRMSRNS is a baseline feature for FRED. */
-		wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE);
-	} else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
+	if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV))
 		/* Xen PV enters the kernel on the thread stack. */
 		load_sp0(task_top_of_stack(task));
-	}
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d4e539d4e158..a4735d9b5a1d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1510,6 +1510,11 @@ static void __init cpu_parse_early_param(void)
 	if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
 		setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
 
+	/* Minimize the gap between FRED is available and available but disabled. */
+	arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg));
+	if (arglen != 2 || strncmp(arg, "on", 2))
+		setup_clear_cpu_cap(X86_FEATURE_FRED);
+
 	arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
 	if (arglen <= 0)
 		return;
@@ -2171,7 +2176,7 @@ static inline void tss_setup_io_bitmap(struct tss_struct *tss)
  * Setup everything needed to handle exceptions from the IDT, including the IST
  * exceptions which use paranoid_entry().
  */
-void cpu_init_exception_handling(void)
+void cpu_init_exception_handling(bool boot_cpu)
 {
 	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
 	int cpu = raw_smp_processor_id();
@@ -2190,10 +2195,23 @@ void cpu_init_exception_handling(void)
 	/* GHCB needs to be setup to handle #VC. */
 	setup_ghcb();
 
+	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+		/* The boot CPU has enabled FRED during early boot */
+		if (!boot_cpu)
+			cpu_init_fred_exceptions();
+
+		cpu_init_fred_rsps();
+	} else {
+		load_current_idt();
+	}
+}
+
+void __init cpu_init_replace_early_idt(void)
+{
 	if (cpu_feature_enabled(X86_FEATURE_FRED))
 		cpu_init_fred_exceptions();
 	else
-		load_current_idt();
+		idt_setup_early_pf();
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index b7d9f530ae16..8bd84114c2d9 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -83,7 +83,6 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_AMX_TILE,			X86_FEATURE_XFD       },
 	{ X86_FEATURE_SHSTK,			X86_FEATURE_XSAVES    },
 	{ X86_FEATURE_FRED,			X86_FEATURE_LKGS      },
-	{ X86_FEATURE_FRED,			X86_FEATURE_WRMSRNS   },
 	{}
 };
 
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
index 4bcd8791ad96..8d32c3f48abc 100644
--- a/arch/x86/kernel/fred.c
+++ b/arch/x86/kernel/fred.c
@@ -21,17 +21,53 @@
 
 #define FRED_STKLVL(vector, lvl)	((lvl) << (2 * (vector)))
 
+DEFINE_PER_CPU(unsigned long, fred_rsp0);
+EXPORT_PER_CPU_SYMBOL(fred_rsp0);
+
 void cpu_init_fred_exceptions(void)
 {
 	/* When FRED is enabled by default, remove this log message */
 	pr_info("Initialize FRED on CPU%d\n", smp_processor_id());
 
+	/*
+	 * If a kernel event is delivered before a CPU goes to user level for
+	 * the first time, its SS is NULL thus NULL is pushed into the SS field
+	 * of the FRED stack frame.  But before ERETS is executed, the CPU may
+	 * context switch to another task and go to user level.  Then when the
+	 * CPU comes back to kernel mode, SS is changed to __KERNEL_DS.  Later
+	 * when ERETS is executed to return from the kernel event handler, a #GP
+	 * fault is generated because SS doesn't match the SS saved in the FRED
+	 * stack frame.
+	 *
+	 * Initialize SS to __KERNEL_DS when enabling FRED to avoid such #GPs.
+	 */
+	loadsegment(ss, __KERNEL_DS);
+
 	wrmsrl(MSR_IA32_FRED_CONFIG,
 	       /* Reserve for CALL emulation */
 	       FRED_CONFIG_REDZONE |
 	       FRED_CONFIG_INT_STKLVL(0) |
 	       FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
 
+	wrmsrl(MSR_IA32_FRED_STKLVLS, 0);
+	wrmsrl(MSR_IA32_FRED_RSP0, 0);
+	wrmsrl(MSR_IA32_FRED_RSP1, 0);
+	wrmsrl(MSR_IA32_FRED_RSP2, 0);
+	wrmsrl(MSR_IA32_FRED_RSP3, 0);
+
+	/* Enable FRED */
+	cr4_set_bits(X86_CR4_FRED);
+	/* Any further IDT use is a bug */
+	idt_invalidate();
+
+	/* Use int $0x80 for 32-bit system calls in FRED mode */
+	setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
+	setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+}
+
+/* Must be called after setup_cpu_entry_areas() */
+void cpu_init_fred_rsps(void)
+{
 	/*
 	 * The purpose of separate stacks for NMI, #DB and #MC *in the kernel*
 	 * (remember that user space faults are always taken on stack level 0)
@@ -47,13 +83,4 @@ void cpu_init_fred_exceptions(void)
 	wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
 	wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
 	wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
-
-	/* Enable FRED */
-	cr4_set_bits(X86_CR4_FRED);
-	/* Any further IDT use is a bug */
-	idt_invalidate();
-
-	/* Use int $0x80 for 32-bit system calls in FRED mode */
-	setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
-	setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6129dc2ba784..f1fea506e20f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1039,7 +1039,12 @@ void __init setup_arch(char **cmdline_p)
 
 	init_mem_mapping();
 
-	idt_setup_early_pf();
+	/*
+	 * init_mem_mapping() relies on the early IDT page fault handling.
+	 * Now either enable FRED or install the real page fault handler
+	 * for 64-bit in the IDT.
+	 */
+	cpu_init_replace_early_idt();
 
 	/*
 	 * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0c35207320cb..dc4fff8fccce 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -246,7 +246,7 @@ static void notrace start_secondary(void *unused)
 		__flush_tlb_all();
 	}
 
-	cpu_init_exception_handling();
+	cpu_init_exception_handling(false);
 
 	/*
 	 * Load the microcode before reaching the AP alive synchronization
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4fa0b17e5043..197d5888b0e2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1402,34 +1402,8 @@ DEFINE_IDTENTRY_SW(iret_error)
 }
 #endif
 
-/* Do not enable FRED by default yet. */
-static bool enable_fred __ro_after_init = false;
-
-#ifdef CONFIG_X86_FRED
-static int __init fred_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-
-	if (!cpu_feature_enabled(X86_FEATURE_FRED))
-		return 0;
-
-	if (!strcmp(str, "on"))
-		enable_fred = true;
-	else if (!strcmp(str, "off"))
-		enable_fred = false;
-	else
-		pr_warn("invalid FRED option: 'fred=%s'\n", str);
-	return 0;
-}
-early_param("fred", fred_setup);
-#endif
-
 void __init trap_init(void)
 {
-	if (cpu_feature_enabled(X86_FEATURE_FRED) && !enable_fred)
-		setup_clear_cpu_cap(X86_FEATURE_FRED);
-
 	/* Init cpu_entry_area before IST entries are set up */
 	setup_cpu_entry_areas();
 
@@ -1437,7 +1411,7 @@ void __init trap_init(void)
 	sev_es_init_vc_handling();
 
 	/* Initialize TSS before setting up traps so ISTs work */
-	cpu_init_exception_handling();
+	cpu_init_exception_handling(true);
 
 	/* Setup traps as cpu_init() might #GP */
 	if (!cpu_feature_enabled(X86_FEATURE_FRED))
Re: [GIT pull] x86/fred for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:12 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-fred-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/b136021126b99072da705f693a8be07c6285e47c

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
[GIT pull] x86/timers for v6.12-rc1
Posted by Thomas Gleixner 2 months, 1 week ago
Linus,

please pull the latest x86/timers branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-timers-2024-09-17

up to:  e7ff4ebffe3b: x86/tsc: Check for sockets instead of CPUs to make code match comment

Updates for x86 timers:

  - Use the package count from the topology information to decide whether
    the TSC can be trusted, instead of the number of online nodes, which
    does not reflect the real topology.

  - Stop PIT timer 0 when it's not in use, so the VMM does not waste cycles
    on pointless emulation.

  - Fix the PIT stop sequence for timer 0 so the timer is truly stopped on
    both real hardware and buggy VMM emulations (sketched below).

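For reference, the new stop sequence boils down to the sketch below. It is
distilled from the clockevent_i8253_disable() hunk in the patch further down
and is not a standalone driver; only the ordering of the port writes matters:

	#include <linux/i8253.h>	/* i8253_lock, PIT_MODE, PIT_CH0 */
	#include <linux/io.h>		/* outb_p() */

	static void pit_stop_channel0(void)
	{
		raw_spin_lock(&i8253_lock);

		/* Mode 0 stops the counter on real hardware */
		outb_p(0x30, PIT_MODE);

		/*
		 * Some VMM emulations only act on the mode change once the
		 * counter is loaded, so load it ...
		 */
		outb_p(0, PIT_CH0);
		outb_p(0, PIT_CH0);

		/* ... and write the mode again, which stops it properly */
		outb_p(0x30, PIT_MODE);

		raw_spin_unlock(&i8253_lock);
	}
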
Thanks,

	tglx

------------------>
David Woodhouse (2):
      x86/i8253: Disable PIT timer 0 when not in use
      clockevents/drivers/i8253: Fix stop sequence for timer 0

Feng Tang (1):
      x86/tsc: Use topology_max_packages() to get package number

Paul E. McKenney (1):
      x86/tsc: Check for sockets instead of CPUs to make code match comment


 arch/x86/kernel/cpu/mshyperv.c | 11 ----------
 arch/x86/kernel/i8253.c        | 11 ++++++++--
 arch/x86/kernel/tsc.c          | 10 ++++-----
 drivers/clocksource/i8253.c    | 49 +++++++++++++++++++++++++++++-------------
 include/linux/i8253.h          |  2 +-
 5 files changed, 48 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e0fd57a8ba84..3d4237f27569 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -16,7 +16,6 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kexec.h>
-#include <linux/i8253.h>
 #include <linux/random.h>
 #include <asm/processor.h>
 #include <asm/hypervisor.h>
@@ -522,16 +521,6 @@ static void __init ms_hyperv_init_platform(void)
 	if (efi_enabled(EFI_BOOT))
 		x86_platform.get_nmi_reason = hv_get_nmi_reason;
 
-	/*
-	 * Hyper-V VMs have a PIT emulation quirk such that zeroing the
-	 * counter register during PIT shutdown restarts the PIT. So it
-	 * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
-	 * to false tells pit_shutdown() not to zero the counter so that
-	 * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
-	 * and setting this value has no effect.
-	 */
-	i8253_clear_counter_on_shutdown = false;
-
 #if IS_ENABLED(CONFIG_HYPERV)
 	if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
 	    ms_hyperv.paravisor_present)
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 2b7999a1a50a..80e262bb627f 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -8,6 +8,7 @@
 #include <linux/timex.h>
 #include <linux/i8253.h>
 
+#include <asm/hypervisor.h>
 #include <asm/apic.h>
 #include <asm/hpet.h>
 #include <asm/time.h>
@@ -39,9 +40,15 @@ static bool __init use_pit(void)
 
 bool __init pit_timer_init(void)
 {
-	if (!use_pit())
+	if (!use_pit()) {
+		/*
+		 * Don't just ignore the PIT. Ensure it's stopped, because
+		 * VMMs otherwise steal CPU time just to pointlessly waggle
+		 * the (masked) IRQ.
+		 */
+		clockevent_i8253_disable();
 		return false;
-
+	}
 	clockevent_i8253_init(true);
 	global_clock_event = &i8253_clockevent;
 	return true;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index d4462fb26299..dfe6847fd99e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -28,6 +28,7 @@
 #include <asm/apic.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i8259.h>
+#include <asm/topology.h>
 #include <asm/uv/uv.h>
 
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
@@ -1253,15 +1254,12 @@ static void __init check_system_tsc_reliable(void)
 	 *  - TSC which does not stop in C-States
 	 *  - the TSC_ADJUST register which allows to detect even minimal
 	 *    modifications
-	 *  - not more than two sockets. As the number of sockets cannot be
-	 *    evaluated at the early boot stage where this has to be
-	 *    invoked, check the number of online memory nodes as a
-	 *    fallback solution which is an reasonable estimate.
+	 *  - not more than four packages
 	 */
 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
 	    boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
 	    boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
-	    nr_online_nodes <= 4)
+	    topology_max_packages() <= 4)
 		tsc_disable_clocksource_watchdog();
 }
 
@@ -1290,7 +1288,7 @@ int unsynchronized_tsc(void)
 	 */
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
 		/* assume multi socket systems are not synchronized: */
-		if (num_possible_cpus() > 1)
+		if (topology_max_packages() > 1)
 			return 1;
 	}
 
diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index d4350bb10b83..39f7c2d736d1 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -20,13 +20,6 @@
 DEFINE_RAW_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
-/*
- * Handle PIT quirk in pit_shutdown() where zeroing the counter register
- * restarts the PIT, negating the shutdown. On platforms with the quirk,
- * platform specific code can set this to false.
- */
-bool i8253_clear_counter_on_shutdown __ro_after_init = true;
-
 #ifdef CONFIG_CLKSRC_I8253
 /*
  * Since the PIT overflows every tick, its not very useful
@@ -108,21 +101,47 @@ int __init clocksource_i8253_init(void)
 #endif
 
 #ifdef CONFIG_CLKEVT_I8253
-static int pit_shutdown(struct clock_event_device *evt)
+void clockevent_i8253_disable(void)
 {
-	if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt))
-		return 0;
-
 	raw_spin_lock(&i8253_lock);
 
+	/*
+	 * Writing the MODE register should stop the counter, according to
+	 * the datasheet. This appears to work on real hardware (well, on
+	 * modern Intel and AMD boxes; I didn't dig the Pegasos out of the
+	 * shed).
+	 *
+	 * However, some virtual implementations differ, and the MODE change
+	 * doesn't have any effect until either the counter is written (KVM
+	 * in-kernel PIT) or the next interrupt (QEMU). And in those cases,
+	 * it may not stop the *count*, only the interrupts. Although in
+	 * the virt case, that probably doesn't matter, as the value of the
+	 * counter will only be calculated on demand if the guest reads it;
+	 * it's the interrupts which cause steal time.
+	 *
+	 * Hyper-V apparently has a bug where even in mode 0, the IRQ keeps
+	 * firing repeatedly if the counter is running. But it *does* do the
+	 * right thing when the MODE register is written.
+	 *
+	 * So: write the MODE and then load the counter, which ensures that
+	 * the IRQ is stopped on those buggy virt implementations. And then
+	 * write the MODE again, which is the right way to stop it.
+	 */
 	outb_p(0x30, PIT_MODE);
+	outb_p(0, PIT_CH0);
+	outb_p(0, PIT_CH0);
 
-	if (i8253_clear_counter_on_shutdown) {
-		outb_p(0, PIT_CH0);
-		outb_p(0, PIT_CH0);
-	}
+	outb_p(0x30, PIT_MODE);
 
 	raw_spin_unlock(&i8253_lock);
+}
+
+static int pit_shutdown(struct clock_event_device *evt)
+{
+	if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt))
+		return 0;
+
+	clockevent_i8253_disable();
 	return 0;
 }
 
diff --git a/include/linux/i8253.h b/include/linux/i8253.h
index 8336b2f6f834..56c280eb2d4f 100644
--- a/include/linux/i8253.h
+++ b/include/linux/i8253.h
@@ -21,9 +21,9 @@
 #define PIT_LATCH	((PIT_TICK_RATE + HZ/2) / HZ)
 
 extern raw_spinlock_t i8253_lock;
-extern bool i8253_clear_counter_on_shutdown;
 extern struct clock_event_device i8253_clockevent;
 extern void clockevent_i8253_init(bool oneshot);
+extern void clockevent_i8253_disable(void);
 
 extern void setup_pit_timer(void);
 
Re: [GIT pull] x86/timers for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:19 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-timers-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/fc1dc0d50780a9b215322bcc315f07ad8e4c6c13

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
[GIT pull] x86/mm for v6.12-rc1
Posted by Thomas Gleixner 2 months, 1 week ago
Linus,

please pull the latest x86/mm branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-mm-2024-09-17

up to:  50c6dbdfd16e: x86/ioremap: Improve iounmap() address range checks

Updates for x86 memory management:

 - Make LAM enablement safe vs. kernel threads which use a process mm
   temporarily, as switching back to the process would not update CR3 and
   therefore not enable LAM, causing faults in user space when tagged
   pointers are used. Cure it by synchronizing LAM enablement via IPIs to
   all CPUs which use the related mm (see the user space sketch below).

 - Cure a harmless LAM inconsistency between CR3 and the state tracked
   during context switch. It's both confusing and prone to lead to real
   bugs.

 - Fix alternate signal stack handling for threads which run with a
   non-zero protection key. The non-zero key prevents the kernel from
   accessing the alternate stack. Cure it by temporarily enabling all
   protection keys for the alternate stack setup/restore operations.

 - Provide an EFI config table identity mapping for the kexec kernel to
   prevent kexec failures caused by the new kernel being unable to access
   the config table array.

 - Use GB pages only when a full GB is mapped in the identity map, as
   otherwise the CPU can speculate into reserved areas after the end of
   memory, which causes malfunctions on UV systems.

 - Remove the noisy and pointless SRAT table dump during boot

 - Use is_ioremap_addr() for iounmap() address range checks instead of
   high_memory. is_ioremap_addr() is more precise.

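As an aside, the user visible part of LAM is a plain arch_prctl(2). A minimal
sketch of how a process enables LAM_U57 and dereferences a tagged pointer is
below; it is not part of the patches, and the ARCH_ENABLE_TAGGED_ADDR value is
quoted from the uapi header as an assumption:

	/* Hypothetical standalone example, x86-64 only */
	#define _GNU_SOURCE
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Assumed to match arch/x86/include/uapi/asm/prctl.h */
	#ifndef ARCH_ENABLE_TAGGED_ADDR
	#define ARCH_ENABLE_TAGGED_ADDR	0x4002
	#endif

	int main(void)
	{
		/* Request 6 tag bits, i.e. LAM_U57 (bits 62:57 are masked) */
		if (syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, 6UL)) {
			perror("arch_prctl(ARCH_ENABLE_TAGGED_ADDR)");
			return 1;
		}

		uint64_t *p = malloc(sizeof(*p));
		*p = 42;

		/* Stash metadata in bits 62:57; the CPU ignores it on access */
		uint64_t *tagged = (uint64_t *)((uintptr_t)p | (0x3fULL << 57));
		printf("%llu\n", (unsigned long long)*tagged);

		free(p);
		return 0;
	}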

Thanks,

	tglx

------------------>
Aruna Ramakrishna (4):
      x86/pkeys: Add PKRU as a parameter in signal handling functions
      x86/pkeys: Add helper functions to update PKRU on the sigframe
      x86/pkeys: Update PKRU to enable all pkeys before XSAVE
      x86/pkeys: Restore altstack access in sigreturn()

Keith Lucas (1):
      selftests/mm: Add new testcases for pkeys

Li RongQing (1):
      x86/mm: Don't print out SRAT table information

Max Ramanouski (1):
      x86/ioremap: Improve iounmap() address range checks

Steve Wahl (1):
      x86/mm/ident_map: Use gbpages only where full GB page should be mapped.

Tao Liu (1):
      x86/kexec: Add EFI config table identity mapping for kexec kernel

Yosry Ahmed (4):
      x86/mm: Use IPIs to synchronize LAM enablement
      x86/mm: Fix LAM inconsistency during context switch
      x86/mm: Cleanup prctl_enable_tagged_addr() nr_bits error checking
      x86/mm: Remove unused CR3_HW_ASID_BITS

Yue Haibing (1):
      x86/mm: Remove unused NX related declarations

Yuntao Wang (1):
      x86/mm: Remove duplicate check from build_cr3()


 arch/x86/include/asm/fpu/signal.h                  |   2 +-
 arch/x86/include/asm/mmu_context.h                 |   8 +-
 arch/x86/include/asm/pgtable_types.h               |   2 -
 arch/x86/include/asm/tlbflush.h                    |   9 +-
 arch/x86/kernel/fpu/signal.c                       |  27 +-
 arch/x86/kernel/fpu/xstate.c                       |  13 +
 arch/x86/kernel/fpu/xstate.h                       |   2 +
 arch/x86/kernel/machine_kexec_64.c                 |  27 ++
 arch/x86/kernel/process_64.c                       |  42 +-
 arch/x86/kernel/signal.c                           |  29 +-
 arch/x86/kernel/signal_64.c                        |   6 +-
 arch/x86/mm/ident_map.c                            |  23 +-
 arch/x86/mm/ioremap.c                              |   3 +-
 arch/x86/mm/srat.c                                 |   6 +-
 arch/x86/mm/tlb.c                                  |  19 +-
 include/linux/ioremap.h                            |   1 +
 tools/testing/selftests/mm/Makefile                |   1 +
 tools/testing/selftests/mm/pkey-helpers.h          |  13 +-
 tools/testing/selftests/mm/pkey_sighandler_tests.c | 481 +++++++++++++++++++++
 tools/testing/selftests/mm/protection_keys.c       |  10 -
 20 files changed, 664 insertions(+), 60 deletions(-)
 create mode 100644 tools/testing/selftests/mm/pkey_sighandler_tests.c

diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 611fa41711af..eccc75bc9c4f 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -29,7 +29,7 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
 
 unsigned long fpu__get_fpstate_size(void);
 
-extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
+extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size, u32 pkru);
 extern void fpu__clear_user_states(struct fpu *fpu);
 extern bool fpu__restore_sig(void __user *buf, int ia32_frame);
 
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8dac45a2c7fc..19091ebb8633 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -88,7 +88,13 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 #ifdef CONFIG_ADDRESS_MASKING
 static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
 {
-	return mm->context.lam_cr3_mask;
+	/*
+	 * When switch_mm_irqs_off() is called for a kthread, it may race with
+	 * LAM enablement. switch_mm_irqs_off() uses the LAM mask to do two
+	 * things: populate CR3 and populate 'cpu_tlbstate.lam'. Make sure it
+	 * reads a single value for both.
+	 */
+	return READ_ONCE(mm->context.lam_cr3_mask);
 }
 
 static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 2f321137736c..6f82e75b6149 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -517,8 +517,6 @@ typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
 extern pteval_t __default_kernel_pte_mask;
-extern void set_nx(void);
-extern int nx_enabled;
 
 #define pgprot_writecombine	pgprot_writecombine
 extern pgprot_t pgprot_writecombine(pgprot_t prot);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 25726893c6f4..69e79fff41b8 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -399,11 +399,10 @@ static inline  u64 tlbstate_lam_cr3_mask(void)
 	return lam << X86_CR3_LAM_U57_BIT;
 }
 
-static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
 {
-	this_cpu_write(cpu_tlbstate.lam,
-		       mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT);
-	this_cpu_write(tlbstate_untag_mask, mm->context.untag_mask);
+	this_cpu_write(cpu_tlbstate.lam, lam >> X86_CR3_LAM_U57_BIT);
+	this_cpu_write(tlbstate_untag_mask, untag_mask);
 }
 
 #else
@@ -413,7 +412,7 @@ static inline u64 tlbstate_lam_cr3_mask(void)
 	return 0;
 }
 
-static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
 {
 }
 #endif
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 247f2225aa9f..1065ab995305 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -63,6 +63,16 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
 	return true;
 }
 
+/*
+ * Update the value of PKRU register that was already pushed onto the signal frame.
+ */
+static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru)
+{
+	if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
+		return 0;
+	return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU));
+}
+
 /*
  * Signal frame handlers.
  */
@@ -156,10 +166,17 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
 	return !err;
 }
 
-static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
+static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru)
 {
-	if (use_xsave())
-		return xsave_to_user_sigframe(buf);
+	int err = 0;
+
+	if (use_xsave()) {
+		err = xsave_to_user_sigframe(buf);
+		if (!err)
+			err = update_pkru_in_sigframe(buf, pkru);
+		return err;
+	}
+
 	if (use_fxsr())
 		return fxsave_to_user_sigframe((struct fxregs_state __user *) buf);
 	else
@@ -185,7 +202,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
  * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
  * indicating the absence/presence of the extended state to the user.
  */
-bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
+bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru)
 {
 	struct task_struct *tsk = current;
 	struct fpstate *fpstate = tsk->thread.fpu.fpstate;
@@ -228,7 +245,7 @@ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 		fpregs_restore_userregs();
 
 	pagefault_disable();
-	ret = copy_fpregs_to_sigframe(buf_fx);
+	ret = copy_fpregs_to_sigframe(buf_fx, pkru);
 	pagefault_enable();
 	fpregs_unlock();
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c5a026fee5e0..fa7628bb541b 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -993,6 +993,19 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 }
 EXPORT_SYMBOL_GPL(get_xsave_addr);
 
+/*
+ * Given an xstate feature nr, calculate where in the xsave buffer the state is.
+ * The xsave buffer should be in standard format, not compacted (e.g. user mode
+ * signal frames).
+ */
+void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
+{
+	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
+		return NULL;
+
+	return (void __user *)xsave + xstate_offsets[xfeature_nr];
+}
+
 #ifdef CONFIG_ARCH_HAS_PKEYS
 
 /*
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 2ee0b9c53dcc..5f057e50df81 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -54,6 +54,8 @@ extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void
 extern void fpu__init_cpu_xstate(void);
 extern void fpu__init_system_xstate(unsigned int legacy_size);
 
+extern void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr);
+
 static inline u64 xfeatures_mask_supervisor(void)
 {
 	return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index cc0f7f70b17b..9c9ac606893e 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -28,6 +28,7 @@
 #include <asm/setup.h>
 #include <asm/set_memory.h>
 #include <asm/cpu.h>
+#include <asm/efi.h>
 
 #ifdef CONFIG_ACPI
 /*
@@ -87,6 +88,8 @@ map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
 {
 #ifdef CONFIG_EFI
 	unsigned long mstart, mend;
+	void *kaddr;
+	int ret;
 
 	if (!efi_enabled(EFI_BOOT))
 		return 0;
@@ -102,6 +105,30 @@ map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
 	if (!mstart)
 		return 0;
 
+	ret = kernel_ident_mapping_init(info, level4p, mstart, mend);
+	if (ret)
+		return ret;
+
+	kaddr = memremap(mstart, mend - mstart, MEMREMAP_WB);
+	if (!kaddr) {
+		pr_err("Could not map UEFI system table\n");
+		return -ENOMEM;
+	}
+
+	mstart = efi_config_table;
+
+	if (efi_enabled(EFI_64BIT)) {
+		efi_system_table_64_t *stbl = (efi_system_table_64_t *)kaddr;
+
+		mend = mstart + sizeof(efi_config_table_64_t) * stbl->nr_tables;
+	} else {
+		efi_system_table_32_t *stbl = (efi_system_table_32_t *)kaddr;
+
+		mend = mstart + sizeof(efi_config_table_32_t) * stbl->nr_tables;
+	}
+
+	memunmap(kaddr);
+
 	return kernel_ident_mapping_init(info, level4p, mstart, mend);
 #endif
 	return 0;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6d3d20e3e43a..226472332a70 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -798,6 +798,32 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
 
 #define LAM_U57_BITS 6
 
+static void enable_lam_func(void *__mm)
+{
+	struct mm_struct *mm = __mm;
+	unsigned long lam;
+
+	if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm) {
+		lam = mm_lam_cr3_mask(mm);
+		write_cr3(__read_cr3() | lam);
+		cpu_tlbstate_update_lam(lam, mm_untag_mask(mm));
+	}
+}
+
+static void mm_enable_lam(struct mm_struct *mm)
+{
+	mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
+	mm->context.untag_mask =  ~GENMASK(62, 57);
+
+	/*
+	 * Even though the process must still be single-threaded at this
+	 * point, kernel threads may be using the mm.  IPI those kernel
+	 * threads if they exist.
+	 */
+	on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true);
+	set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
+}
+
 static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
 {
 	if (!cpu_feature_enabled(X86_FEATURE_LAM))
@@ -814,25 +840,21 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
+	/*
+	 * MM_CONTEXT_LOCK_LAM is set on clone.  Prevent LAM from
+	 * being enabled unless the process is single threaded:
+	 */
 	if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) {
 		mmap_write_unlock(mm);
 		return -EBUSY;
 	}
 
-	if (!nr_bits) {
-		mmap_write_unlock(mm);
-		return -EINVAL;
-	} else if (nr_bits <= LAM_U57_BITS) {
-		mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
-		mm->context.untag_mask =  ~GENMASK(62, 57);
-	} else {
+	if (!nr_bits || nr_bits > LAM_U57_BITS) {
 		mmap_write_unlock(mm);
 		return -EINVAL;
 	}
 
-	write_cr3(__read_cr3() | mm->context.lam_cr3_mask);
-	set_tlbstate_lam_mode(mm);
-	set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
+	mm_enable_lam(mm);
 
 	mmap_write_unlock(mm);
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 31b6f5dddfc2..5f441039b572 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -60,6 +60,24 @@ static inline int is_x32_frame(struct ksignal *ksig)
 		ksig->ka.sa.sa_flags & SA_X32_ABI;
 }
 
+/*
+ * Enable all pkeys temporarily, so as to ensure that both the current
+ * execution stack as well as the alternate signal stack are writeable.
+ * The application can use any of the available pkeys to protect the
+ * alternate signal stack, and we don't know which one it is, so enable
+ * all. The PKRU register will be reset to init_pkru later in the flow,
+ * in fpu__clear_user_states(), and it is the application's responsibility
+ * to enable the appropriate pkey as the first step in the signal handler
+ * so that the handler does not segfault.
+ */
+static inline u32 sig_prepare_pkru(void)
+{
+	u32 orig_pkru = read_pkru();
+
+	write_pkru(0);
+	return orig_pkru;
+}
+
 /*
  * Set up a signal frame.
  */
@@ -84,6 +102,7 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
 	unsigned long math_size = 0;
 	unsigned long sp = regs->sp;
 	unsigned long buf_fx = 0;
+	u32 pkru;
 
 	/* redzone */
 	if (!ia32_frame)
@@ -138,9 +157,17 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
 		return (void __user *)-1L;
 	}
 
+	/* Update PKRU to enable access to the alternate signal stack. */
+	pkru = sig_prepare_pkru();
 	/* save i387 and extended state */
-	if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size))
+	if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size, pkru)) {
+		/*
+		 * Restore PKRU to the original, user-defined value; disable
+		 * extra pkeys enabled for the alternate signal stack, if any.
+		 */
+		write_pkru(pkru);
 		return (void __user *)-1L;
+	}
 
 	return (void __user *)sp;
 }
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 8a94053c5444..ee9453891901 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -260,13 +260,13 @@ SYSCALL_DEFINE0(rt_sigreturn)
 
 	set_current_blocked(&set);
 
-	if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
+	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
-	if (restore_signal_shadow_stack())
+	if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
 		goto badframe;
 
-	if (restore_altstack(&frame->uc.uc_stack))
+	if (restore_signal_shadow_stack())
 		goto badframe;
 
 	return regs->ax;
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index c45127265f2f..437e96fb4977 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -99,18 +99,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 	for (; addr < end; addr = next) {
 		pud_t *pud = pud_page + pud_index(addr);
 		pmd_t *pmd;
+		bool use_gbpage;
 
 		next = (addr & PUD_MASK) + PUD_SIZE;
 		if (next > end)
 			next = end;
 
-		if (info->direct_gbpages) {
-			pud_t pudval;
+		/* if this is already a gbpage, this portion is already mapped */
+		if (pud_leaf(*pud))
+			continue;
+
+		/* Is using a gbpage allowed? */
+		use_gbpage = info->direct_gbpages;
 
-			if (pud_present(*pud))
-				continue;
+		/* Don't use gbpage if it maps more than the requested region. */
+		/* at the begining: */
+		use_gbpage &= ((addr & ~PUD_MASK) == 0);
+		/* ... or at the end: */
+		use_gbpage &= ((next & ~PUD_MASK) == 0);
+
+		/* Never overwrite existing mappings */
+		use_gbpage &= !pud_present(*pud);
+
+		if (use_gbpage) {
+			pud_t pudval;
 
-			addr &= PUD_MASK;
 			pudval = __pud((addr - info->offset) | info->page_flag);
 			set_pud(pud, pudval);
 			continue;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index aa7d279321ea..70b02fc61d93 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
+#include <linux/ioremap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mmiotrace.h>
@@ -457,7 +458,7 @@ void iounmap(volatile void __iomem *addr)
 {
 	struct vm_struct *p, *o;
 
-	if ((void __force *)addr <= high_memory)
+	if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr)))
 		return;
 
 	/*
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 9c52a95937ad..6f8e0f21c710 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -57,8 +57,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 	set_apicid_to_node(apic_id, node);
 	node_set(node, numa_nodes_parsed);
-	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
-	       pxm, apic_id, node);
+	pr_debug("SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node);
 }
 
 /* Callback for Proximity Domain -> LAPIC mapping */
@@ -98,8 +97,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 
 	set_apicid_to_node(apic_id, node);
 	node_set(node, numa_nodes_parsed);
-	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
-	       pxm, apic_id, node);
+	pr_debug("SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node);
 }
 
 int __init x86_acpi_numa_init(void)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 44ac64f3a047..86593d1b787d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -11,6 +11,7 @@
 #include <linux/sched/smt.h>
 #include <linux/task_work.h>
 #include <linux/mmu_notifier.h>
+#include <linux/mmu_context.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -85,9 +86,6 @@
  *
  */
 
-/* There are 12 bits of space for ASIDS in CR3 */
-#define CR3_HW_ASID_BITS		12
-
 /*
  * When enabled, MITIGATION_PAGE_TABLE_ISOLATION consumes a single bit for
  * user/kernel switches
@@ -160,7 +158,6 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
 	unsigned long cr3 = __sme_pa(pgd) | lam;
 
 	if (static_cpu_has(X86_FEATURE_PCID)) {
-		VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
 		cr3 |= kern_pcid(asid);
 	} else {
 		VM_WARN_ON_ONCE(asid != 0);
@@ -503,9 +500,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
 {
 	struct mm_struct *prev = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-	unsigned long new_lam = mm_lam_cr3_mask(next);
 	bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
 	unsigned cpu = smp_processor_id();
+	unsigned long new_lam;
 	u64 next_tlb_gen;
 	bool need_flush;
 	u16 new_asid;
@@ -619,9 +616,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
 			cpumask_clear_cpu(cpu, mm_cpumask(prev));
 		}
 
-		/*
-		 * Start remote flushes and then read tlb_gen.
-		 */
+		/* Start receiving IPIs and then read tlb_gen (and LAM below) */
 		if (next != &init_mm)
 			cpumask_set_cpu(cpu, mm_cpumask(next));
 		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
@@ -633,7 +628,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
 		barrier();
 	}
 
-	set_tlbstate_lam_mode(next);
+	new_lam = mm_lam_cr3_mask(next);
 	if (need_flush) {
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -652,6 +647,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
 
 	this_cpu_write(cpu_tlbstate.loaded_mm, next);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
+	cpu_tlbstate_update_lam(new_lam, mm_untag_mask(next));
 
 	if (next != prev) {
 		cr4_update_pce_mm(next);
@@ -698,6 +694,7 @@ void initialize_tlbstate_and_flush(void)
 	int i;
 	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
+	unsigned long lam = mm_lam_cr3_mask(mm);
 	unsigned long cr3 = __read_cr3();
 
 	/* Assert that CR3 already references the right mm. */
@@ -705,7 +702,7 @@ void initialize_tlbstate_and_flush(void)
 
 	/* LAM expected to be disabled */
 	WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
-	WARN_ON(mm_lam_cr3_mask(mm));
+	WARN_ON(lam);
 
 	/*
 	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
@@ -724,7 +721,7 @@ void initialize_tlbstate_and_flush(void)
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
 	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
-	set_tlbstate_lam_mode(mm);
+	cpu_tlbstate_update_lam(lam, mm_untag_mask(mm));
 
 	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
 		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
diff --git a/include/linux/ioremap.h b/include/linux/ioremap.h
index f0e99fc7dd8b..2bd1661fe9ad 100644
--- a/include/linux/ioremap.h
+++ b/include/linux/ioremap.h
@@ -4,6 +4,7 @@
 
 #include <linux/kasan.h>
 #include <asm/pgtable.h>
+#include <asm/vmalloc.h>
 
 #if defined(CONFIG_HAS_IOMEM) || defined(CONFIG_GENERIC_IOREMAP)
 /*
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 901e0d07765b..1f176fff7054 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -88,6 +88,7 @@ CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_pr
 CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
 
 VMTARGETS := protection_keys
+VMTARGETS += pkey_sighandler_tests
 BINARIES_32 := $(VMTARGETS:%=%_32)
 BINARIES_64 := $(VMTARGETS:%=%_64)
 
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 1af3156a9db8..4d31a309a46b 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -79,7 +79,18 @@ extern void abort_hooks(void);
 	}					\
 } while (0)
 
-__attribute__((noinline)) int read_ptr(int *ptr);
+#define barrier() __asm__ __volatile__("": : :"memory")
+#ifndef noinline
+# define noinline __attribute__((noinline))
+#endif
+
+noinline int read_ptr(int *ptr)
+{
+	/* Keep GCC from optimizing this away somehow */
+	barrier();
+	return *ptr;
+}
+
 void expected_pkey_fault(int pkey);
 int sys_pkey_alloc(unsigned long flags, unsigned long init_val);
 int sys_pkey_free(unsigned long pkey);
diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c
new file mode 100644
index 000000000000..a8088b645ad6
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
+ *
+ * The testcases in this file exercise various flows related to signal handling,
+ * using an alternate signal stack, with the default pkey (pkey 0) disabled.
+ *
+ * Compile with:
+ * gcc -mxsave      -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm
+ * gcc -mxsave -m32 -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm
+ */
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+#include <errno.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <limits.h>
+
+#include "pkey-helpers.h"
+
+#define STACK_SIZE PTHREAD_STACK_MIN
+
+void expected_pkey_fault(int pkey) {}
+
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+siginfo_t siginfo = {0};
+
+/*
+ * We need to use inline assembly instead of glibc's syscall because glibc's
+ * syscall will attempt to access the PLT in order to call a library function
+ * which is protected by MPK 0 which we don't have access to.
+ */
+static inline __always_inline
+long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6)
+{
+	unsigned long ret;
+#ifdef __x86_64__
+	register long r10 asm("r10") = a4;
+	register long r8 asm("r8") = a5;
+	register long r9 asm("r9") = a6;
+	asm volatile ("syscall"
+		      : "=a"(ret)
+		      : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10), "r"(r8), "r"(r9)
+		      : "rcx", "r11", "memory");
+#elif defined __i386__
+	asm volatile ("int $0x80"
+		      : "=a"(ret)
+		      : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5)
+		      : "memory");
+#else
+# error syscall_raw() not implemented
+#endif
+	return ret;
+}
+
+static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext)
+{
+	pthread_mutex_lock(&mutex);
+
+	memcpy(&siginfo, info, sizeof(siginfo_t));
+
+	pthread_cond_signal(&cond);
+	pthread_mutex_unlock(&mutex);
+
+	syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+}
+
+static void sigusr1_handler(int signo, siginfo_t *info, void *ucontext)
+{
+	pthread_mutex_lock(&mutex);
+
+	memcpy(&siginfo, info, sizeof(siginfo_t));
+
+	pthread_cond_signal(&cond);
+	pthread_mutex_unlock(&mutex);
+}
+
+static void sigusr2_handler(int signo, siginfo_t *info, void *ucontext)
+{
+	/*
+	 * pkru should be the init_pkru value which enabled MPK 0 so
+	 * we can use library functions.
+	 */
+	printf("%s invoked.\n", __func__);
+}
+
+static void raise_sigusr2(void)
+{
+	pid_t tid = 0;
+
+	tid = syscall_raw(SYS_gettid, 0, 0, 0, 0, 0, 0);
+
+	syscall_raw(SYS_tkill, tid, SIGUSR2, 0, 0, 0, 0);
+
+	/*
+	 * We should return from the signal handler here and be able to
+	 * return to the interrupted thread.
+	 */
+}
+
+static void *thread_segv_with_pkey0_disabled(void *ptr)
+{
+	/* Disable MPK 0 (and all others too) */
+	__write_pkey_reg(0x55555555);
+
+	/* Segfault (with SEGV_MAPERR) */
+	*(int *) (0x1) = 1;
+	return NULL;
+}
+
+static void *thread_segv_pkuerr_stack(void *ptr)
+{
+	/* Disable MPK 0 (and all others too) */
+	__write_pkey_reg(0x55555555);
+
+	/* After we disable MPK 0, we can't access the stack to return */
+	return NULL;
+}
+
+static void *thread_segv_maperr_ptr(void *ptr)
+{
+	stack_t *stack = ptr;
+	int *bad = (int *)1;
+
+	/*
+	 * Setup alternate signal stack, which should be pkey_mprotect()ed by
+	 * MPK 0. The thread's stack cannot be used for signals because it is
+	 * not accessible by the default init_pkru value of 0x55555554.
+	 */
+	syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
+
+	/* Disable MPK 0.  Only MPK 1 is enabled. */
+	__write_pkey_reg(0x55555551);
+
+	/* Segfault */
+	*bad = 1;
+	syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+	return NULL;
+}
+
+/*
+ * Verify that the sigsegv handler is invoked when pkey 0 is disabled.
+ * Note that the new thread stack and the alternate signal stack is
+ * protected by MPK 0.
+ */
+static void test_sigsegv_handler_with_pkey0_disabled(void)
+{
+	struct sigaction sa;
+	pthread_attr_t attr;
+	pthread_t thr;
+
+	sa.sa_flags = SA_SIGINFO;
+
+	sa.sa_sigaction = sigsegv_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&siginfo, 0, sizeof(siginfo));
+
+	pthread_attr_init(&attr);
+	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+	pthread_create(&thr, &attr, thread_segv_with_pkey0_disabled, NULL);
+
+	pthread_mutex_lock(&mutex);
+	while (siginfo.si_signo == 0)
+		pthread_cond_wait(&cond, &mutex);
+	pthread_mutex_unlock(&mutex);
+
+	ksft_test_result(siginfo.si_signo == SIGSEGV &&
+			 siginfo.si_code == SEGV_MAPERR &&
+			 siginfo.si_addr == (void *)1,
+			 "%s\n", __func__);
+}
+
+/*
+ * Verify that the sigsegv handler is invoked when pkey 0 is disabled.
+ * Note that the new thread stack and the alternate signal stack is
+ * protected by MPK 0, which renders them inaccessible when MPK 0
+ * is disabled. So just the return from the thread should cause a
+ * segfault with SEGV_PKUERR.
+ */
+static void test_sigsegv_handler_cannot_access_stack(void)
+{
+	struct sigaction sa;
+	pthread_attr_t attr;
+	pthread_t thr;
+
+	sa.sa_flags = SA_SIGINFO;
+
+	sa.sa_sigaction = sigsegv_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&siginfo, 0, sizeof(siginfo));
+
+	pthread_attr_init(&attr);
+	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+	pthread_create(&thr, &attr, thread_segv_pkuerr_stack, NULL);
+
+	pthread_mutex_lock(&mutex);
+	while (siginfo.si_signo == 0)
+		pthread_cond_wait(&cond, &mutex);
+	pthread_mutex_unlock(&mutex);
+
+	ksft_test_result(siginfo.si_signo == SIGSEGV &&
+			 siginfo.si_code == SEGV_PKUERR,
+			 "%s\n", __func__);
+}
+
+/*
+ * Verify that the sigsegv handler that uses an alternate signal stack
+ * is correctly invoked for a thread which uses a non-zero MPK to protect
+ * its own stack, and disables all other MPKs (including 0).
+ */
+static void test_sigsegv_handler_with_different_pkey_for_stack(void)
+{
+	struct sigaction sa;
+	static stack_t sigstack;
+	void *stack;
+	int pkey;
+	int parent_pid = 0;
+	int child_pid = 0;
+
+	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+
+	sa.sa_sigaction = sigsegv_handler;
+
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
+		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	assert(stack != MAP_FAILED);
+
+	/* Allow access to MPK 0 and MPK 1 */
+	__write_pkey_reg(0x55555550);
+
+	/* Protect the new stack with MPK 1 */
+	pkey = pkey_alloc(0, 0);
+	pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+
+	/* Set up alternate signal stack that will use the default MPK */
+	sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	sigstack.ss_flags = 0;
+	sigstack.ss_size = STACK_SIZE;
+
+	memset(&siginfo, 0, sizeof(siginfo));
+
+	/* Use clone to avoid newer glibcs using rseq on new threads */
+	long ret = syscall_raw(SYS_clone,
+			       CLONE_VM | CLONE_FS | CLONE_FILES |
+			       CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+			       CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+			       CLONE_DETACHED,
+			       (long) ((char *)(stack) + STACK_SIZE),
+			       (long) &parent_pid,
+			       (long) &child_pid, 0, 0);
+
+	if (ret < 0) {
+		errno = -ret;
+		perror("clone");
+	} else if (ret == 0) {
+		thread_segv_maperr_ptr(&sigstack);
+		syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+	}
+
+	pthread_mutex_lock(&mutex);
+	while (siginfo.si_signo == 0)
+		pthread_cond_wait(&cond, &mutex);
+	pthread_mutex_unlock(&mutex);
+
+	ksft_test_result(siginfo.si_signo == SIGSEGV &&
+			 siginfo.si_code == SEGV_MAPERR &&
+			 siginfo.si_addr == (void *)1,
+			 "%s\n", __func__);
+}
+
+/*
+ * Verify that the PKRU value set by the application is correctly
+ * restored upon return from signal handling.
+ */
+static void test_pkru_preserved_after_sigusr1(void)
+{
+	struct sigaction sa;
+	unsigned long pkru = 0x45454544;
+
+	sa.sa_flags = SA_SIGINFO;
+
+	sa.sa_sigaction = sigusr1_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGUSR1, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&siginfo, 0, sizeof(siginfo));
+
+	__write_pkey_reg(pkru);
+
+	raise(SIGUSR1);
+
+	pthread_mutex_lock(&mutex);
+	while (siginfo.si_signo == 0)
+		pthread_cond_wait(&cond, &mutex);
+	pthread_mutex_unlock(&mutex);
+
+	/* Ensure the pkru value is the same after returning from signal. */
+	ksft_test_result(pkru == __read_pkey_reg() &&
+			 siginfo.si_signo == SIGUSR1,
+			 "%s\n", __func__);
+}
+
+static noinline void *thread_sigusr2_self(void *ptr)
+{
+	/*
+	 * A const char array like "Resuming after SIGUSR2" won't be stored on
+	 * the stack and the code could access it via an offset from the program
+	 * counter. This makes sure it's on the function's stack frame.
+	 */
+	char str[] = {'R', 'e', 's', 'u', 'm', 'i', 'n', 'g', ' ',
+		'a', 'f', 't', 'e', 'r', ' ',
+		'S', 'I', 'G', 'U', 'S', 'R', '2',
+		'.', '.', '.', '\n', '\0'};
+	stack_t *stack = ptr;
+
+	/*
+	 * Setup alternate signal stack, which should be pkey_mprotect()ed by
+	 * MPK 0. The thread's stack cannot be used for signals because it is
+	 * not accessible by the default init_pkru value of 0x55555554.
+	 */
+	syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
+
+	/* Disable MPK 0.  Only MPK 2 is enabled. */
+	__write_pkey_reg(0x55555545);
+
+	raise_sigusr2();
+
+	/* Do something, to show the thread resumed execution after the signal */
+	syscall_raw(SYS_write, 1, (long) str, sizeof(str) - 1, 0, 0, 0);
+
+	/*
+	 * We can't return to test_pkru_sigreturn because it
+	 * will attempt to use a %rbp value which is on the stack
+	 * of the main thread.
+	 */
+	syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+	return NULL;
+}
+
+/*
+ * Verify that sigreturn is able to restore altstack even if the thread had
+ * disabled pkey 0.
+ */
+static void test_pkru_sigreturn(void)
+{
+	struct sigaction sa = {0};
+	static stack_t sigstack;
+	void *stack;
+	int pkey;
+	int parent_pid = 0;
+	int child_pid = 0;
+
+	sa.sa_handler = SIG_DFL;
+	sa.sa_flags = 0;
+	sigemptyset(&sa.sa_mask);
+
+	/*
+	 * For this testcase, we do not want to handle SIGSEGV. Reset handler
+	 * to default so that the application can crash if it receives SIGSEGV.
+	 */
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+	sa.sa_sigaction = sigusr2_handler;
+	sigemptyset(&sa.sa_mask);
+
+	if (sigaction(SIGUSR2, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+
+	stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
+		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	assert(stack != MAP_FAILED);
+
+	/*
+	 * Allow access to MPK 0 and MPK 2. The child thread (to be created
+	 * later in this flow) will have its stack protected by MPK 2, whereas
+	 * the current thread's stack is protected by the default MPK 0. Hence
+	 * both need to be enabled.
+	 */
+	__write_pkey_reg(0x55555544);
+
+	/* Protect the stack with MPK 2 */
+	pkey = pkey_alloc(0, 0);
+	pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+
+	/* Set up alternate signal stack that will use the default MPK */
+	sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	sigstack.ss_flags = 0;
+	sigstack.ss_size = STACK_SIZE;
+
+	/* Use clone to avoid newer glibcs using rseq on new threads */
+	long ret = syscall_raw(SYS_clone,
+			       CLONE_VM | CLONE_FS | CLONE_FILES |
+			       CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+			       CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+			       CLONE_DETACHED,
+			       (long) ((char *)(stack) + STACK_SIZE),
+			       (long) &parent_pid,
+			       (long) &child_pid, 0, 0);
+
+	if (ret < 0) {
+		errno = -ret;
+		perror("clone");
+	}  else if (ret == 0) {
+		thread_sigusr2_self(&sigstack);
+		syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+	}
+
+	child_pid =  ret;
+	/* Check that thread exited */
+	do {
+		sched_yield();
+		ret = syscall_raw(SYS_tkill, child_pid, 0, 0, 0, 0, 0);
+	} while (ret != -ESRCH && ret != -EINVAL);
+
+	ksft_test_result_pass("%s\n", __func__);
+}
+
+static void (*pkey_tests[])(void) = {
+	test_sigsegv_handler_with_pkey0_disabled,
+	test_sigsegv_handler_cannot_access_stack,
+	test_sigsegv_handler_with_different_pkey_for_stack,
+	test_pkru_preserved_after_sigusr1,
+	test_pkru_sigreturn
+};
+
+int main(int argc, char *argv[])
+{
+	int i;
+
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(pkey_tests));
+
+	for (i = 0; i < ARRAY_SIZE(pkey_tests); i++)
+		(*pkey_tests[i])();
+
+	ksft_finished();
+	return 0;
+}
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index eaa6d1fc5328..cc6de1644360 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -950,16 +950,6 @@ void close_test_fds(void)
 	nr_test_fds = 0;
 }
 
-#define barrier() __asm__ __volatile__("": : :"memory")
-__attribute__((noinline)) int read_ptr(int *ptr)
-{
-	/*
-	 * Keep GCC from optimizing this away somehow
-	 */
-	barrier();
-	return *ptr;
-}
-
 void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
 {
 	int i, err;
Re: [GIT pull] x86/mm for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:15 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-mm-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/70f43ea3a360c5a7d3474b0cfbabb80be6424596

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html
[GIT pull] x86/misc for v6.12-rc1
Posted by Thomas Gleixner 2 months, 1 week ago
Linus,

please pull the latest x86/misc branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-misc-2024-09-17

up to:  4460e8538ef1: MAINTAINERS: Add selftests/x86 entry

Miscellaneous updates for x86:

  - Rework kcpuid to handle the autogenerated CSV file correctly and
    update the CSV file to cover the whole zoo of CPUID.

  - Avoid memcpy() for ia32 syscall_get_arguments() and use direct
    assignments as fortified memcpy() is unhappy about writing/reading
    beyond the end of the addressed destination/source struct member
    (a short illustrative sketch follows this list)

  - A few new PCI IDs for AMD

  - Update MAINTAINERS to cover x86 specific selftests
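
  Illustration (not from the patch itself; the struct and function names
  below are made up): the fortified memcpy() sizes the source at the
  single 'bx' member, so a bulk copy of all six argument words from it
  trips the read-overflow check, while per-member assignments - as in
  the syscall_get_arguments() hunk further down - stay within bounds:

    #include <string.h>

    struct regs32 { unsigned long bx, cx, dx, si, di, bp; };

    /* Bulk copy: a member-granular bounds check sees the source as the
     * single 'bx' word and flags reading six words from it. */
    static void get_args_memcpy(unsigned long *args, struct regs32 *regs)
    {
            memcpy(args, &regs->bx, 6 * sizeof(args[0]));
    }

    /* Direct assignments name each member explicitly, stay in bounds. */
    static void get_args_assign(unsigned long *args, struct regs32 *regs)
    {
            args[0] = regs->bx;
            args[1] = regs->cx;
            args[2] = regs->dx;
            args[3] = regs->si;
            args[4] = regs->di;
            args[5] = regs->bp;
    }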


Thanks,

	tglx

------------------>
Ahmed S. Darwish (9):
      tools/x86/kcpuid: Remove unused variable
      tools/x86/kcpuid: Properly align long-description columns
      tools/x86/kcpuid: Set max possible subleaves count to 64
      tools/x86/kcpuid: Protect against faulty "max subleaf" values
      tools/x86/kcpuid: Strip bitfield names leading/trailing whitespace
      tools/x86/kcpuid: Recognize all leaves with subleaves
      tools/x86/kcpuid: Parse subleaf ranges if provided
      tools/x86/kcpuid: Introduce a complete cpuid bitfields CSV file
      MAINTAINERS: Add x86 cpuid database entry

Kees Cook (1):
      x86/syscall: Avoid memcpy() for ia32 syscall_get_arguments()

Muhammad Usama Anjum (1):
      MAINTAINERS: Add selftests/x86 entry

Richard Gong (1):
      x86/amd_nb: Add new PCI IDs for AMD family 1Ah model 60h-70h

Shyam Sundar S K (1):
      x86/amd_nb: Add new PCI IDs for AMD family 1Ah model 60h


 MAINTAINERS                     |   11 +
 arch/x86/include/asm/syscall.h  |    7 +-
 arch/x86/kernel/amd_nb.c        |    7 +
 drivers/hwmon/k10temp.c         |    1 +
 include/linux/pci_ids.h         |    1 +
 tools/arch/x86/kcpuid/cpuid.csv | 1430 +++++++++++++++++++++++++++------------
 tools/arch/x86/kcpuid/kcpuid.c  |  109 +--
 7 files changed, 1100 insertions(+), 466 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 42decde38320..7715a806c255 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -24772,6 +24772,17 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
 F:	Documentation/arch/x86/
 F:	Documentation/devicetree/bindings/x86/
 F:	arch/x86/
+F:	tools/testing/selftests/x86
+
+X86 CPUID DATABASE
+M:	Borislav Petkov <bp@alien8.de>
+M:	Thomas Gleixner <tglx@linutronix.de>
+M:	x86@kernel.org
+R:	Ahmed S. Darwish <darwi@linutronix.de>
+L:	x86-cpuid@lists.linux.dev
+S:	Maintained
+W:	https://x86-cpuid.org
+F:	tools/arch/x86/kcpuid/cpuid.csv
 
 X86 ENTRY CODE
 M:	Andy Lutomirski <luto@kernel.org>
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 2fc7bc3863ff..7c488ff0c764 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -82,7 +82,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
 					 unsigned long *args)
 {
-	memcpy(args, &regs->bx, 6 * sizeof(args[0]));
+	args[0] = regs->bx;
+	args[1] = regs->cx;
+	args[2] = regs->dx;
+	args[3] = regs->si;
+	args[4] = regs->di;
+	args[5] = regs->bp;
 }
 
 static inline int syscall_get_arch(struct task_struct *task)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 059e5c16af05..dc5d3216af24 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -26,6 +26,7 @@
 #define PCI_DEVICE_ID_AMD_19H_M70H_ROOT		0x14e8
 #define PCI_DEVICE_ID_AMD_1AH_M00H_ROOT		0x153a
 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT		0x1507
+#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT		0x1122
 #define PCI_DEVICE_ID_AMD_MI200_ROOT		0x14bb
 #define PCI_DEVICE_ID_AMD_MI300_ROOT		0x14f8
 
@@ -43,6 +44,8 @@
 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4	0x14f4
 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4	0x12fc
 #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4	0x12c4
+#define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4	0x124c
+#define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4	0x12bc
 #define PCI_DEVICE_ID_AMD_MI200_DF_F4		0x14d4
 #define PCI_DEVICE_ID_AMD_MI300_DF_F4		0x152c
 
@@ -63,6 +66,7 @@ static const struct pci_device_id amd_root_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_ROOT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_ROOT) },
 	{}
@@ -95,6 +99,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F3) },
@@ -122,6 +127,8 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F4) },
 	{}
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 543526bac042..f96b91e43312 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -548,6 +548,7 @@ static const struct pci_device_id k10temp_id_table[] = {
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) },
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F3) },
 	{ PCI_VDEVICE(HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
 	{}
 };
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index e388c8b1cbc2..91182aa1d2ec 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -580,6 +580,7 @@
 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb
 #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3
 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb
+#define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F3 0x124b
 #define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F3 0x12bb
 #define PCI_DEVICE_ID_AMD_MI200_DF_F3	0x14d3
 #define PCI_DEVICE_ID_AMD_MI300_DF_F3	0x152b
diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv
index e0c25b75327e..d751eb8585d0 100644
--- a/tools/arch/x86/kcpuid/cpuid.csv
+++ b/tools/arch/x86/kcpuid/cpuid.csv
@@ -1,451 +1,1053 @@
-# The basic row format is:
-# LEAF, SUBLEAF, register_name, bits, short_name, long_description
-
-# Leaf 00H
-         0,    0,  EAX,   31:0, max_basic_leafs, Max input value for supported subleafs
-
-# Leaf 01H
-         1,    0,  EAX,    3:0, stepping, Stepping ID
-         1,    0,  EAX,    7:4, model, Model
-         1,    0,  EAX,   11:8, family, Family ID
-         1,    0,  EAX,  13:12, processor, Processor Type
-         1,    0,  EAX,  19:16, model_ext, Extended Model ID
-         1,    0,  EAX,  27:20, family_ext, Extended Family ID
-
-         1,    0,  EBX,    7:0, brand, Brand Index
-         1,    0,  EBX,   15:8, clflush_size, CLFLUSH line size (value * 8) in bytes
-         1,    0,  EBX,  23:16, max_cpu_id, Maxim number of addressable logic cpu in this package
-         1,    0,  EBX,  31:24, apic_id, Initial APIC ID
-
-         1,    0,  ECX,      0, sse3, Streaming SIMD Extensions 3(SSE3)
-         1,    0,  ECX,      1, pclmulqdq, PCLMULQDQ instruction supported
-         1,    0,  ECX,      2, dtes64, DS area uses 64-bit layout
-         1,    0,  ECX,      3, mwait, MONITOR/MWAIT supported
-         1,    0,  ECX,      4, ds_cpl, CPL Qualified Debug Store which allows for branch message storage qualified by CPL
-         1,    0,  ECX,      5, vmx, Virtual Machine Extensions supported
-         1,    0,  ECX,      6, smx, Safer Mode Extension supported
-         1,    0,  ECX,      7, eist, Enhanced Intel SpeedStep Technology
-         1,    0,  ECX,      8, tm2, Thermal Monitor 2
-         1,    0,  ECX,      9, ssse3, Supplemental Streaming SIMD Extensions 3 (SSSE3)
-         1,    0,  ECX,     10, l1_ctx_id, L1 data cache could be set to either adaptive mode or shared mode (check IA32_MISC_ENABLE bit 24 definition)
-         1,    0,  ECX,     11, sdbg, IA32_DEBUG_INTERFACE MSR for silicon debug supported
-         1,    0,  ECX,     12, fma, FMA extensions using YMM state supported
-         1,    0,  ECX,     13, cmpxchg16b, 'CMPXCHG16B - Compare and Exchange Bytes' supported
-         1,    0,  ECX,     14, xtpr_update, xTPR Update Control supported
-         1,    0,  ECX,     15, pdcm, Perfmon and Debug Capability present
-         1,    0,  ECX,     17, pcid, Process-Context Identifiers feature present
-         1,    0,  ECX,     18, dca, Prefetching data from a memory mapped device supported
-         1,    0,  ECX,     19, sse4_1, SSE4.1 feature present
-         1,    0,  ECX,     20, sse4_2, SSE4.2 feature present
-         1,    0,  ECX,     21, x2apic, x2APIC supported
-         1,    0,  ECX,     22, movbe, MOVBE instruction supported
-         1,    0,  ECX,     23, popcnt, POPCNT instruction supported
-         1,    0,  ECX,     24, tsc_deadline_timer, LAPIC supports one-shot operation using a TSC deadline value
-         1,    0,  ECX,     25, aesni, AESNI instruction supported
-         1,    0,  ECX,     26, xsave, XSAVE/XRSTOR processor extended states (XSETBV/XGETBV/XCR0)
-         1,    0,  ECX,     27, osxsave, OS has set CR4.OSXSAVE bit to enable XSETBV/XGETBV/XCR0
-         1,    0,  ECX,     28, avx, AVX instruction supported
-         1,    0,  ECX,     29, f16c, 16-bit floating-point conversion instruction supported
-         1,    0,  ECX,     30, rdrand, RDRAND instruction supported
-
-         1,    0,  EDX,      0, fpu, x87 FPU on chip
-         1,    0,  EDX,      1, vme, Virtual-8086 Mode Enhancement
-         1,    0,  EDX,      2, de, Debugging Extensions
-         1,    0,  EDX,      3, pse, Page Size Extensions
-         1,    0,  EDX,      4, tsc, Time Stamp Counter
-         1,    0,  EDX,      5, msr, RDMSR and WRMSR Support
-         1,    0,  EDX,      6, pae, Physical Address Extensions
-         1,    0,  EDX,      7, mce, Machine Check Exception
-         1,    0,  EDX,      8, cx8, CMPXCHG8B instr
-         1,    0,  EDX,      9, apic, APIC on Chip
-         1,    0,  EDX,     11, sep, SYSENTER and SYSEXIT instrs
-         1,    0,  EDX,     12, mtrr, Memory Type Range Registers
-         1,    0,  EDX,     13, pge, Page Global Bit
-         1,    0,  EDX,     14, mca, Machine Check Architecture
-         1,    0,  EDX,     15, cmov, Conditional Move Instrs
-         1,    0,  EDX,     16, pat, Page Attribute Table
-         1,    0,  EDX,     17, pse36, 36-Bit Page Size Extension
-         1,    0,  EDX,     18, psn, Processor Serial Number
-         1,    0,  EDX,     19, clflush, CLFLUSH instr
-#         1,    0,  EDX,     20,
-         1,    0,  EDX,     21, ds, Debug Store
-         1,    0,  EDX,     22, acpi, Thermal Monitor and Software Controlled Clock Facilities
-         1,    0,  EDX,     23, mmx, Intel MMX Technology
-         1,    0,  EDX,     24, fxsr, XSAVE and FXRSTOR Instrs
-         1,    0,  EDX,     25, sse, SSE
-         1,    0,  EDX,     26, sse2, SSE2
-         1,    0,  EDX,     27, ss, Self Snoop
-         1,    0,  EDX,     28, hit, Max APIC IDs
-         1,    0,  EDX,     29, tm, Thermal Monitor
-#         1,    0,  EDX,     30,
-         1,    0,  EDX,     31, pbe, Pending Break Enable
-
-# Leaf 02H
-# cache and TLB descriptor info
-
-# Leaf 03H
-# Precessor Serial Number, introduced on Pentium III, not valid for
-# latest models
-
-# Leaf 04H
-# thread/core and cache topology
-         4,    0,  EAX,    4:0, cache_type, Cache type like instr/data or unified
-         4,    0,  EAX,    7:5, cache_level, Cache Level (starts at 1)
-         4,    0,  EAX,      8, cache_self_init, Cache Self Initialization
-         4,    0,  EAX,      9, fully_associate, Fully Associative cache
-#         4,    0,  EAX,  13:10, resvd, resvd
-         4,    0,  EAX,  25:14, max_logical_id, Max number of addressable IDs for logical processors sharing the cache
-         4,    0,  EAX,  31:26, max_phy_id, Max number of addressable IDs for processors in phy package
-
-         4,    0,  EBX,   11:0, cache_linesize, Size of a cache line in bytes
-         4,    0,  EBX,  21:12, cache_partition, Physical Line partitions
-         4,    0,  EBX,  31:22, cache_ways, Ways of associativity
-         4,    0,  ECX,   31:0, cache_sets, Number of Sets - 1
-         4,    0,  EDX,      0, c_wbinvd, 1 means WBINVD/INVD is not ganranteed to act upon lower level caches of non-originating threads sharing this cache
-         4,    0,  EDX,      1, c_incl, Whether cache is inclusive of lower cache level
-         4,    0,  EDX,      2, c_comp_index, Complex Cache Indexing
-
-# Leaf 05H
-# MONITOR/MWAIT
-	 5,    0,  EAX,   15:0, min_mon_size, Smallest monitor line size in bytes
-	 5,    0,  EBX,   15:0, max_mon_size, Largest monitor line size in bytes
-	 5,    0,  ECX,      0, mwait_ext, Enum of Monitor-Mwait extensions supported
-	 5,    0,  ECX,      1, mwait_irq_break, Largest monitor line size in bytes
-	 5,    0,  EDX,    3:0, c0_sub_stats, Number of C0* sub C-states supported using MWAIT
-	 5,    0,  EDX,    7:4, c1_sub_stats, Number of C1* sub C-states supported using MWAIT
-	 5,    0,  EDX,   11:8, c2_sub_stats, Number of C2* sub C-states supported using MWAIT
-	 5,    0,  EDX,  15:12, c3_sub_stats, Number of C3* sub C-states supported using MWAIT
-	 5,    0,  EDX,  19:16, c4_sub_stats, Number of C4* sub C-states supported using MWAIT
-	 5,    0,  EDX,  23:20, c5_sub_stats, Number of C5* sub C-states supported using MWAIT
-	 5,    0,  EDX,  27:24, c6_sub_stats, Number of C6* sub C-states supported using MWAIT
-	 5,    0,  EDX,  31:28, c7_sub_stats, Number of C7* sub C-states supported using MWAIT
-
-# Leaf 06H
-# Thermal & Power Management
-
-	 6,    0,  EAX,      0, dig_temp, Digital temperature sensor supported
-	 6,    0,  EAX,      1, turbo, Intel Turbo Boost
-	 6,    0,  EAX,      2, arat, Always running APIC timer
-#	 6,    0,  EAX,      3, resv, Reserved
-	 6,    0,  EAX,      4, pln, Power limit notifications supported
-	 6,    0,  EAX,      5, ecmd, Clock modulation duty cycle extension supported
-	 6,    0,  EAX,      6, ptm, Package thermal management supported
-	 6,    0,  EAX,      7, hwp, HWP base register
-	 6,    0,  EAX,      8, hwp_notify, HWP notification
-	 6,    0,  EAX,      9, hwp_act_window, HWP activity window
-	 6,    0,  EAX,     10, hwp_energy, HWP energy performance preference
-	 6,    0,  EAX,     11, hwp_pkg_req, HWP package level request
-#	 6,    0,  EAX,     12, resv, Reserved
-	 6,    0,  EAX,     13, hdc, HDC base registers supported
-	 6,    0,  EAX,     14, turbo3, Turbo Boost Max 3.0
-	 6,    0,  EAX,     15, hwp_cap, Highest Performance change supported
-	 6,    0,  EAX,     16, hwp_peci, HWP PECI override is supported
-	 6,    0,  EAX,     17, hwp_flex, Flexible HWP is supported
-	 6,    0,  EAX,     18, hwp_fast, Fast access mode for the IA32_HWP_REQUEST MSR is supported
-#	 6,    0,  EAX,     19, resv, Reserved
-	 6,    0,  EAX,     20, hwp_ignr, Ignoring Idle Logical Processor HWP request is supported
-
-	 6,    0,  EBX,    3:0, therm_irq_thresh, Number of Interrupt Thresholds in Digital Thermal Sensor
-	 6,    0,  ECX,      0, aperfmperf, Presence of IA32_MPERF and IA32_APERF
-	 6,    0,  ECX,      3, energ_bias, Performance-energy bias preference supported
-
-# Leaf 07H
-#	ECX == 0
-# AVX512 refers to https://en.wikipedia.org/wiki/AVX-512
-# XXX: Do we really need to enumerate each and every AVX512 sub features
-
-	 7,    0,  EBX,      0, fsgsbase, RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE supported
-	 7,    0,  EBX,      1, tsc_adjust, TSC_ADJUST MSR supported
-	 7,    0,  EBX,      2, sgx, Software Guard Extensions
-	 7,    0,  EBX,      3, bmi1, BMI1
-	 7,    0,  EBX,      4, hle, Hardware Lock Elision
-	 7,    0,  EBX,      5, avx2, AVX2
-#	 7,    0,  EBX,      6, fdp_excp_only, x87 FPU Data Pointer updated only on x87 exceptions
-	 7,    0,  EBX,      7, smep, Supervisor-Mode Execution Prevention
-	 7,    0,  EBX,      8, bmi2, BMI2
-	 7,    0,  EBX,      9, rep_movsb, Enhanced REP MOVSB/STOSB
-	 7,    0,  EBX,     10, invpcid, INVPCID instruction
-	 7,    0,  EBX,     11, rtm, Restricted Transactional Memory
-	 7,    0,  EBX,     12, rdt_m, Intel RDT Monitoring capability
-	 7,    0,  EBX,     13, depc_fpu_cs_ds, Deprecates FPU CS and FPU DS
-	 7,    0,  EBX,     14, mpx, Memory Protection Extensions
-	 7,    0,  EBX,     15, rdt_a, Intel RDT Allocation capability
-	 7,    0,  EBX,     16, avx512f, AVX512 Foundation instr
-	 7,    0,  EBX,     17, avx512dq, AVX512 Double and Quadword AVX512 instr
-	 7,    0,  EBX,     18, rdseed, RDSEED instr
-	 7,    0,  EBX,     19, adx, ADX instr
-	 7,    0,  EBX,     20, smap, Supervisor Mode Access Prevention
-	 7,    0,  EBX,     21, avx512ifma, AVX512 Integer Fused Multiply Add
-#	 7,    0,  EBX,     22, resvd, resvd
-	 7,    0,  EBX,     23, clflushopt, CLFLUSHOPT instr
-	 7,    0,  EBX,     24, clwb, CLWB instr
-	 7,    0,  EBX,     25, intel_pt, Intel Processor Trace instr
-	 7,    0,  EBX,     26, avx512pf, Prefetch
-	 7,    0,  EBX,     27, avx512er, AVX512 Exponent Reciproca instr
-	 7,    0,  EBX,     28, avx512cd, AVX512 Conflict Detection instr
-	 7,    0,  EBX,     29, sha, Intel Secure Hash Algorithm Extensions instr
-	 7,    0,  EBX,     30, avx512bw, AVX512 Byte & Word instr
-	 7,    0,  EBX,     31, avx512vl, AVX512 Vector Length Extentions (VL)
-	 7,    0,  ECX,      0, prefetchwt1, X
-	 7,    0,  ECX,      1, avx512vbmi, AVX512 Vector Byte Manipulation Instructions
-	 7,    0,  ECX,      2, umip, User-mode Instruction Prevention
-
-	 7,    0,  ECX,      3, pku, Protection Keys for User-mode pages
-	 7,    0,  ECX,      4, ospke, CR4 PKE set to enable protection keys
-#	 7,    0,  ECX,   16:5, resvd, resvd
-	 7,    0,  ECX,  21:17, mawau, The value of MAWAU used by the BNDLDX and BNDSTX instructions in 64-bit mode
-	 7,    0,  ECX,     22, rdpid, RDPID and IA32_TSC_AUX
-#	 7,    0,  ECX,  29:23, resvd, resvd
-	 7,    0,  ECX,     30, sgx_lc, SGX Launch Configuration
-#	 7,    0,  ECX,     31, resvd, resvd
-
-# Leaf 08H
-#
-
-
-# Leaf 09H
-# Direct Cache Access (DCA) information
-	 9,    0,  ECX,   31:0, dca_cap, The value of IA32_PLATFORM_DCA_CAP
+# SPDX-License-Identifier: CC0-1.0
+# Generator: x86-cpuid-db v1.0
 
-# Leaf 0AH
-# Architectural Performance Monitoring
 #
-# Do we really need to print out the PMU related stuff?
-# Does normal user really care about it?
+# Auto-generated file.
+# Please submit all updates and bugfixes to https://x86-cpuid.org
 #
-       0xA,    0,  EAX,    7:0, pmu_ver, Performance Monitoring Unit version
-       0xA,    0,  EAX,   15:8, pmu_gp_cnt_num, Numer of general-purose PMU counters per logical CPU
-       0xA,    0,  EAX,  23:16, pmu_cnt_bits, Bit wideth of PMU counter
-       0xA,    0,  EAX,  31:24, pmu_ebx_bits, Length of EBX bit vector to enumerate PMU events
-
-       0xA,    0,  EBX,      0, pmu_no_core_cycle_evt, Core cycle event not available
-       0xA,    0,  EBX,      1, pmu_no_instr_ret_evt, Instruction retired event not available
-       0xA,    0,  EBX,      2, pmu_no_ref_cycle_evt, Reference cycles event not available
-       0xA,    0,  EBX,      3, pmu_no_llc_ref_evt, Last-level cache reference event not available
-       0xA,    0,  EBX,      4, pmu_no_llc_mis_evt, Last-level cache misses event not available
-       0xA,    0,  EBX,      5, pmu_no_br_instr_ret_evt, Branch instruction retired event not available
-       0xA,    0,  EBX,      6, pmu_no_br_mispredict_evt, Branch mispredict retired event not available
-
-       0xA,    0,  ECX,    4:0, pmu_fixed_cnt_num, Performance Monitoring Unit version
-       0xA,    0,  ECX,   12:5, pmu_fixed_cnt_bits, Numer of PMU counters per logical CPU
-
-# Leaf 0BH
-# Extended Topology Enumeration Leaf
-#
-
-       0xB,    0,  EAX,    4:0, id_shift, Number of bits to shift right on x2APIC ID to get a unique topology ID of the next level type
-       0xB,    0,  EBX,   15:0, cpu_nr, Number of logical processors at this level type
-       0xB,    0,  ECX,   15:8, lvl_type, 0-Invalid 1-SMT 2-Core
-       0xB,    0,  EDX,   31:0, x2apic_id, x2APIC ID the current logical processor
-
-
-# Leaf 0DH
-# Processor Extended State
 
-       0xD,    0,  EAX,      0, x87, X87 state
-       0xD,    0,  EAX,      1, sse, SSE state
-       0xD,    0,  EAX,      2, avx, AVX state
-       0xD,    0,  EAX,    4:3, mpx, MPX state
-       0xD,    0,  EAX,    7:5, avx512, AVX-512 state
-       0xD,    0,  EAX,      9, pkru, PKRU state
-
-       0xD,    0,  EBX,   31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0
-       0xD,    0,  ECX,   31:0, max_sz_xsave, Maximum size (bytes) of the XSAVE/XRSTOR save area
-
-       0xD,    1,  EAX,      0, xsaveopt, XSAVEOPT available
-       0xD,    1,  EAX,      1, xsavec, XSAVEC and compacted form supported
-       0xD,    1,  EAX,      2, xgetbv, XGETBV supported
-       0xD,    1,  EAX,      3, xsaves, XSAVES/XRSTORS and IA32_XSS supported
-
-       0xD,    1,  EBX,   31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0
-       0xD,    1,  ECX,      8, pt, PT state
-       0xD,    1,  ECX,      11, cet_usr, CET user state
-       0xD,    1,  ECX,      12, cet_supv, CET supervisor state
-       0xD,    1,  ECX,      13, hdc, HDC state
-       0xD,    1,  ECX,      16, hwp, HWP state
-
-# Leaf 0FH
-# Intel RDT Monitoring
-
-       0xF,    0,  EBX,   31:0, rmid_range, Maximum range (zero-based) of RMID within this physical processor of all types
-       0xF,    0,  EDX,      1, l3c_rdt_mon, L3 Cache RDT Monitoring supported
-
-       0xF,    1,  ECX,   31:0, rmid_range, Maximum range (zero-based) of RMID of this types
-       0xF,    1,  EDX,      0, l3c_ocp_mon, L3 Cache occupancy Monitoring supported
-       0xF,    1,  EDX,      1, l3c_tbw_mon, L3 Cache Total Bandwidth Monitoring supported
-       0xF,    1,  EDX,      2, l3c_lbw_mon, L3 Cache Local Bandwidth Monitoring supported
+# The basic row format is:
+#     LEAF, SUBLEAVES,  reg,    bits,    short_name             , long_description
+
+# Leaf 0H
+# Maximum standard leaf number + CPU vendor string
+
+         0,         0,  eax,    31:0,    max_std_leaf           , Highest cpuid standard leaf supported
+         0,         0,  ebx,    31:0,    cpu_vendorid_0         , CPU vendor ID string bytes 0 - 3
+         0,         0,  ecx,    31:0,    cpu_vendorid_2         , CPU vendor ID string bytes 8 - 11
+         0,         0,  edx,    31:0,    cpu_vendorid_1         , CPU vendor ID string bytes 4 - 7
+
+# Leaf 1H
+# CPU FMS (Family/Model/Stepping) + standard feature flags
+
+         1,         0,  eax,     3:0,    stepping               , Stepping ID
+         1,         0,  eax,     7:4,    base_model             , Base CPU model ID
+         1,         0,  eax,    11:8,    base_family_id         , Base CPU family ID
+         1,         0,  eax,   13:12,    cpu_type               , CPU type
+         1,         0,  eax,   19:16,    ext_model              , Extended CPU model ID
+         1,         0,  eax,   27:20,    ext_family             , Extended CPU family ID
+         1,         0,  ebx,     7:0,    brand_id               , Brand index
+         1,         0,  ebx,    15:8,    clflush_size           , CLFLUSH instruction cache line size
+         1,         0,  ebx,   23:16,    n_logical_cpu          , Logical CPU (HW threads) count
+         1,         0,  ebx,   31:24,    local_apic_id          , Initial local APIC physical ID
+         1,         0,  ecx,       0,    pni                    , Streaming SIMD Extensions 3 (SSE3)
+         1,         0,  ecx,       1,    pclmulqdq              , PCLMULQDQ instruction support
+         1,         0,  ecx,       2,    dtes64                 , 64-bit DS save area
+         1,         0,  ecx,       3,    monitor                , MONITOR/MWAIT support
+         1,         0,  ecx,       4,    ds_cpl                 , CPL Qualified Debug Store
+         1,         0,  ecx,       5,    vmx                    , Virtual Machine Extensions
+         1,         0,  ecx,       6,    smx                    , Safer Mode Extensions
+         1,         0,  ecx,       7,    est                    , Enhanced Intel SpeedStep
+         1,         0,  ecx,       8,    tm2                    , Thermal Monitor 2
+         1,         0,  ecx,       9,    ssse3                  , Supplemental SSE3
+         1,         0,  ecx,      10,    cid                    , L1 Context ID
+         1,         0,  ecx,      11,    sdbg                   , Silicon Debug
+         1,         0,  ecx,      12,    fma                    , FMA extensions using YMM state
+         1,         0,  ecx,      13,    cx16                   , CMPXCHG16B instruction support
+         1,         0,  ecx,      14,    xtpr                   , xTPR Update Control
+         1,         0,  ecx,      15,    pdcm                   , Perfmon and Debug Capability
+         1,         0,  ecx,      17,    pcid                   , Process-context identifiers
+         1,         0,  ecx,      18,    dca                    , Direct Cache Access
+         1,         0,  ecx,      19,    sse4_1                 , SSE4.1
+         1,         0,  ecx,      20,    sse4_2                 , SSE4.2
+         1,         0,  ecx,      21,    x2apic                 , X2APIC support
+         1,         0,  ecx,      22,    movbe                  , MOVBE instruction support
+         1,         0,  ecx,      23,    popcnt                 , POPCNT instruction support
+         1,         0,  ecx,      24,    tsc_deadline_timer     , APIC timer one-shot operation
+         1,         0,  ecx,      25,    aes                    , AES instructions
+         1,         0,  ecx,      26,    xsave                  , XSAVE (and related instructions) support
+         1,         0,  ecx,      27,    osxsave                , XSAVE (and related instructions) are enabled by OS
+         1,         0,  ecx,      28,    avx                    , AVX instructions support
+         1,         0,  ecx,      29,    f16c                   , Half-precision floating-point conversion support
+         1,         0,  ecx,      30,    rdrand                 , RDRAND instruction support
+         1,         0,  ecx,      31,    guest_status           , System is running as guest; (para-)virtualized system
+         1,         0,  edx,       0,    fpu                    , Floating-Point Unit on-chip (x87)
+         1,         0,  edx,       1,    vme                    , Virtual-8086 Mode Extensions
+         1,         0,  edx,       2,    de                     , Debugging Extensions
+         1,         0,  edx,       3,    pse                    , Page Size Extension
+         1,         0,  edx,       4,    tsc                    , Time Stamp Counter
+         1,         0,  edx,       5,    msr                    , Model-Specific Registers (RDMSR and WRMSR support)
+         1,         0,  edx,       6,    pae                    , Physical Address Extensions
+         1,         0,  edx,       7,    mce                    , Machine Check Exception
+         1,         0,  edx,       8,    cx8                    , CMPXCHG8B instruction
+         1,         0,  edx,       9,    apic                   , APIC on-chip
+         1,         0,  edx,      11,    sep                    , SYSENTER, SYSEXIT, and associated MSRs
+         1,         0,  edx,      12,    mtrr                   , Memory Type Range Registers
+         1,         0,  edx,      13,    pge                    , Page Global Extensions
+         1,         0,  edx,      14,    mca                    , Machine Check Architecture
+         1,         0,  edx,      15,    cmov                   , Conditional Move Instruction
+         1,         0,  edx,      16,    pat                    , Page Attribute Table
+         1,         0,  edx,      17,    pse36                  , Page Size Extension (36-bit)
+         1,         0,  edx,      18,    pn                     , Processor Serial Number
+         1,         0,  edx,      19,    clflush                , CLFLUSH instruction
+         1,         0,  edx,      21,    dts                    , Debug Store
+         1,         0,  edx,      22,    acpi                   , Thermal monitor and clock control
+         1,         0,  edx,      23,    mmx                    , MMX instructions
+         1,         0,  edx,      24,    fxsr                   , FXSAVE and FXRSTOR instructions
+         1,         0,  edx,      25,    sse                    , SSE instructions
+         1,         0,  edx,      26,    sse2                   , SSE2 instructions
+         1,         0,  edx,      27,    ss                     , Self Snoop
+         1,         0,  edx,      28,    ht                     , Hyper-threading
+         1,         0,  edx,      29,    tm                     , Thermal Monitor
+         1,         0,  edx,      30,    ia64                   , Legacy IA-64 (Itanium) support bit, now reserved
+         1,         0,  edx,      31,    pbe                    , Pending Break Enable
+
+# Leaf 2H
+# Intel cache and TLB information one-byte descriptors
+
+         2,         0,  eax,     7:0,    iteration_count        , Number of times this CPUID leaf must be queried
+         2,         0,  eax,    15:8,    desc1                  , Descriptor #1
+         2,         0,  eax,   23:16,    desc2                  , Descriptor #2
+         2,         0,  eax,   30:24,    desc3                  , Descriptor #3
+         2,         0,  eax,      31,    eax_invalid            , Descriptors 1-3 are invalid if set
+         2,         0,  ebx,     7:0,    desc4                  , Descriptor #4
+         2,         0,  ebx,    15:8,    desc5                  , Descriptor #5
+         2,         0,  ebx,   23:16,    desc6                  , Descriptor #6
+         2,         0,  ebx,   30:24,    desc7                  , Descriptor #7
+         2,         0,  ebx,      31,    ebx_invalid            , Descriptors 4-7 are invalid if set
+         2,         0,  ecx,     7:0,    desc8                  , Descriptor #8
+         2,         0,  ecx,    15:8,    desc9                  , Descriptor #9
+         2,         0,  ecx,   23:16,    desc10                 , Descriptor #10
+         2,         0,  ecx,   30:24,    desc11                 , Descriptor #11
+         2,         0,  ecx,      31,    ecx_invalid            , Descriptors 8-11 are invalid if set
+         2,         0,  edx,     7:0,    desc12                 , Descriptor #12
+         2,         0,  edx,    15:8,    desc13                 , Descriptor #13
+         2,         0,  edx,   23:16,    desc14                 , Descriptor #14
+         2,         0,  edx,   30:24,    desc15                 , Descriptor #15
+         2,         0,  edx,      31,    edx_invalid            , Descriptors 12-15 are invalid if set
+
+# Leaf 4H
+# Intel deterministic cache parameters
+
+         4,      31:0,  eax,     4:0,    cache_type             , Cache type field
+         4,      31:0,  eax,     7:5,    cache_level            , Cache level (1-based)
+         4,      31:0,  eax,       8,    cache_self_init        , Self-initializing cache level
+         4,      31:0,  eax,       9,    fully_associative      , Fully-associative cache
+         4,      31:0,  eax,   25:14,    num_threads_sharing    , Number of logical CPUs sharing this cache
+         4,      31:0,  eax,   31:26,    num_cores_on_die       , Number of cores in the physical package
+         4,      31:0,  ebx,    11:0,    cache_linesize         , System coherency line size (0-based)
+         4,      31:0,  ebx,   21:12,    cache_npartitions      , Physical line partitions (0-based)
+         4,      31:0,  ebx,   31:22,    cache_nways            , Ways of associativity (0-based)
+         4,      31:0,  ecx,    30:0,    cache_nsets            , Cache number of sets (0-based)
+         4,      31:0,  edx,       0,    wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
+         4,      31:0,  edx,       1,    ll_inclusive           , Cache is inclusive of Lower-Level caches
+         4,      31:0,  edx,       2,    complex_indexing       , Not a direct-mapped cache (complex function)
+
+# Leaf 5H
+# MONITOR/MWAIT instructions enumeration
+
+         5,         0,  eax,    15:0,    min_mon_size           , Smallest monitor-line size, in bytes
+         5,         0,  ebx,    15:0,    max_mon_size           , Largest monitor-line size, in bytes
+         5,         0,  ecx,       0,    mwait_ext              , Enumeration of MONITOR/MWAIT extensions is supported
+         5,         0,  ecx,       1,    mwait_irq_break        , Interrupts as a break-event for MWAIT is supported
+         5,         0,  edx,     3:0,    n_c0_substates         , Number of C0 sub C-states supported using MWAIT
+         5,         0,  edx,     7:4,    n_c1_substates         , Number of C1 sub C-states supported using MWAIT
+         5,         0,  edx,    11:8,    n_c2_substates         , Number of C2 sub C-states supported using MWAIT
+         5,         0,  edx,   15:12,    n_c3_substates         , Number of C3 sub C-states supported using MWAIT
+         5,         0,  edx,   19:16,    n_c4_substates         , Number of C4 sub C-states supported using MWAIT
+         5,         0,  edx,   23:20,    n_c5_substates         , Number of C5 sub C-states supported using MWAIT
+         5,         0,  edx,   27:24,    n_c6_substates         , Number of C6 sub C-states supported using MWAIT
+         5,         0,  edx,   31:28,    n_c7_substates         , Number of C7 sub C-states supported using MWAIT
+
+# Leaf 6H
+# Thermal and Power Management enumeration
+
+         6,         0,  eax,       0,    dtherm                 , Digital temperature sensor
+         6,         0,  eax,       1,    turbo_boost            , Intel Turbo Boost
+         6,         0,  eax,       2,    arat                   , Always-Running APIC Timer (not affected by p-state)
+         6,         0,  eax,       4,    pln                    , Power Limit Notification (PLN) event
+         6,         0,  eax,       5,    ecmd                   , Clock modulation duty cycle extension
+         6,         0,  eax,       6,    pts                    , Package thermal management
+         6,         0,  eax,       7,    hwp                    , HWP (Hardware P-states) base registers are supported
+         6,         0,  eax,       8,    hwp_notify             , HWP notification (IA32_HWP_INTERRUPT MSR)
+         6,         0,  eax,       9,    hwp_act_window         , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported
+         6,         0,  eax,      10,    hwp_epp                , HWP Energy Performance Preference
+         6,         0,  eax,      11,    hwp_pkg_req            , HWP Package Level Request
+         6,         0,  eax,      13,    hdc_base_regs          , HDC base registers are supported
+         6,         0,  eax,      14,    turbo_boost_3_0        , Intel Turbo Boost Max 3.0
+         6,         0,  eax,      15,    hwp_capabilities       , HWP Highest Performance change
+         6,         0,  eax,      16,    hwp_peci_override      , HWP PECI override
+         6,         0,  eax,      17,    hwp_flexible           , Flexible HWP
+         6,         0,  eax,      18,    hwp_fast               , IA32_HWP_REQUEST MSR fast access mode
+         6,         0,  eax,      19,    hfi                    , HW_FEEDBACK MSRs supported
+         6,         0,  eax,      20,    hwp_ignore_idle        , Ignoring idle logical CPU HWP req is supported
+         6,         0,  eax,      23,    thread_director        , Intel thread director support
+         6,         0,  eax,      24,    therm_interrupt_bit25  , IA32_THERM_INTERRUPT MSR bit 25 is supported
+         6,         0,  ebx,     3:0,    n_therm_thresholds     , Digital thermometer thresholds
+         6,         0,  ecx,       0,    aperfmperf             , MPERF/APERF MSRs (effective frequency interface)
+         6,         0,  ecx,       3,    epb                    , IA32_ENERGY_PERF_BIAS MSR support
+         6,         0,  ecx,    15:8,    thrd_director_nclasses , Number of classes, Intel thread director
+         6,         0,  edx,       0,    perfcap_reporting      , Performance capability reporting
+         6,         0,  edx,       1,    encap_reporting        , Energy efficiency capability reporting
+         6,         0,  edx,    11:8,    feedback_sz            , HW feedback interface struct size, in 4K pages
+         6,         0,  edx,   31:16,    this_lcpu_hwfdbk_idx   , This logical CPU index @ HW feedback struct, 0-based
+
+# Leaf 7H
+# Extended CPU features enumeration
+
+         7,         0,  eax,    31:0,    leaf7_n_subleaves      , Number of cpuid 0x7 subleaves
+         7,         0,  ebx,       0,    fsgsbase               , FSBASE/GSBASE read/write support
+         7,         0,  ebx,       1,    tsc_adjust             , IA32_TSC_ADJUST MSR supported
+         7,         0,  ebx,       2,    sgx                    , Intel SGX (Software Guard Extensions)
+         7,         0,  ebx,       3,    bmi1                   , Bit manipulation extensions group 1
+         7,         0,  ebx,       4,    hle                    , Hardware Lock Elision
+         7,         0,  ebx,       5,    avx2                   , AVX2 instruction set
+         7,         0,  ebx,       6,    fdp_excptn_only        , FPU Data Pointer updated only on x87 exceptions
+         7,         0,  ebx,       7,    smep                   , Supervisor Mode Execution Protection
+         7,         0,  ebx,       8,    bmi2                   , Bit manipulation extensions group 2
+         7,         0,  ebx,       9,    erms                   , Enhanced REP MOVSB/STOSB
+         7,         0,  ebx,      10,    invpcid                , INVPCID instruction (Invalidate Processor Context ID)
+         7,         0,  ebx,      11,    rtm                    , Intel restricted transactional memory
+         7,         0,  ebx,      12,    cqm                    , Intel RDT-CMT / AMD Platform-QoS cache monitoring
+         7,         0,  ebx,      13,    zero_fcs_fds           , Deprecated FPU CS/DS (stored as zero)
+         7,         0,  ebx,      14,    mpx                    , Intel memory protection extensions
+         7,         0,  ebx,      15,    rdt_a                  , Intel RDT / AMD Platform-QoS Enforcement
+         7,         0,  ebx,      16,    avx512f                , AVX-512 foundation instructions
+         7,         0,  ebx,      17,    avx512dq               , AVX-512 double/quadword instructions
+         7,         0,  ebx,      18,    rdseed                 , RDSEED instruction
+         7,         0,  ebx,      19,    adx                    , ADCX/ADOX instructions
+         7,         0,  ebx,      20,    smap                   , Supervisor mode access prevention
+         7,         0,  ebx,      21,    avx512ifma             , AVX-512 integer fused multiply add
+         7,         0,  ebx,      23,    clflushopt             , CLFLUSHOPT instruction
+         7,         0,  ebx,      24,    clwb                   , CLWB instruction
+         7,         0,  ebx,      25,    intel_pt               , Intel processor trace
+         7,         0,  ebx,      26,    avx512pf               , AVX-512 prefetch instructions
+         7,         0,  ebx,      27,    avx512er               , AVX-512 exponent/reciprocal instrs
+         7,         0,  ebx,      28,    avx512cd               , AVX-512 conflict detection instrs
+         7,         0,  ebx,      29,    sha_ni                 , SHA/SHA256 instructions
+         7,         0,  ebx,      30,    avx512bw               , AVX-512 BW (byte/word granular) instructions
+         7,         0,  ebx,      31,    avx512vl               , AVX-512 VL (128/256 vector length) extensions
+         7,         0,  ecx,       0,    prefetchwt1            , PREFETCHWT1 (Intel Xeon Phi only)
+         7,         0,  ecx,       1,    avx512vbmi             , AVX-512 Vector byte manipulation instrs
+         7,         0,  ecx,       2,    umip                   , User mode instruction protection
+         7,         0,  ecx,       3,    pku                    , Protection keys for user-space
+         7,         0,  ecx,       4,    ospke                  , OS protection keys enable
+         7,         0,  ecx,       5,    waitpkg                , WAITPKG instructions
+         7,         0,  ecx,       6,    avx512_vbmi2           , AVX-512 vector byte manipulation instrs group 2
+         7,         0,  ecx,       7,    cet_ss                 , CET shadow stack features
+         7,         0,  ecx,       8,    gfni                   , Galois field new instructions
+         7,         0,  ecx,       9,    vaes                   , Vector AES instrs
+         7,         0,  ecx,      10,    vpclmulqdq             , VPCLMULQDQ 256-bit instruction support
+         7,         0,  ecx,      11,    avx512_vnni            , Vector neural network instructions
+         7,         0,  ecx,      12,    avx512_bitalg          , AVX-512 bit count/shuffle
+         7,         0,  ecx,      13,    tme                    , Intel total memory encryption
+         7,         0,  ecx,      14,    avx512_vpopcntdq       , AVX-512: POPCNT for vectors of DW/QW
+         7,         0,  ecx,      16,    la57                   , 57-bit linear addresses (five-level paging)
+         7,         0,  ecx,   21:17,    mawau_val_lm           , BNDLDX/BNDSTX MAWAU value in 64-bit mode
+         7,         0,  ecx,      22,    rdpid                  , RDPID instruction
+         7,         0,  ecx,      23,    key_locker             , Intel key locker support
+         7,         0,  ecx,      24,    bus_lock_detect        , OS bus-lock detection
+         7,         0,  ecx,      25,    cldemote               , CLDEMOTE instruction
+         7,         0,  ecx,      27,    movdiri                , MOVDIRI instruction
+         7,         0,  ecx,      28,    movdir64b              , MOVDIR64B instruction
+         7,         0,  ecx,      29,    enqcmd                 , Enqueue stores supported (ENQCMD{,S})
+         7,         0,  ecx,      30,    sgx_lc                 , Intel SGX launch configuration
+         7,         0,  ecx,      31,    pks                    , Protection keys for supervisor-mode pages
+         7,         0,  edx,       1,    sgx_keys               , Intel SGX attestation services
+         7,         0,  edx,       2,    avx512_4vnniw          , AVX-512 neural network instructions
+         7,         0,  edx,       3,    avx512_4fmaps          , AVX-512 multiply accumulation single precision
+         7,         0,  edx,       4,    fsrm                   , Fast short REP MOV
+         7,         0,  edx,       5,    uintr                  , CPU supports user interrupts
+         7,         0,  edx,       8,    avx512_vp2intersect    , VP2INTERSECT{D,Q} instructions
+         7,         0,  edx,       9,    srdbs_ctrl             , SRBDS mitigation MSR available
+         7,         0,  edx,      10,    md_clear               , VERW MD_CLEAR microcode support
+         7,         0,  edx,      11,    rtm_always_abort       , XBEGIN (RTM transaction) always aborts
+         7,         0,  edx,      13,    tsx_force_abort        , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported
+         7,         0,  edx,      14,    serialize              , SERIALIZE instruction
+         7,         0,  edx,      15,    hybrid_cpu             , The CPU is identified as a 'hybrid part'
+         7,         0,  edx,      16,    tsxldtrk               , TSX suspend/resume load address tracking
+         7,         0,  edx,      18,    pconfig                , PCONFIG instruction
+         7,         0,  edx,      19,    arch_lbr               , Intel architectural LBRs
+         7,         0,  edx,      20,    ibt                    , CET indirect branch tracking
+         7,         0,  edx,      22,    amx_bf16               , AMX-BF16: tile bfloat16 support
+         7,         0,  edx,      23,    avx512_fp16            , AVX-512 FP16 instructions
+         7,         0,  edx,      24,    amx_tile               , AMX-TILE: tile architecture support
+         7,         0,  edx,      25,    amx_int8               , AMX-INT8: tile 8-bit integer support
+         7,         0,  edx,      26,    spec_ctrl              , Speculation Control (IBRS/IBPB: indirect branch restrictions)
+         7,         0,  edx,      27,    intel_stibp            , Single thread indirect branch predictors
+         7,         0,  edx,      28,    flush_l1d              , FLUSH L1D cache: IA32_FLUSH_CMD MSR
+         7,         0,  edx,      29,    arch_capabilities      , Intel IA32_ARCH_CAPABILITIES MSR
+         7,         0,  edx,      30,    core_capabilities      , IA32_CORE_CAPABILITIES MSR
+         7,         0,  edx,      31,    spec_ctrl_ssbd         , Speculative store bypass disable
+         7,         1,  eax,       4,    avx_vnni               , AVX-VNNI instructions
+         7,         1,  eax,       5,    avx512_bf16            , AVX-512 bFloat16 instructions
+         7,         1,  eax,       6,    lass                   , Linear address space separation
+         7,         1,  eax,       7,    cmpccxadd              , CMPccXADD instructions
+         7,         1,  eax,       8,    arch_perfmon_ext       , ArchPerfmonExt: CPUID leaf 0x23 is supported
+         7,         1,  eax,      10,    fzrm                   , Fast zero-length REP MOVSB
+         7,         1,  eax,      11,    fsrs                   , Fast short REP STOSB
+         7,         1,  eax,      12,    fsrc                   , Fast Short REP CMPSB/SCASB
+         7,         1,  eax,      17,    fred                   , FRED: Flexible return and event delivery transitions
+         7,         1,  eax,      18,    lkgs                   , LKGS: Load 'kernel' (userspace) GS
+         7,         1,  eax,      19,    wrmsrns                , WRMSRNS instr (WRMSR-non-serializing)
+         7,         1,  eax,      21,    amx_fp16               , AMX-FP16: FP16 tile operations
+         7,         1,  eax,      22,    hreset                 , History reset support
+         7,         1,  eax,      23,    avx_ifma               , Integer fused multiply add
+         7,         1,  eax,      26,    lam                    , Linear address masking
+         7,         1,  eax,      27,    rd_wr_msrlist          , RDMSRLIST/WRMSRLIST instructions
+         7,         1,  ebx,       0,    intel_ppin             , Protected processor inventory number (PPIN{,_CTL} MSRs)
+         7,         1,  edx,       4,    avx_vnni_int8          , AVX-VNNI-INT8 instructions
+         7,         1,  edx,       5,    avx_ne_convert         , AVX-NE-CONVERT instructions
+         7,         1,  edx,       8,    amx_complex            , AMX-COMPLEX instructions (starting from Granite Rapids)
+         7,         1,  edx,      14,    prefetchit_0_1         , PREFETCHIT0/1 instructions
+         7,         1,  edx,      18,    cet_sss                , CET supervisor shadow stacks safe to use
+         7,         2,  edx,       0,    intel_psfd             , Intel predictive store forward disable
+         7,         2,  edx,       1,    ipred_ctrl             , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S}
+         7,         2,  edx,       2,    rrsba_ctrl             , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S}
+         7,         2,  edx,       3,    ddp_ctrl               , MSR bit  IA32_SPEC_CTRL.DDPD_U
+         7,         2,  edx,       4,    bhi_ctrl               , MSR bit  IA32_SPEC_CTRL.BHI_DIS_S
+         7,         2,  edx,       5,    mcdt_no                , MCDT mitigation not needed
+         7,         2,  edx,       6,    uclock_disable         , UC-lock disable is supported
+
+# Leaf 9H
+# Intel DCA (Direct Cache Access) enumeration
+
+         9,         0,  eax,       0,    dca_enabled_in_bios    , DCA is enabled in BIOS
+
+# Leaf AH
+# Intel PMU (Performance Monitoring Unit) enumeration
+
+       0xa,         0,  eax,     7:0,    pmu_version            , Performance monitoring unit version ID
+       0xa,         0,  eax,    15:8,    pmu_n_gcounters        , Number of general PMU counters per logical CPU
+       0xa,         0,  eax,   23:16,    pmu_gcounters_nbits    , Bitwidth of PMU general counters
+       0xa,         0,  eax,   31:24,    pmu_cpuid_ebx_bits     , Length of cpuid leaf 0xa EBX bit vector
+       0xa,         0,  ebx,       0,    no_core_cycle_evt      , Core cycle event not available
+       0xa,         0,  ebx,       1,    no_insn_retired_evt    , Instruction retired event not available
+       0xa,         0,  ebx,       2,    no_refcycle_evt        , Reference cycles event not available
+       0xa,         0,  ebx,       3,    no_llc_ref_evt         , LLC-reference event not available
+       0xa,         0,  ebx,       4,    no_llc_miss_evt        , LLC-misses event not available
+       0xa,         0,  ebx,       5,    no_br_insn_ret_evt     , Branch instruction retired event not available
+       0xa,         0,  ebx,       6,    no_br_mispredict_evt   , Branch mispredict retired event not available
+       0xa,         0,  ebx,       7,    no_td_slots_evt        , Topdown slots event not available
+       0xa,         0,  ecx,    31:0,    pmu_fcounters_bitmap   , Fixed-function PMU counters support bitmap
+       0xa,         0,  edx,     4:0,    pmu_n_fcounters        , Number of fixed PMU counters
+       0xa,         0,  edx,    12:5,    pmu_fcounters_nbits    , Bitwidth of PMU fixed counters
+       0xa,         0,  edx,      15,    anythread_depr         , AnyThread deprecation
+
+# Leaf BH
+# CPUs v1 extended topology enumeration
+
+       0xb,       1:0,  eax,     4:0,    x2apic_id_shift        , Bit width of this level (previous levels inclusive)
+       0xb,       1:0,  ebx,    15:0,    domain_lcpus_count     , Logical CPUs count across all instances of this domain
+       0xb,       1:0,  ecx,     7:0,    domain_nr              , This domain level (subleaf ID)
+       0xb,       1:0,  ecx,    15:8,    domain_type            , This domain type
+       0xb,       1:0,  edx,    31:0,    x2apic_id              , x2APIC ID of current logical CPU
+
+# Leaf DH
+# Processor extended state enumeration
+
+       0xd,         0,  eax,       0,    xcr0_x87               , XCR0.X87 (bit 0) supported
+       0xd,         0,  eax,       1,    xcr0_sse               , XCR0.SSE (bit 1) supported
+       0xd,         0,  eax,       2,    xcr0_avx               , XCR0.AVX (bit 2) supported
+       0xd,         0,  eax,       3,    xcr0_mpx_bndregs       , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
+       0xd,         0,  eax,       4,    xcr0_mpx_bndcsr        , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
+       0xd,         0,  eax,       5,    xcr0_avx512_opmask     , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
+       0xd,         0,  eax,       6,    xcr0_avx512_zmm_hi256  , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
+       0xd,         0,  eax,       7,    xcr0_avx512_hi16_zmm   , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
+       0xd,         0,  eax,       9,    xcr0_pkru              , XCR0.PKRU (bit 9) supported (XSAVE PKRU reg)
+       0xd,         0,  eax,      11,    xcr0_cet_u             , AMD XCR0.CET_U (bit 11) supported (CET user state)
+       0xd,         0,  eax,      12,    xcr0_cet_s             , AMD XCR0.CET_S (bit 12) supported (CET supervisor state)
+       0xd,         0,  eax,      17,    xcr0_tileconfig        , XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
+       0xd,         0,  eax,      18,    xcr0_tiledata          , XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
+       0xd,         0,  ebx,    31:0,    xsave_sz_xcr0_enabled  , XSAVE/XRSTOR area byte size, for XCR0 enabled features
+       0xd,         0,  ecx,    31:0,    xsave_sz_max           , XSAVE/XRSTOR area max byte size, all CPU features
+       0xd,         0,  edx,      30,    xcr0_lwp               , AMD XCR0.LWP (bit 62) supported (Light-weight Profiling)
+       0xd,         1,  eax,       0,    xsaveopt               , XSAVEOPT instruction
+       0xd,         1,  eax,       1,    xsavec                 , XSAVEC instruction
+       0xd,         1,  eax,       2,    xgetbv1                , XGETBV instruction with ECX = 1
+       0xd,         1,  eax,       3,    xsaves                 , XSAVES/XRSTORS instructions (and XSS MSR)
+       0xd,         1,  eax,       4,    xfd                    , Extended feature disable support
+       0xd,         1,  ebx,    31:0,    xsave_sz_xcr0_xmms_enabled, XSAVE area size, all XCR0 and XMMS features enabled
+       0xd,         1,  ecx,       8,    xss_pt                 , PT state, supported
+       0xd,         1,  ecx,      10,    xss_pasid              , PASID state, supported
+       0xd,         1,  ecx,      11,    xss_cet_u              , CET user state, supported
+       0xd,         1,  ecx,      12,    xss_cet_p              , CET supervisor state, supported
+       0xd,         1,  ecx,      13,    xss_hdc                , HDC state, supported
+       0xd,         1,  ecx,      14,    xss_uintr              , UINTR state, supported
+       0xd,         1,  ecx,      15,    xss_lbr                , LBR state, supported
+       0xd,         1,  ecx,      16,    xss_hwp                , HWP state, supported
+       0xd,      63:2,  eax,    31:0,    xsave_sz               , Size of save area for subleaf-N feature, in bytes
+       0xd,      63:2,  ebx,    31:0,    xsave_offset           , Offset of save area for subleaf-N feature, in bytes
+       0xd,      63:2,  ecx,       0,    is_xss_bit             , Subleaf N describes an XSS bit, otherwise XCR0 bit
+       0xd,      63:2,  ecx,       1,    compacted_xsave_64byte_aligned, When compacted, subleaf-N feature xsave area is 64-byte aligned
+
+# Leaf FH
+# Intel RDT / AMD PQoS resource monitoring
+
+       0xf,         0,  ebx,    31:0,    core_rmid_max          , RMID max, within this core, all types (0-based)
+       0xf,         0,  edx,       1,    cqm_llc                , LLC QoS-monitoring supported
+       0xf,         1,  eax,     7:0,    l3c_qm_bitwidth        , L3 QoS-monitoring counter bitwidth (24-based)
+       0xf,         1,  eax,       8,    l3c_qm_overflow_bit    , QM_CTR MSR bit 61 is an overflow bit
+       0xf,         1,  ebx,    31:0,    l3c_qm_conver_factor   , QM_CTR MSR conversion factor to bytes
+       0xf,         1,  ecx,    31:0,    l3c_qm_rmid_max        , L3 QoS-monitoring max RMID
+       0xf,         1,  edx,       0,    cqm_occup_llc          , L3 QoS occupancy monitoring supported
+       0xf,         1,  edx,       1,    cqm_mbm_total          , L3 QoS total bandwidth monitoring supported
+       0xf,         1,  edx,       2,    cqm_mbm_local          , L3 QoS local bandwidth monitoring supported
 
 # Leaf 10H
-# Intel RDT Allocation
-
-      0x10,    0,  EBX,      1, l3c_rdt_alloc, L3 Cache Allocation supported
-      0x10,    0,  EBX,      2, l2c_rdt_alloc, L2 Cache Allocation supported
-      0x10,    0,  EBX,      3, mem_bw_alloc, Memory Bandwidth Allocation supported
-
+# Intel RDT / AMD PQoS allocation enumeration
+
+      0x10,         0,  ebx,       1,    cat_l3                 , L3 Cache Allocation Technology supported
+      0x10,         0,  ebx,       2,    cat_l2                 , L2 Cache Allocation Technology supported
+      0x10,         0,  ebx,       3,    mba                    , Memory Bandwidth Allocation supported
+      0x10,       2:1,  eax,     4:0,    cat_cbm_len            , L3/L2_CAT capacity bitmask length, minus-one notation
+      0x10,       2:1,  ebx,    31:0,    cat_units_bitmap       , L3/L2_CAT bitmap of allocation units
+      0x10,       2:1,  ecx,       1,    l3_cat_cos_infreq_updates, L3_CAT COS updates should be infrequent
+      0x10,       2:1,  ecx,       2,    cdp_l3                 , L3/L2_CAT CDP (Code and Data Prioritization)
+      0x10,       2:1,  ecx,       3,    cat_sparse_1s          , L3/L2_CAT non-contiguous 1s value supported
+      0x10,       2:1,  edx,    15:0,    cat_cos_max            , L3/L2_CAT max COS (Class of Service) supported
+      0x10,         3,  eax,    11:0,    mba_max_delay          , Max MBA throttling value; minus-one notation
+      0x10,         3,  ecx,       0,    per_thread_mba         , Per-thread MBA controls are supported
+      0x10,         3,  ecx,       2,    mba_delay_linear       , Delay values are linear
+      0x10,         3,  edx,    15:0,    mba_cos_max            , MBA max Class of Service supported
 
 # Leaf 12H
-# SGX Capability
-#
-# Some detailed SGX features not added yet
-
-      0x12,    0,  EAX,      0, sgx1, L3 Cache Allocation supported
-      0x12,    1,  EAX,      0, sgx2, L3 Cache Allocation supported
-
+# Intel Software Guard Extensions (SGX) enumeration
+
+      0x12,         0,  eax,       0,    sgx1                   , SGX1 leaf functions supported
+      0x12,         0,  eax,       1,    sgx2                   , SGX2 leaf functions supported
+      0x12,         0,  eax,       5,    enclv_leaves           , ENCLV leaves (E{INC,DEC}VIRTCHILD, ESETCONTEXT) supported
+      0x12,         0,  eax,       6,    encls_leaves           , ENCLS leaves (ENCLS ETRACKC, ERDINFO, ELDBC, ELDUC) supported
+      0x12,         0,  eax,       7,    enclu_everifyreport2   , ENCLU leaf EVERIFYREPORT2 supported
+      0x12,         0,  eax,      10,    encls_eupdatesvn       , ENCLS leaf EUPDATESVN supported
+      0x12,         0,  eax,      11,    sgx_edeccssa           , ENCLU leaf EDECCSSA supported
+      0x12,         0,  ebx,       0,    miscselect_exinfo      , SSA.MISC frame: reporting #PF and #GP exceptions inside enclave supported
+      0x12,         0,  ebx,       1,    miscselect_cpinfo      , SSA.MISC frame: reporting #CP exceptions inside enclave supported
+      0x12,         0,  edx,     7:0,    max_enclave_sz_not64   , Maximum enclave size in non-64-bit mode (log2)
+      0x12,         0,  edx,    15:8,    max_enclave_sz_64      , Maximum enclave size in 64-bit mode (log2)
+      0x12,         1,  eax,       0,    secs_attr_init         , ATTRIBUTES.INIT supported (enclave initialized by EINIT)
+      0x12,         1,  eax,       1,    secs_attr_debug        , ATTRIBUTES.DEBUG supported (enclave permits debugger read/write)
+      0x12,         1,  eax,       2,    secs_attr_mode64bit    , ATTRIBUTES.MODE64BIT supported (enclave runs in 64-bit mode)
+      0x12,         1,  eax,       4,    secs_attr_provisionkey , ATTRIBUTES.PROVISIONKEY supported (provisioning key available)
+      0x12,         1,  eax,       5,    secs_attr_einittoken_key, ATTRIBUTES.EINITTOKEN_KEY supported (EINIT token key available)
+      0x12,         1,  eax,       6,    secs_attr_cet          , ATTRIBUTES.CET supported (enable CET attributes)
+      0x12,         1,  eax,       7,    secs_attr_kss          , ATTRIBUTES.KSS supported (Key Separation and Sharing enabled)
+      0x12,         1,  eax,      10,    secs_attr_aexnotify    , ATTRIBUTES.AEXNOTIFY supported (enclave threads may get AEX notifications)
+      0x12,         1,  ecx,       0,    xfrm_x87               , Enclave XFRM.X87 (bit 0) supported
+      0x12,         1,  ecx,       1,    xfrm_sse               , Enclave XFRM.SSE (bit 1) supported
+      0x12,         1,  ecx,       2,    xfrm_avx               , Enclave XFRM.AVX (bit 2) supported
+      0x12,         1,  ecx,       3,    xfrm_mpx_bndregs       , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
+      0x12,         1,  ecx,       4,    xfrm_mpx_bndcsr        , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
+      0x12,         1,  ecx,       5,    xfrm_avx512_opmask     , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
+      0x12,         1,  ecx,       6,    xfrm_avx512_zmm_hi256  , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
+      0x12,         1,  ecx,       7,    xfrm_avx512_hi16_zmm   , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
+      0x12,         1,  ecx,       9,    xfrm_pkru              , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU reg)
+      0x12,         1,  ecx,      17,    xfrm_tileconfig        , Enclave XFRM.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
+      0x12,         1,  ecx,      18,    xfrm_tiledata          , Enclave XFRM.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
+      0x12,      31:2,  eax,     3:0,    subleaf_type           , Subleaf type (dictates output layout)
+      0x12,      31:2,  eax,   31:12,    epc_sec_base_addr_0    , EPC section base addr, bits[12:31]
+      0x12,      31:2,  ebx,    19:0,    epc_sec_base_addr_1    , EPC section base addr, bits[32:51]
+      0x12,      31:2,  ecx,     3:0,    epc_sec_type           , EPC section type / property encoding
+      0x12,      31:2,  ecx,   31:12,    epc_sec_size_0         , EPC section size, bits[12:31]
+      0x12,      31:2,  edx,    19:0,    epc_sec_size_1         , EPC section size, bits[32:51]
 
 # Leaf 14H
-# Intel Processor Tracer
-#
+# Intel Processor Trace enumeration
+
+      0x14,         0,  eax,    31:0,    pt_max_subleaf         , Max cpuid 0x14 subleaf
+      0x14,         0,  ebx,       0,    cr3_filtering          , IA32_RTIT_CR3_MATCH is accessible
+      0x14,         0,  ebx,       1,    psb_cyc                , Configurable PSB and cycle-accurate mode
+      0x14,         0,  ebx,       2,    ip_filtering           , IP/TraceStop filtering; Warm-reset PT MSRs preservation
+      0x14,         0,  ebx,       3,    mtc_timing             , MTC timing packet; COFI-based packets suppression
+      0x14,         0,  ebx,       4,    ptwrite                , PTWRITE support
+      0x14,         0,  ebx,       5,    power_event_trace      , Power Event Trace support
+      0x14,         0,  ebx,       6,    psb_pmi_preserve       , PSB and PMI preservation support
+      0x14,         0,  ebx,       7,    event_trace            , Event Trace packet generation through IA32_RTIT_CTL.EventEn
+      0x14,         0,  ebx,       8,    tnt_disable            , TNT packet generation disable through IA32_RTIT_CTL.DisTNT
+      0x14,         0,  ecx,       0,    topa_output            , ToPA output scheme support
+      0x14,         0,  ecx,       1,    topa_multiple_entries  , ToPA tables can hold multiple entries
+      0x14,         0,  ecx,       2,    single_range_output    , Single-range output scheme supported
+      0x14,         0,  ecx,       3,    trace_transport_output , Trace Transport subsystem output support
+      0x14,         0,  ecx,      31,    ip_payloads_lip        , IP payloads have LIP values (CS base included)
+      0x14,         1,  eax,     2:0,    num_address_ranges     , Filtering number of configurable Address Ranges
+      0x14,         1,  eax,   31:16,    mtc_periods_bmp        , Bitmap of supported MTC period encodings
+      0x14,         1,  ebx,    15:0,    cycle_thresholds_bmp   , Bitmap of supported Cycle Threshold encodings
+      0x14,         1,  ebx,   31:16,    psb_periods_bmp        , Bitmap of supported Configurable PSB frequency encodings
 
 # Leaf 15H
-# Time Stamp Counter and Nominal Core Crystal Clock Information
+# Intel TSC (Time Stamp Counter) enumeration
 
-      0x15,    0,  EAX,   31:0, tsc_denominator, The denominator of the TSC/”core crystal clock” ratio
-      0x15,    0,  EBX,   31:0, tsc_numerator, The numerator of the TSC/”core crystal clock” ratio
-      0x15,    0,  ECX,   31:0, nom_freq, Nominal frequency of the core crystal clock in Hz
+      0x15,         0,  eax,    31:0,    tsc_denominator        , Denominator of the TSC/'core crystal clock' ratio
+      0x15,         0,  ebx,    31:0,    tsc_numerator          , Numerator of the TSC/'core crystal clock' ratio
+      0x15,         0,  ecx,    31:0,    cpu_crystal_hz         , Core crystal clock nominal frequency, in Hz
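
The three fields above combine as TSC Hz = cpu_crystal_hz * tsc_numerator / tsc_denominator.
A minimal sketch of that computation, assuming GCC/clang's <cpuid.h> helpers on x86
(any of the three values may be enumerated as zero on older parts):

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int den, num, crystal_hz, edx;

          if (!__get_cpuid_count(0x15, 0, &den, &num, &crystal_hz, &edx) ||
              !den || !num || !crystal_hz) {
                  puts("TSC/crystal ratio not fully enumerated");
                  return 1;
          }
          printf("TSC frequency: %llu Hz\n",
                 (unsigned long long)crystal_hz * num / den);
          return 0;
  }
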
 
 # Leaf 16H
-# Processor Frequency Information
+# Intel processor frequency enumeration
 
-      0x16,    0,  EAX,   15:0, cpu_base_freq, Processor Base Frequency in MHz
-      0x16,    0,  EBX,   15:0, cpu_max_freq, Maximum Frequency in MHz
-      0x16,    0,  ECX,   15:0, bus_freq, Bus (Reference) Frequency in MHz
+      0x16,         0,  eax,    15:0,    cpu_base_mhz           , Processor base frequency, in MHz
+      0x16,         0,  ebx,    15:0,    cpu_max_mhz            , Processor max frequency, in MHz
+      0x16,         0,  ecx,    15:0,    bus_mhz                , Bus reference frequency, in MHz
 
 # Leaf 17H
-# System-On-Chip Vendor Attribute
-
-      0x17,    0,  EAX,   31:0, max_socid, Maximum input value of supported sub-leaf
-      0x17,    0,  EBX,   15:0, soc_vid, SOC Vendor ID
-      0x17,    0,  EBX,     16, std_vid, SOC Vendor ID is assigned via an industry standard scheme
-      0x17,    0,  ECX,   31:0, soc_pid, SOC Project ID assigned by vendor
-      0x17,    0,  EDX,   31:0, soc_sid, SOC Stepping ID
+# Intel SoC vendor attributes enumeration
+
+      0x17,         0,  eax,    31:0,    soc_max_subleaf        , Max cpuid leaf 0x17 subleaf
+      0x17,         0,  ebx,    15:0,    soc_vendor_id          , SoC vendor ID
+      0x17,         0,  ebx,      16,    is_vendor_scheme       , Assigned by an industry enumeration scheme (not Intel)
+      0x17,         0,  ecx,    31:0,    soc_proj_id            , SoC project ID, assigned by vendor
+      0x17,         0,  edx,    31:0,    soc_stepping_id        , SoC project stepping ID, assigned by vendor
+      0x17,       3:1,  eax,    31:0,    vendor_brand_a         , Vendor Brand ID string, bytes subleaf_nr * (0 -> 3)
+      0x17,       3:1,  ebx,    31:0,    vendor_brand_b         , Vendor Brand ID string, bytes subleaf_nr * (4 -> 7)
+      0x17,       3:1,  ecx,    31:0,    vendor_brand_c         , Vendor Brand ID string, bytes subleaf_nr * (8 -> 11)
+      0x17,       3:1,  edx,    31:0,    vendor_brand_d         , Vendor Brand ID string, bytes subleaf_nr * (12 -> 15)
 
 # Leaf 18H
-# Deterministic Address Translation Parameters
-
+# Intel deterministic address translation (TLB) parameters
+
+      0x18,      31:0,  eax,    31:0,    tlb_max_subleaf        , Max cpuid 0x18 subleaf
+      0x18,      31:0,  ebx,       0,    tlb_4k_page            , TLB 4KB-page entries supported
+      0x18,      31:0,  ebx,       1,    tlb_2m_page            , TLB 2MB-page entries supported
+      0x18,      31:0,  ebx,       2,    tlb_4m_page            , TLB 4MB-page entries supported
+      0x18,      31:0,  ebx,       3,    tlb_1g_page            , TLB 1GB-page entries supported
+      0x18,      31:0,  ebx,    10:8,    hard_partitioning      , (Hard/Soft) partitioning between logical CPUs sharing this struct
+      0x18,      31:0,  ebx,   31:16,    n_way_associative      , Ways of associativity
+      0x18,      31:0,  ecx,    31:0,    n_sets                 , Number of sets
+      0x18,      31:0,  edx,     4:0,    tlb_type               , Translation cache type (TLB type)
+      0x18,      31:0,  edx,     7:5,    tlb_cache_level        , Translation cache level (1-based)
+      0x18,      31:0,  edx,       8,    is_fully_associative   , Fully-associative structure
+      0x18,      31:0,  edx,   25:14,    tlb_max_addressable_ids, Max num of addressable IDs for logical CPUs sharing this TLB - 1
 
 # Leaf 19H
-# Key Locker Leaf
+# Intel Key Locker enumeration
 
+      0x19,         0,  eax,       0,    kl_cpl0_only           , CPL0-only key Locker restriction supported
+      0x19,         0,  eax,       1,    kl_no_encrypt          , No-encrypt key locker restriction supported
+      0x19,         0,  eax,       2,    kl_no_decrypt          , No-decrypt key locker restriction supported
+      0x19,         0,  ebx,       0,    aes_keylocker          , AES key locker instructions supported
+      0x19,         0,  ebx,       2,    aes_keylocker_wide     , AES wide key locker instructions supported
+      0x19,         0,  ebx,       4,    kl_msr_iwkey           , Key locker MSRs and IWKEY backups supported
+      0x19,         0,  ecx,       0,    loadiwkey_no_backup    , LOADIWKEY NoBackup parameter supported
+      0x19,         0,  ecx,       1,    iwkey_rand             , IWKEY randomization (KeySource encoding 1) supported
 
 # Leaf 1AH
-# Hybrid Information
-
-      0x1A,    0,  EAX,  31:24, core_type, 20H-Intel_Atom 40H-Intel_Core
-
+# Intel hybrid CPUs identification (e.g. Atom, Core)
+
+      0x1a,         0,  eax,    23:0,    core_native_model      , This core's native model ID
+      0x1a,         0,  eax,   31:24,    core_type              , This core's type
+
+# Leaf 1BH
+# Intel PCONFIG (Platform configuration) enumeration
+
+      0x1b,      31:0,  eax,    11:0,    pconfig_subleaf_type   , CPUID 0x1b subleaf type
+      0x1b,      31:0,  ebx,    31:0,    pconfig_target_id_x    , A supported PCONFIG target ID
+      0x1b,      31:0,  ecx,    31:0,    pconfig_target_id_y    , A supported PCONFIG target ID
+      0x1b,      31:0,  edx,    31:0,    pconfig_target_id_z    , A supported PCONFIG target ID
+
+# Leaf 1CH
+# Intel LBR (Last Branch Record) enumeration
+
+      0x1c,         0,  eax,       0,    lbr_depth_8            , Max stack depth (number of LBR entries) = 8
+      0x1c,         0,  eax,       1,    lbr_depth_16           , Max stack depth (number of LBR entries) = 16
+      0x1c,         0,  eax,       2,    lbr_depth_24           , Max stack depth (number of LBR entries) = 24
+      0x1c,         0,  eax,       3,    lbr_depth_32           , Max stack depth (number of LBR entries) = 32
+      0x1c,         0,  eax,       4,    lbr_depth_40           , Max stack depth (number of LBR entries) = 40
+      0x1c,         0,  eax,       5,    lbr_depth_48           , Max stack depth (number of LBR entries) = 48
+      0x1c,         0,  eax,       6,    lbr_depth_56           , Max stack depth (number of LBR entries) = 56
+      0x1c,         0,  eax,       7,    lbr_depth_64           , Max stack depth (number of LBR entries) = 64
+      0x1c,         0,  eax,      30,    lbr_deep_c_reset       , LBRs may be cleared on MWAIT C-state > C1
+      0x1c,         0,  eax,      31,    lbr_ip_is_lip          , LBR IP contains Last IP, otherwise effective IP
+      0x1c,         0,  ebx,       0,    lbr_cpl                , CPL filtering (non-zero IA32_LBR_CTL[2:1]) supported
+      0x1c,         0,  ebx,       1,    lbr_branch_filter      , Branch filtering (non-zero IA32_LBR_CTL[22:16]) supported
+      0x1c,         0,  ebx,       2,    lbr_call_stack         , Call-stack mode (IA32_LBR_CTL[3] = 1) supported
+      0x1c,         0,  ecx,       0,    lbr_mispredict         , Branch misprediction bit supported (IA32_LBR_x_INFO[63])
+      0x1c,         0,  ecx,       1,    lbr_timed_lbr          , Timed LBRs (CPU cycles since last LBR entry) supported
+      0x1c,         0,  ecx,       2,    lbr_branch_type        , Branch type field (IA32_LBR_INFO_x[59:56]) supported
+      0x1c,         0,  ecx,   19:16,    lbr_events_gpc_bmp     , LBR PMU-events logging support; bitmap for first 4 GP (general-purpose) Counters
+
+# Leaf 1DH
+# Intel AMX (Advanced Matrix Extensions) tile information
+
+      0x1d,         0,  eax,    31:0,    amx_max_palette        , Highest palette ID / subleaf ID
+      0x1d,         1,  eax,    15:0,    amx_palette_size       , AMX palette total tiles size, in bytes
+      0x1d,         1,  eax,   31:16,    amx_tile_size          , AMX single tile's size, in bytes
+      0x1d,         1,  ebx,    15:0,    amx_tile_row_size      , AMX tile single row's size, in bytes
+      0x1d,         1,  ebx,   31:16,    amx_palette_nr_tiles   , AMX palette number of tiles
+      0x1d,         1,  ecx,    15:0,    amx_tile_nr_rows       , AMX tile max number of rows
+
+# Leaf 1EH
+# Intel AMX, TMUL (Tile-matrix MULtiply) accelerator unit enumeration
+
+      0x1e,         0,  ebx,     7:0,    tmul_maxk              , TMUL unit maximum height, K (rows or columns)
+      0x1e,         0,  ebx,    23:8,    tmul_maxn              , TMUL unit maximum SIMD dimension, N (column bytes)
 
 # Leaf 1FH
-# V2 Extended Topology - A preferred superset to leaf 0BH
-
-
-# According to SDM
-# 40000000H - 4FFFFFFFH is invalid range
+# Intel extended topology enumeration v2
+
+      0x1f,       5:0,  eax,     4:0,    x2apic_id_shift        , Bit width of this level (previous levels inclusive)
+      0x1f,       5:0,  ebx,    15:0,    domain_lcpus_count     , Logical CPUs count across all instances of this domain
+      0x1f,       5:0,  ecx,     7:0,    domain_level           , This domain level (subleaf ID)
+      0x1f,       5:0,  ecx,    15:8,    domain_type            , This domain type
+      0x1f,       5:0,  edx,    31:0,    x2apic_id              , x2APIC ID of current logical CPU
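
Consumers walk the leaf 0x1f subleaves above until domain_type reads 0 (invalid); each
level's x2apic_id_shift gives the number of low x2APIC-ID bits covered by that level and
the ones below it.  A minimal sketch of the walk, assuming GCC/clang's <cpuid.h> helpers
on x86:

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int eax, ebx, ecx, edx, subleaf;

          for (subleaf = 0; ; subleaf++) {
                  if (!__get_cpuid_count(0x1f, subleaf, &eax, &ebx, &ecx, &edx))
                          break;                  /* leaf 0x1f not supported */
                  if (!((ecx >> 8) & 0xff))
                          break;                  /* domain_type 0 ends the list */
                  printf("level %u: domain type %u, id shift %u, logical CPUs %u\n",
                         subleaf, (ecx >> 8) & 0xff, eax & 0x1f, ebx & 0xffff);
          }
          return 0;
  }
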
+
+# Leaf 20H
+# Intel HRESET (History Reset) enumeration
+
+      0x20,         0,  eax,    31:0,    hreset_nr_subleaves    , CPUID 0x20 max subleaf + 1
+      0x20,         0,  ebx,       0,    hreset_thread_director , HRESET of Intel thread director is supported
+
+# Leaf 21H
+# Intel TD (Trust Domain) guest execution environment enumeration
+
+      0x21,         0,  ebx,    31:0,    tdx_vendorid_0         , TDX vendor ID string bytes 0 - 3
+      0x21,         0,  ecx,    31:0,    tdx_vendorid_2         , TDX vendor ID string bytes 8 - 11
+      0x21,         0,  edx,    31:0,    tdx_vendorid_1         , TDX vendor ID string bytes 4 - 7
+
+# Leaf 23H
+# Intel Architectural Performance Monitoring Extended (ArchPerfmonExt)
+
+      0x23,         0,  eax,       1,    subleaf_1_counters     , Subleaf 1, PMU counters bitmaps, is valid
+      0x23,         0,  eax,       3,    subleaf_3_events       , Subleaf 3, PMU events bitmaps, is valid
+      0x23,         0,  ebx,       0,    unitmask2              , IA32_PERFEVTSELx MSRs UnitMask2 is supported
+      0x23,         0,  ebx,       1,    zbit                   , IA32_PERFEVTSELx MSRs Z-bit is supported
+      0x23,         1,  eax,    31:0,    pmu_gp_counters_bitmap , General-purpose PMU counters bitmap
+      0x23,         1,  ebx,    31:0,    pmu_f_counters_bitmap  , Fixed PMU counters bitmap
+      0x23,         3,  eax,       0,    core_cycles_evt        , Core cycles event supported
+      0x23,         3,  eax,       1,    insn_retired_evt       , Instructions retired event supported
+      0x23,         3,  eax,       2,    ref_cycles_evt         , Reference cycles event supported
+      0x23,         3,  eax,       3,    llc_refs_evt           , Last-level cache references event supported
+      0x23,         3,  eax,       4,    llc_misses_evt         , Last-level cache misses event supported
+      0x23,         3,  eax,       5,    br_insn_ret_evt        , Branch instruction retired event supported
+      0x23,         3,  eax,       6,    br_mispr_evt           , Branch mispredict retired event supported
+      0x23,         3,  eax,       7,    td_slots_evt           , Topdown slots event supported
+      0x23,         3,  eax,       8,    td_backend_bound_evt   , Topdown backend bound event supported
+      0x23,         3,  eax,       9,    td_bad_spec_evt        , Topdown bad speculation event supported
+      0x23,         3,  eax,      10,    td_frontend_bound_evt  , Topdown frontend bound event supported
+      0x23,         3,  eax,      11,    td_retiring_evt        , Topdown retiring event supported
+
+# Leaf 40000000H
+# Maximum hypervisor standard leaf + hypervisor vendor string
+
+0x40000000,         0,  eax,    31:0,    max_hyp_leaf           , Maximum hypervisor standard leaf number
+0x40000000,         0,  ebx,    31:0,    hypervisor_id_0        , Hypervisor ID string bytes 0 - 3
+0x40000000,         0,  ecx,    31:0,    hypervisor_id_1        , Hypervisor ID string bytes 4 - 7
+0x40000000,         0,  edx,    31:0,    hypervisor_id_2        , Hypervisor ID string bytes 8 - 11
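
Leaf 0x40000000 is only meaningful when the hypervisor-present bit (CPUID.1:ECX[31]) is
set; EBX/ECX/EDX then spell a 12-byte hypervisor ID string.  A minimal detection sketch,
assuming GCC/clang's <cpuid.h> helpers on x86:

  #include <stdio.h>
  #include <string.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int eax, ebx, ecx, edx;
          char id[13] = { 0 };

          __cpuid(1, eax, ebx, ecx, edx);
          if (!(ecx & (1u << 31))) {
                  puts("no hypervisor detected");
                  return 0;
          }
          __cpuid(0x40000000, eax, ebx, ecx, edx);
          memcpy(id, &ebx, 4);            /* hypervisor_id_0 */
          memcpy(id + 4, &ecx, 4);        /* hypervisor_id_1 */
          memcpy(id + 8, &edx, 4);        /* hypervisor_id_2 */
          printf("hypervisor: %s (max standard leaf 0x%x)\n", id, eax);
          return 0;
  }
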
+
+# Leaf 80000000H
+# Maximum extended leaf number + CPU vendor string (AMD)
+
+0x80000000,         0,  eax,    31:0,    max_ext_leaf           , Maximum extended cpuid leaf supported
+0x80000000,         0,  ebx,    31:0,    cpu_vendorid_0         , Vendor ID string bytes 0 - 3
+0x80000000,         0,  ecx,    31:0,    cpu_vendorid_2         , Vendor ID string bytes 8 - 11
+0x80000000,         0,  edx,    31:0,    cpu_vendorid_1         , Vendor ID string bytes 4 - 7
 
 # Leaf 80000001H
-# Extended Processor Signature and Feature Bits
-
-0x80000001,    0,  EAX,  27:20, extfamily, Extended family
-0x80000001,    0,  EAX,  19:16, extmodel, Extended model
-0x80000001,    0,  EAX,   11:8, basefamily, Description of Family
-0x80000001,    0,  EAX,   11:8, basemodel, Model numbers vary with product
-0x80000001,    0,  EAX,    3:0, stepping, Processor stepping (revision) for a specific model
-
-0x80000001,    0,  EBX,  31:28, pkgtype, Specifies the package type
-
-0x80000001,    0,  ECX,      0, lahf_lm, LAHF/SAHF available in 64-bit mode
-0x80000001,    0,  ECX,      1, cmplegacy, Core multi-processing legacy mode
-0x80000001,    0,  ECX,      2, svm, Indicates support for: VMRUN, VMLOAD, VMSAVE, CLGI, VMMCALL, and INVLPGA
-0x80000001,    0,  ECX,      3, extapicspace, Extended APIC register space
-0x80000001,    0,  ECX,      4, altmovecr8, Indicates support for LOCK MOV CR0 means MOV CR8
-0x80000001,    0,  ECX,      5, lzcnt, LZCNT
-0x80000001,    0,  ECX,      6, sse4a, EXTRQ, INSERTQ, MOVNTSS, and MOVNTSD instruction support
-0x80000001,    0,  ECX,      7, misalignsse, Misaligned SSE Mode
-0x80000001,    0,  ECX,      8, prefetchw, PREFETCHW
-0x80000001,    0,  ECX,      9, osvw, OS Visible Work-around support
-0x80000001,    0,  ECX,     10, ibs, Instruction Based Sampling
-0x80000001,    0,  ECX,     11, xop, Extended operation support
-0x80000001,    0,  ECX,     12, skinit, SKINIT and STGI support
-0x80000001,    0,  ECX,     13, wdt, Watchdog timer support
-0x80000001,    0,  ECX,     15, lwp, Lightweight profiling support
-0x80000001,    0,  ECX,     16, fma4, Four-operand FMA instruction support
-0x80000001,    0,  ECX,     17, tce, Translation cache extension
-0x80000001,    0,  ECX,     22, TopologyExtensions, Indicates support for Core::X86::Cpuid::CachePropEax0 and Core::X86::Cpuid::ExtApicId
-0x80000001,    0,  ECX,     23, perfctrextcore, Indicates support for Core::X86::Msr::PERF_CTL0 - 5 and Core::X86::Msr::PERF_CTR
-0x80000001,    0,  ECX,     24, perfctrextdf, Indicates support for Core::X86::Msr::DF_PERF_CTL and Core::X86::Msr::DF_PERF_CTR
-0x80000001,    0,  ECX,     26, databreakpointextension, Indicates data breakpoint support for Core::X86::Msr::DR0_ADDR_MASK, Core::X86::Msr::DR1_ADDR_MASK, Core::X86::Msr::DR2_ADDR_MASK and Core::X86::Msr::DR3_ADDR_MASK
-0x80000001,    0,  ECX,     27, perftsc, Performance time-stamp counter supported
-0x80000001,    0,  ECX,     28, perfctrextllc, Indicates support for L3 performance counter extensions
-0x80000001,    0,  ECX,     29, mwaitextended, MWAITX and MONITORX capability is supported
-0x80000001,    0,  ECX,     30, admskextn, Indicates support for address mask extension (to 32 bits and to all 4 DRs) for instruction breakpoints
-
-0x80000001,    0,  EDX,      0, fpu, x87 floating point unit on-chip
-0x80000001,    0,  EDX,      1, vme, Virtual-mode enhancements
-0x80000001,    0,  EDX,      2, de, Debugging extensions, IO breakpoints, CR4.DE
-0x80000001,    0,  EDX,      3, pse, Page-size extensions (4 MB pages)
-0x80000001,    0,  EDX,      4, tsc, Time stamp counter, RDTSC/RDTSCP instructions, CR4.TSD
-0x80000001,    0,  EDX,      5, msr, Model-specific registers (MSRs), with RDMSR and WRMSR instructions
-0x80000001,    0,  EDX,      6, pae, Physical-address extensions (PAE)
-0x80000001,    0,  EDX,      7, mce, Machine Check Exception, CR4.MCE
-0x80000001,    0,  EDX,      8, cmpxchg8b, CMPXCHG8B instruction
-0x80000001,    0,  EDX,      9, apic, advanced programmable interrupt controller (APIC) exists and is enabled
-0x80000001,    0,  EDX,     11, sysret, SYSCALL/SYSRET supported
-0x80000001,    0,  EDX,     12, mtrr, Memory-type range registers
-0x80000001,    0,  EDX,     13, pge, Page global extension, CR4.PGE
-0x80000001,    0,  EDX,     14, mca, Machine check architecture, MCG_CAP
-0x80000001,    0,  EDX,     15, cmov, Conditional move instructions, CMOV, FCOMI, FCMOV
-0x80000001,    0,  EDX,     16, pat, Page attribute table
-0x80000001,    0,  EDX,     17, pse36, Page-size extensions
-0x80000001,    0,  EDX,     20, exec_dis, Execute Disable Bit available
-0x80000001,    0,  EDX,     22, mmxext, AMD extensions to MMX instructions
-0x80000001,    0,  EDX,     23, mmx, MMX instructions
-0x80000001,    0,  EDX,     24, fxsr, FXSAVE and FXRSTOR instructions
-0x80000001,    0,  EDX,     25, ffxsr, FXSAVE and FXRSTOR instruction optimizations
-0x80000001,    0,  EDX,     26, 1gb_page, 1GB page supported
-0x80000001,    0,  EDX,     27, rdtscp, RDTSCP and IA32_TSC_AUX are available
-0x80000001,    0,  EDX,     29, lm, 64b Architecture supported
-0x80000001,    0,  EDX,     30, threednowext, AMD extensions to 3DNow! instructions
-0x80000001,    0,  EDX,     31, threednow, 3DNow! instructions
-
-# Leaf 80000002H/80000003H/80000004H
-# Processor Brand String
+# Extended CPU feature identifiers
+
+0x80000001,         0,  eax,     3:0,    e_stepping_id          , Stepping ID
+0x80000001,         0,  eax,     7:4,    e_base_model           , Base processor model
+0x80000001,         0,  eax,    11:8,    e_base_family          , Base processor family
+0x80000001,         0,  eax,   19:16,    e_ext_model            , Extended processor model
+0x80000001,         0,  eax,   27:20,    e_ext_family           , Extended processor family
+0x80000001,         0,  ebx,    15:0,    brand_id               , Brand ID
+0x80000001,         0,  ebx,   31:28,    pkg_type               , Package type
+0x80000001,         0,  ecx,       0,    lahf_lm                , LAHF and SAHF in 64-bit mode
+0x80000001,         0,  ecx,       1,    cmp_legacy             , Multi-processing legacy mode (No HT)
+0x80000001,         0,  ecx,       2,    svm                    , Secure Virtual Machine
+0x80000001,         0,  ecx,       3,    extapic                , Extended APIC space
+0x80000001,         0,  ecx,       4,    cr8_legacy             , LOCK MOV CR0 means MOV CR8
+0x80000001,         0,  ecx,       5,    abm                    , LZCNT advanced bit manipulation
+0x80000001,         0,  ecx,       6,    sse4a                  , SSE4A support
+0x80000001,         0,  ecx,       7,    misalignsse            , Misaligned SSE mode
+0x80000001,         0,  ecx,       8,    3dnowprefetch          , 3DNow PREFETCH/PREFETCHW support
+0x80000001,         0,  ecx,       9,    osvw                   , OS visible workaround
+0x80000001,         0,  ecx,      10,    ibs                    , Instruction based sampling
+0x80000001,         0,  ecx,      11,    xop                    , XOP: extended operation (AVX instructions)
+0x80000001,         0,  ecx,      12,    skinit                 , SKINIT/STGI support
+0x80000001,         0,  ecx,      13,    wdt                    , Watchdog timer support
+0x80000001,         0,  ecx,      15,    lwp                    , Lightweight profiling
+0x80000001,         0,  ecx,      16,    fma4                   , 4-operand FMA instruction
+0x80000001,         0,  ecx,      17,    tce                    , Translation cache extension
+0x80000001,         0,  ecx,      19,    nodeid_msr             , NodeId MSR (0xc001100c)
+0x80000001,         0,  ecx,      21,    tbm                    , Trailing bit manipulations
+0x80000001,         0,  ecx,      22,    topoext                , Topology Extensions (cpuid leaf 0x8000001d)
+0x80000001,         0,  ecx,      23,    perfctr_core           , Core performance counter extensions
+0x80000001,         0,  ecx,      24,    perfctr_nb             , NB/DF performance counter extensions
+0x80000001,         0,  ecx,      26,    bpext                  , Data access breakpoint extension
+0x80000001,         0,  ecx,      27,    ptsc                   , Performance time-stamp counter
+0x80000001,         0,  ecx,      28,    perfctr_llc            , LLC (L3) performance counter extensions
+0x80000001,         0,  ecx,      29,    mwaitx                 , MWAITX/MONITORX support
+0x80000001,         0,  ecx,      30,    addr_mask_ext          , Breakpoint address mask extension (to bit 31)
+0x80000001,         0,  edx,       0,    e_fpu                  , Floating-Point Unit on-chip (x87)
+0x80000001,         0,  edx,       1,    e_vme                  , Virtual-8086 Mode Extensions
+0x80000001,         0,  edx,       2,    e_de                   , Debugging Extensions
+0x80000001,         0,  edx,       3,    e_pse                  , Page Size Extension
+0x80000001,         0,  edx,       4,    e_tsc                  , Time Stamp Counter
+0x80000001,         0,  edx,       5,    e_msr                  , Model-Specific Registers (RDMSR and WRMSR support)
+0x80000001,         0,  edx,       6,    pae                    , Physical Address Extensions
+0x80000001,         0,  edx,       7,    mce                    , Machine Check Exception
+0x80000001,         0,  edx,       8,    cx8                    , CMPXCHG8B instruction
+0x80000001,         0,  edx,       9,    apic                   , APIC on-chip
+0x80000001,         0,  edx,      11,    syscall                , SYSCALL and SYSRET instructions
+0x80000001,         0,  edx,      12,    mtrr                   , Memory Type Range Registers
+0x80000001,         0,  edx,      13,    pge                    , Page Global Extensions
+0x80000001,         0,  edx,      14,    mca                    , Machine Check Architecture
+0x80000001,         0,  edx,      15,    cmov                   , Conditional Move Instruction
+0x80000001,         0,  edx,      16,    pat                    , Page Attribute Table
+0x80000001,         0,  edx,      17,    pse36                  , Page Size Extension (36-bit)
+0x80000001,         0,  edx,      19,    mp                     , Out-of-spec AMD Multiprocessing bit
+0x80000001,         0,  edx,      20,    nx                     , No-execute page protection
+0x80000001,         0,  edx,      22,    mmxext                 , AMD MMX extensions
+0x80000001,         0,  edx,      24,    e_fxsr                 , FXSAVE and FXRSTOR instructions
+0x80000001,         0,  edx,      25,    fxsr_opt               , FXSAVE and FXRSTOR optimizations
+0x80000001,         0,  edx,      26,    pdpe1gb                , 1-GB large page support
+0x80000001,         0,  edx,      27,    rdtscp                 , RDTSCP instruction
+0x80000001,         0,  edx,      29,    lm                     , Long mode (x86-64, 64-bit support)
+0x80000001,         0,  edx,      30,    3dnowext               , AMD 3DNow extensions
+0x80000001,         0,  edx,      31,    3dnow                  , 3DNow instructions
+
+# Leaf 80000002H
+# CPU brand ID string, bytes 0 - 15
+
+0x80000002,         0,  eax,    31:0,    cpu_brandid_0          , CPU brand ID string, bytes 0 - 3
+0x80000002,         0,  ebx,    31:0,    cpu_brandid_1          , CPU brand ID string, bytes 4 - 7
+0x80000002,         0,  ecx,    31:0,    cpu_brandid_2          , CPU brand ID string, bytes 8 - 11
+0x80000002,         0,  edx,    31:0,    cpu_brandid_3          , CPU brand ID string, bytes 12 - 15
+
+# Leaf 80000003H
+# CPU brand ID string, bytes 16 - 31
+
+0x80000003,         0,  eax,    31:0,    cpu_brandid_4          , CPU brand ID string, bytes 16 - 19
+0x80000003,         0,  ebx,    31:0,    cpu_brandid_5          , CPU brand ID string, bytes 20 - 23
+0x80000003,         0,  ecx,    31:0,    cpu_brandid_6          , CPU brand ID string, bytes 24 - 27
+0x80000003,         0,  edx,    31:0,    cpu_brandid_7          , CPU brand ID string, bytes 28 - 31
+
+# Leaf 80000004H
+# CPU brand ID string, bytes 32 - 47
+
+0x80000004,         0,  eax,    31:0,    cpu_brandid_8          , CPU brand ID string, bytes 32 - 35
+0x80000004,         0,  ebx,    31:0,    cpu_brandid_9          , CPU brand ID string, bytes 36 - 39
+0x80000004,         0,  ecx,    31:0,    cpu_brandid_10         , CPU brand ID string, bytes 40 - 43
+0x80000004,         0,  edx,    31:0,    cpu_brandid_11         , CPU brand ID string, bytes 44 - 47
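
Concatenating the EAX/EBX/ECX/EDX outputs of leaves 0x80000002..0x80000004, in that
order, yields the 48-byte NUL-padded processor brand string.  A minimal sketch, assuming
GCC/clang's <cpuid.h> helpers on x86:

  #include <stdio.h>
  #include <string.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int regs[12], i;
          char brand[49] = { 0 };

          if (__get_cpuid_max(0x80000000, NULL) < 0x80000004)
                  return 1;               /* brand string leaves absent */
          for (i = 0; i < 3; i++)
                  __cpuid(0x80000002 + i, regs[i * 4], regs[i * 4 + 1],
                          regs[i * 4 + 2], regs[i * 4 + 3]);
          memcpy(brand, regs, sizeof(regs));
          printf("brand string: %s\n", brand);
          return 0;
  }
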
 
 # Leaf 80000005H
-# Reserved
+# AMD L1 cache and L1 TLB enumeration
+
+0x80000005,         0,  eax,     7:0,    l1_itlb_2m_4m_nentries , L1 ITLB #entries, 2M and 4M pages
+0x80000005,         0,  eax,    15:8,    l1_itlb_2m_4m_assoc    , L1 ITLB associativity, 2M and 4M pages
+0x80000005,         0,  eax,   23:16,    l1_dtlb_2m_4m_nentries , L1 DTLB #entries, 2M and 4M pages
+0x80000005,         0,  eax,   31:24,    l1_dtlb_2m_4m_assoc    , L1 DTLB associativity, 2M and 4M pages
+0x80000005,         0,  ebx,     7:0,    l1_itlb_4k_nentries    , L1 ITLB #entries, 4K pages
+0x80000005,         0,  ebx,    15:8,    l1_itlb_4k_assoc       , L1 ITLB associativity, 4K pages
+0x80000005,         0,  ebx,   23:16,    l1_dtlb_4k_nentries    , L1 DTLB #entries, 4K pages
+0x80000005,         0,  ebx,   31:24,    l1_dtlb_4k_assoc       , L1 DTLB associativity, 4K pages
+0x80000005,         0,  ecx,     7:0,    l1_dcache_line_size    , L1 dcache line size, in bytes
+0x80000005,         0,  ecx,    15:8,    l1_dcache_nlines       , L1 dcache lines per tag
+0x80000005,         0,  ecx,   23:16,    l1_dcache_assoc        , L1 dcache associativity
+0x80000005,         0,  ecx,   31:24,    l1_dcache_size_kb      , L1 dcache size, in KB
+0x80000005,         0,  edx,     7:0,    l1_icache_line_size    , L1 icache line size, in bytes
+0x80000005,         0,  edx,    15:8,    l1_icache_nlines       , L1 icache lines per tag
+0x80000005,         0,  edx,   23:16,    l1_icache_assoc        , L1 icache associativity
+0x80000005,         0,  edx,   31:24,    l1_icache_size_kb      , L1 icache size, in KB
 
 # Leaf 80000006H
-# Extended L2 Cache Features
-
-0x80000006,    0,  ECX,    7:0, clsize, Cache Line size in bytes
-0x80000006,    0,  ECX,  15:12, l2c_assoc, L2 Associativity
-0x80000006,    0,  ECX,  31:16, csize, Cache size in 1K units
-
+# (Mostly AMD) L2 TLB, L2 cache, and L3 cache enumeration
+
+0x80000006,         0,  eax,    11:0,    l2_itlb_2m_4m_nentries , L2 iTLB #entries, 2M and 4M pages
+0x80000006,         0,  eax,   15:12,    l2_itlb_2m_4m_assoc    , L2 iTLB associativity, 2M and 4M pages
+0x80000006,         0,  eax,   27:16,    l2_dtlb_2m_4m_nentries , L2 dTLB #entries, 2M and 4M pages
+0x80000006,         0,  eax,   31:28,    l2_dtlb_2m_4m_assoc    , L2 dTLB associativity, 2M and 4M pages
+0x80000006,         0,  ebx,    11:0,    l2_itlb_4k_nentries    , L2 iTLB #entries, 4K pages
+0x80000006,         0,  ebx,   15:12,    l2_itlb_4k_assoc       , L2 iTLB associativity, 4K pages
+0x80000006,         0,  ebx,   27:16,    l2_dtlb_4k_nentries    , L2 dTLB #entries, 4K pages
+0x80000006,         0,  ebx,   31:28,    l2_dtlb_4k_assoc       , L2 dTLB associativity, 4K pages
+0x80000006,         0,  ecx,     7:0,    l2_line_size           , L2 cache line size, in bytes
+0x80000006,         0,  ecx,    11:8,    l2_nlines              , L2 cache number of lines per tag
+0x80000006,         0,  ecx,   15:12,    l2_assoc               , L2 cache associativity
+0x80000006,         0,  ecx,   31:16,    l2_size_kb             , L2 cache size, in KB
+0x80000006,         0,  edx,     7:0,    l3_line_size           , L3 cache line size, in bytes
+0x80000006,         0,  edx,    11:8,    l3_nlines              , L3 cache number of lines per tag
+0x80000006,         0,  edx,   15:12,    l3_assoc               , L3 cache associativity
+0x80000006,         0,  edx,   31:18,    l3_size_range          , L3 cache size range
 
 # Leaf 80000007H
-
-0x80000007,    0,  EDX,      8, nonstop_tsc, Invariant TSC available
-
+# CPU power management (mostly AMD) and AMD RAS enumeration
+
+0x80000007,         0,  ebx,       0,    overflow_recov         , MCA overflow conditions not fatal
+0x80000007,         0,  ebx,       1,    succor                 , Software containment of UnCORRectable errors
+0x80000007,         0,  ebx,       2,    hw_assert              , Hardware assert MSRs
+0x80000007,         0,  ebx,       3,    smca                   , Scalable MCA (MCAX MSRs)
+0x80000007,         0,  ecx,    31:0,    cpu_pwr_sample_ratio   , CPU power sample time ratio
+0x80000007,         0,  edx,       0,    digital_temp           , Digital temperature sensor
+0x80000007,         0,  edx,       1,    powernow_freq_id       , PowerNOW! frequency scaling
+0x80000007,         0,  edx,       2,    powernow_volt_id       , PowerNOW! voltage scaling
+0x80000007,         0,  edx,       3,    thermal_trip           , THERMTRIP (Thermal Trip)
+0x80000007,         0,  edx,       4,    hw_thermal_control     , Hardware thermal control
+0x80000007,         0,  edx,       5,    sw_thermal_control     , Software thermal control
+0x80000007,         0,  edx,       6,    100mhz_steps           , 100 MHz multiplier control
+0x80000007,         0,  edx,       7,    hw_pstate              , Hardware P-state control
+0x80000007,         0,  edx,       8,    constant_tsc           , TSC ticks at constant rate across all P and C states
+0x80000007,         0,  edx,       9,    cpb                    , Core performance boost
+0x80000007,         0,  edx,      10,    eff_freq_ro            , Read-only effective frequency interface
+0x80000007,         0,  edx,      11,    proc_feedback          , Processor feedback interface (deprecated)
+0x80000007,         0,  edx,      12,    acc_power              , Processor power reporting interface
+0x80000007,         0,  edx,      13,    connected_standby      , CPU Connected Standby support
+0x80000007,         0,  edx,      14,    rapl                   , Runtime Average Power Limit interface
 
 # Leaf 80000008H
-
-0x80000008,    0,  EAX,    7:0, phy_adr_bits, Physical Address Bits
-0x80000008,    0,  EAX,   15:8, lnr_adr_bits, Linear Address Bits
-0x80000007,    0,  EBX,      9, wbnoinvd, WBNOINVD
-
-# 0x8000001E
-# EAX: Extended APIC ID
-0x8000001E,	0, EAX,   31:0, extended_apic_id, Extended APIC ID
-# EBX: Core Identifiers
-0x8000001E,	0, EBX,    7:0, core_id, Identifies the logical core ID
-0x8000001E,	0, EBX,   15:8, threads_per_core, The number of threads per core is threads_per_core + 1
-# ECX: Node Identifiers
-0x8000001E,	0, ECX,    7:0, node_id, Node ID
-0x8000001E,	0, ECX,   10:8, nodes_per_processor, Nodes per processor { 0: 1 node, else reserved }
-
-# 8000001F: AMD Secure Encryption
-0x8000001F,	0, EAX,	     0, sme,	Secure Memory Encryption
-0x8000001F,	0, EAX,      1, sev,	Secure Encrypted Virtualization
-0x8000001F,	0, EAX,      2, vmpgflush, VM Page Flush MSR
-0x8000001F,	0, EAX,      3, seves, SEV Encrypted State
-0x8000001F,	0, EBX,    5:0, c-bit, Page table bit number used to enable memory encryption
-0x8000001F,	0, EBX,   11:6, mem_encrypt_physaddr_width, Reduction of physical address space in bits with SME enabled
-0x8000001F,	0, ECX,   31:0, num_encrypted_guests, Maximum ASID value that may be used for an SEV-enabled guest
-0x8000001F,	0, EDX,   31:0, minimum_sev_asid, Minimum ASID value that must be used for an SEV-enabled, SEV-ES-disabled guest
+# CPU capacity parameters and extended feature flags (mostly AMD)
+
+0x80000008,         0,  eax,     7:0,    phys_addr_bits         , Max physical address bits
+0x80000008,         0,  eax,    15:8,    virt_addr_bits         , Max virtual address bits
+0x80000008,         0,  eax,   23:16,    guest_phys_addr_bits   , Max nested-paging guest physical address bits
+0x80000008,         0,  ebx,       0,    clzero                 , CLZERO supported
+0x80000008,         0,  ebx,       1,    irperf                 , Instruction retired counter MSR
+0x80000008,         0,  ebx,       2,    xsaveerptr             , XSAVE/XRSTOR always saves/restores FPU error pointers
+0x80000008,         0,  ebx,       3,    invlpgb                , INVLPGB broadcasts a TLB invalidate to all threads
+0x80000008,         0,  ebx,       4,    rdpru                  , RDPRU (Read Processor Register at User level) supported
+0x80000008,         0,  ebx,       6,    mba                    , Memory Bandwidth Allocation (AMD bit)
+0x80000008,         0,  ebx,       8,    mcommit                , MCOMMIT (Memory commit) supported
+0x80000008,         0,  ebx,       9,    wbnoinvd               , WBNOINVD supported
+0x80000008,         0,  ebx,      12,    amd_ibpb               , Indirect Branch Prediction Barrier
+0x80000008,         0,  ebx,      13,    wbinvd_int             , Interruptible WBINVD/WBNOINVD
+0x80000008,         0,  ebx,      14,    amd_ibrs               , Indirect Branch Restricted Speculation
+0x80000008,         0,  ebx,      15,    amd_stibp              , Single Thread Indirect Branch Prediction mode
+0x80000008,         0,  ebx,      16,    ibrs_always_on         , IBRS always-on preferred
+0x80000008,         0,  ebx,      17,    amd_stibp_always_on    , STIBP always-on preferred
+0x80000008,         0,  ebx,      18,    ibrs_fast              , IBRS is preferred over software solution
+0x80000008,         0,  ebx,      19,    ibrs_same_mode         , IBRS provides same mode protection
+0x80000008,         0,  ebx,      20,    no_efer_lmsle          , EFER[LMSLE] bit (Long-Mode Segment Limit Enable) unsupported
+0x80000008,         0,  ebx,      21,    tlb_flush_nested       , INVLPGB RAX[5] bit can be set (nested translations)
+0x80000008,         0,  ebx,      23,    amd_ppin               , Protected Processor Inventory Number
+0x80000008,         0,  ebx,      24,    amd_ssbd               , Speculative Store Bypass Disable
+0x80000008,         0,  ebx,      25,    virt_ssbd              , virtualized SSBD (Speculative Store Bypass Disable)
+0x80000008,         0,  ebx,      26,    amd_ssb_no             , SSBD not needed (fixed in HW)
+0x80000008,         0,  ebx,      27,    cppc                   , Collaborative Processor Performance Control
+0x80000008,         0,  ebx,      28,    amd_psfd               , Predictive Store Forward Disable
+0x80000008,         0,  ebx,      29,    btc_no                 , CPU not affected by Branch Type Confusion
+0x80000008,         0,  ebx,      30,    ibpb_ret               , IBPB clears RSB/RAS too
+0x80000008,         0,  ebx,      31,    brs                    , Branch Sampling supported
+0x80000008,         0,  ecx,     7:0,    cpu_nthreads           , Number of physical threads - 1
+0x80000008,         0,  ecx,   15:12,    apicid_coreid_len      , Number of thread core ID bits (shift) in APIC ID
+0x80000008,         0,  ecx,   17:16,    perf_tsc_len           , Performance time-stamp counter size
+0x80000008,         0,  edx,    15:0,    invlpgb_max_pages      , INVLPGB maximum page count
+0x80000008,         0,  edx,   31:16,    rdpru_max_reg_id       , RDPRU max register ID (ECX input)
+
+# Leaf 8000000AH
+# AMD SVM (Secure Virtual Machine) enumeration
+
+0x8000000a,         0,  eax,     7:0,    svm_version            , SVM revision number
+0x8000000a,         0,  ebx,    31:0,    svm_nasid              , Number of address space identifiers (ASID)
+0x8000000a,         0,  edx,       0,    npt                    , Nested paging
+0x8000000a,         0,  edx,       1,    lbrv                   , LBR virtualization
+0x8000000a,         0,  edx,       2,    svm_lock               , SVM lock
+0x8000000a,         0,  edx,       3,    nrip_save              , NRIP save support on #VMEXIT
+0x8000000a,         0,  edx,       4,    tsc_scale              , MSR based TSC rate control
+0x8000000a,         0,  edx,       5,    vmcb_clean             , VMCB clean bits support
+0x8000000a,         0,  edx,       6,    flushbyasid            , Flush by ASID + Extended VMCB TLB_Control
+0x8000000a,         0,  edx,       7,    decodeassists          , Decode Assists support
+0x8000000a,         0,  edx,      10,    pausefilter            , Pause intercept filter
+0x8000000a,         0,  edx,      12,    pfthreshold            , Pause filter threshold
+0x8000000a,         0,  edx,      13,    avic                   , Advanced virtual interrupt controller
+0x8000000a,         0,  edx,      15,    v_vmsave_vmload        , Virtual VMSAVE/VMLOAD (nested virt)
+0x8000000a,         0,  edx,      16,    vgif                   , Virtualize the Global Interrupt Flag
+0x8000000a,         0,  edx,      17,    gmet                   , Guest mode execution trap
+0x8000000a,         0,  edx,      18,    x2avic                 , Virtual x2APIC
+0x8000000a,         0,  edx,      19,    sss_check              , Supervisor Shadow Stack restrictions
+0x8000000a,         0,  edx,      20,    v_spec_ctrl            , Virtual SPEC_CTRL
+0x8000000a,         0,  edx,      21,    ro_gpt                 , Read-Only guest page table support
+0x8000000a,         0,  edx,      23,    h_mce_override         , Host MCE override
+0x8000000a,         0,  edx,      24,    tlbsync_int            , TLBSYNC intercept + INVLPGB/TLBSYNC in VMCB
+0x8000000a,         0,  edx,      25,    vnmi                   , NMI virtualization
+0x8000000a,         0,  edx,      26,    ibs_virt               , IBS Virtualization
+0x8000000a,         0,  edx,      27,    ext_lvt_off_chg        , Extended LVT offset fault change
+0x8000000a,         0,  edx,      28,    svme_addr_chk          , Guest SVME addr check
+
+# Leaf 80000019H
+# AMD TLB 1G-pages enumeration
+
+0x80000019,         0,  eax,    11:0,    l1_itlb_1g_nentries    , L1 iTLB #entries, 1G pages
+0x80000019,         0,  eax,   15:12,    l1_itlb_1g_assoc       , L1 iTLB associativity, 1G pages
+0x80000019,         0,  eax,   27:16,    l1_dtlb_1g_nentries    , L1 dTLB #entries, 1G pages
+0x80000019,         0,  eax,   31:28,    l1_dtlb_1g_assoc       , L1 dTLB associativity, 1G pages
+0x80000019,         0,  ebx,    11:0,    l2_itlb_1g_nentries    , L2 iTLB #entries, 1G pages
+0x80000019,         0,  ebx,   15:12,    l2_itlb_1g_assoc       , L2 iTLB associativity, 1G pages
+0x80000019,         0,  ebx,   27:16,    l2_dtlb_1g_nentries    , L2 dTLB #entries, 1G pages
+0x80000019,         0,  ebx,   31:28,    l2_dtlb_1g_assoc       , L2 dTLB associativity, 1G pages
+
+# Leaf 8000001AH
+# AMD instruction optimizations enumeration
+
+0x8000001a,         0,  eax,       0,    fp_128                 , Internal FP/SIMD exec data path is 128-bits wide
+0x8000001a,         0,  eax,       1,    movu_preferred         , SSE: MOVU* better than MOVL*/MOVH*
+0x8000001a,         0,  eax,       2,    fp_256                 , Internal FP/SIMD exec data path is 256-bits wide
+
+# Leaf 8000001BH
+# AMD IBS (Instruction-Based Sampling) enumeration
+
+0x8000001b,         0,  eax,       0,    ibs_flags_valid        , IBS feature flags valid
+0x8000001b,         0,  eax,       1,    ibs_fetch_sampling     , IBS fetch sampling supported
+0x8000001b,         0,  eax,       2,    ibs_op_sampling        , IBS execution sampling supported
+0x8000001b,         0,  eax,       3,    ibs_rdwr_op_counter    , IBS read/write of op counter supported
+0x8000001b,         0,  eax,       4,    ibs_op_count           , IBS OP counting mode supported
+0x8000001b,         0,  eax,       5,    ibs_branch_target      , IBS branch target address reporting supported
+0x8000001b,         0,  eax,       6,    ibs_op_counters_ext    , IBS IbsOpCurCnt/IbsOpMaxCnt extend by 7 bits
+0x8000001b,         0,  eax,       7,    ibs_rip_invalid_chk    , IBS invalid RIP indication supported
+0x8000001b,         0,  eax,       8,    ibs_op_branch_fuse     , IBS fused branch micro-op indication supported
+0x8000001b,         0,  eax,       9,    ibs_fetch_ctl_ext      , IBS Fetch Control Extended MSR (0xc001103c) supported
+0x8000001b,         0,  eax,      10,    ibs_op_data_4          , IBS op data 4 MSR supported
+0x8000001b,         0,  eax,      11,    ibs_l3_miss_filter     , IBS L3-miss filtering supported (Zen4+)
+
+# Leaf 8000001CH
+# AMD LWP (Lightweight Profiling)
+
+0x8000001c,         0,  eax,       0,    os_lwp_avail           , LWP is available to application programs (supported by OS)
+0x8000001c,         0,  eax,       1,    os_lwpval              , LWPVAL instruction (EventId=1) is supported by OS
+0x8000001c,         0,  eax,       2,    os_lwp_ire             , Instructions Retired Event (EventId=2) is supported by OS
+0x8000001c,         0,  eax,       3,    os_lwp_bre             , Branch Retired Event (EventId=3) is supported by OS
+0x8000001c,         0,  eax,       4,    os_lwp_dme             , DCache Miss Event (EventId=4) is supported by OS
+0x8000001c,         0,  eax,       5,    os_lwp_cnh             , CPU Clocks Not Halted event (EventId=5) is supported by OS
+0x8000001c,         0,  eax,       6,    os_lwp_rnh             , CPU Reference clocks Not Halted event (EventId=6) is supported by OS
+0x8000001c,         0,  eax,      29,    os_lwp_cont            , LWP sampling in continuous mode is supported by OS
+0x8000001c,         0,  eax,      30,    os_lwp_ptsc            , Performance Time Stamp Counter in event records is supported by OS
+0x8000001c,         0,  eax,      31,    os_lwp_int             , Interrupt on threshold overflow is supported by OS
+0x8000001c,         0,  ebx,     7:0,    lwp_lwpcb_sz           , LWP Control Block size, in quadwords
+0x8000001c,         0,  ebx,    15:8,    lwp_event_sz           , LWP event record size, in bytes
+0x8000001c,         0,  ebx,   23:16,    lwp_max_events         , LWP max supported EventId value (EventID 255 not included)
+0x8000001c,         0,  ebx,   31:24,    lwp_event_offset       , LWP events area offset in the LWP Control Block
+0x8000001c,         0,  ecx,     4:0,    lwp_latency_max        , Num of bits in cache latency counters (10 to 31)
+0x8000001c,         0,  ecx,       5,    lwp_data_addr          , Cache miss events report the data address of the reference
+0x8000001c,         0,  ecx,     8:6,    lwp_latency_rnd        , Amount by which cache latency is rounded
+0x8000001c,         0,  ecx,    15:9,    lwp_version            , LWP implementation version
+0x8000001c,         0,  ecx,   23:16,    lwp_buf_min_sz         , LWP event ring buffer min size, in units of 32 event records
+0x8000001c,         0,  ecx,      28,    lwp_branch_predict     , Branches Retired events can be filtered
+0x8000001c,         0,  ecx,      29,    lwp_ip_filtering       , IP filtering (IPI, IPF, BaseIP, and LimitIP @ LWPCB) supported
+0x8000001c,         0,  ecx,      30,    lwp_cache_levels       , Cache-related events can be filtered by cache level
+0x8000001c,         0,  ecx,      31,    lwp_cache_latency      , Cache-related events can be filtered by latency
+0x8000001c,         0,  edx,       0,    hw_lwp_avail           , LWP is available in Hardware
+0x8000001c,         0,  edx,       1,    hw_lwpval              , LWPVAL instruction (EventId=1) is available in HW
+0x8000001c,         0,  edx,       2,    hw_lwp_ire             , Instructions Retired Event (EventId=2) is available in HW
+0x8000001c,         0,  edx,       3,    hw_lwp_bre             , Branch Retired Event (EventId=3) is available in HW
+0x8000001c,         0,  edx,       4,    hw_lwp_dme             , DCache Miss Event (EventId=4) is available in HW
+0x8000001c,         0,  edx,       5,    hw_lwp_cnh             , CPU Clocks Not Halted event (EventId=5) is available in HW
+0x8000001c,         0,  edx,       6,    hw_lwp_rnh             , CPU Reference clocks Not Halted event (EventId=6) is available in HW
+0x8000001c,         0,  edx,      29,    hw_lwp_cont            , LWP sampling in continuous mode is available in HW
+0x8000001c,         0,  edx,      30,    hw_lwp_ptsc            , Performance Time Stamp Counter in event records is available in HW
+0x8000001c,         0,  edx,      31,    hw_lwp_int             , Interrupt on threshold overflow is available in HW
+
+# Leaf 8000001DH
+# AMD deterministic cache parameters
+
+0x8000001d,      31:0,  eax,     4:0,    cache_type             , Cache type field
+0x8000001d,      31:0,  eax,     7:5,    cache_level            , Cache level (1-based)
+0x8000001d,      31:0,  eax,       8,    cache_self_init        , Self-initializing cache level
+0x8000001d,      31:0,  eax,       9,    fully_associative      , Fully-associative cache
+0x8000001d,      31:0,  eax,   25:14,    num_threads_sharing    , Number of logical CPUs sharing cache
+0x8000001d,      31:0,  ebx,    11:0,    cache_linesize         , System coherency line size (0-based)
+0x8000001d,      31:0,  ebx,   21:12,    cache_npartitions      , Physical line partitions (0-based)
+0x8000001d,      31:0,  ebx,   31:22,    cache_nways            , Ways of associativity (0-based)
+0x8000001d,      31:0,  ecx,    30:0,    cache_nsets            , Cache number of sets (0-based)
+0x8000001d,      31:0,  edx,       0,    wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
+0x8000001d,      31:0,  edx,       1,    ll_inclusive           , Cache is inclusive of Lower-Level caches
+
+# Leaf 8000001EH
+# AMD CPU topology enumeration
+
+0x8000001e,         0,  eax,    31:0,    ext_apic_id            , Extended APIC ID
+0x8000001e,         0,  ebx,     7:0,    core_id                , Unique per-socket logical core unit ID
+0x8000001e,         0,  ebx,    15:8,    core_nthreads          , #Threads per core (zero-based)
+0x8000001e,         0,  ecx,     7:0,    node_id                , Node (die) ID of invoking logical CPU
+0x8000001e,         0,  ecx,    10:8,    nnodes_per_socket      , #nodes in invoking logical CPU's package/socket
+
+# Leaf 8000001FH
+# AMD encrypted memory capabilities enumeration (SME/SEV)
+
+0x8000001f,         0,  eax,       0,    sme                    , Secure Memory Encryption supported
+0x8000001f,         0,  eax,       1,    sev                    , Secure Encrypted Virtualization supported
+0x8000001f,         0,  eax,       2,    vm_page_flush          , VM Page Flush MSR (0xc001011e) available
+0x8000001f,         0,  eax,       3,    sev_es                 , SEV Encrypted State supported
+0x8000001f,         0,  eax,       4,    sev_nested_paging      , SEV secure nested paging supported
+0x8000001f,         0,  eax,       5,    vm_permission_levels   , VMPL supported
+0x8000001f,         0,  eax,       6,    rpmquery               , RPMQUERY instruction supported
+0x8000001f,         0,  eax,       7,    vmpl_sss               , VMPL supervisor shadow stack supported
+0x8000001f,         0,  eax,       8,    secure_tsc             , Secure TSC supported
+0x8000001f,         0,  eax,       9,    v_tsc_aux              , Hardware virtualizes TSC_AUX
+0x8000001f,         0,  eax,      10,    sme_coherent           , HW enforces cache coherency across encryption domains
+0x8000001f,         0,  eax,      11,    req_64bit_hypervisor   , SEV guest mandates 64-bit hypervisor
+0x8000001f,         0,  eax,      12,    restricted_injection   , Restricted Injection supported
+0x8000001f,         0,  eax,      13,    alternate_injection    , Alternate Injection supported
+0x8000001f,         0,  eax,      14,    debug_swap             , SEV-ES: full debug state swap is supported
+0x8000001f,         0,  eax,      15,    disallow_host_ibs      , SEV-ES: Disallowing IBS use by the host is supported
+0x8000001f,         0,  eax,      16,    virt_transparent_enc   , Virtual Transparent Encryption
+0x8000001f,         0,  eax,      17,    vmgexit_parameter      , VmgexitParameter is supported in SEV_FEATURES
+0x8000001f,         0,  eax,      18,    virt_tom_msr           , Virtual TOM MSR is supported
+0x8000001f,         0,  eax,      19,    virt_ibs               , IBS state virtualization is supported for SEV-ES guests
+0x8000001f,         0,  eax,      24,    vmsa_reg_protection    , VMSA register protection is supported
+0x8000001f,         0,  eax,      25,    smt_protection         , SMT protection is supported
+0x8000001f,         0,  eax,      28,    svsm_page_msr          , SVSM communication page MSR (0xc001f000) is supported
+0x8000001f,         0,  eax,      29,    nested_virt_snp_msr    , VIRT_RMPUPDATE/VIRT_PSMASH MSRs are supported
+0x8000001f,         0,  ebx,     5:0,    pte_cbit_pos           , PTE bit number used to enable memory encryption
+0x8000001f,         0,  ebx,    11:6,    phys_addr_reduction_nbits, Reduction of phys address space when encryption is enabled, in bits
+0x8000001f,         0,  ebx,   15:12,    vmpl_count             , Number of VM permission levels (VMPL) supported
+0x8000001f,         0,  ecx,    31:0,    enc_guests_max         , Max supported number of simultaneous encrypted guests
+0x8000001f,         0,  edx,    31:0,    min_sev_asid_no_sev_es , Minimum ASID for SEV-enabled SEV-ES-disabled guest
+
+# Leaf 80000020H
+# AMD Platform QoS extended feature IDs
+
+0x80000020,         0,  ebx,       1,    mba                    , Memory Bandwidth Allocation support
+0x80000020,         0,  ebx,       2,    smba                   , Slow Memory Bandwidth Allocation support
+0x80000020,         0,  ebx,       3,    bmec                   , Bandwidth Monitoring Event Configuration support
+0x80000020,         0,  ebx,       4,    l3rr                   , L3 Range Reservation support
+0x80000020,         1,  eax,    31:0,    mba_limit_len          , MBA enforcement limit size
+0x80000020,         1,  edx,    31:0,    mba_cos_max            , MBA max Class of Service number (zero-based)
+0x80000020,         2,  eax,    31:0,    smba_limit_len         , SMBA enforcement limit size
+0x80000020,         2,  edx,    31:0,    smba_cos_max           , SMBA max Class of Service number (zero-based)
+0x80000020,         3,  ebx,     7:0,    bmec_num_events        , BMEC number of bandwidth events available
+0x80000020,         3,  ecx,       0,    bmec_local_reads       , Local NUMA reads can be tracked
+0x80000020,         3,  ecx,       1,    bmec_remote_reads      , Remote NUMA reads can be tracked
+0x80000020,         3,  ecx,       2,    bmec_local_nontemp_wr  , Local NUMA non-temporal writes can be tracked
+0x80000020,         3,  ecx,       3,    bmec_remote_nontemp_wr , Remote NUMA non-temporal writes can be tracked
+0x80000020,         3,  ecx,       4,    bmec_local_slow_mem_rd , Local NUMA slow-memory reads can be tracked
+0x80000020,         3,  ecx,       5,    bmec_remote_slow_mem_rd, Remote NUMA slow-memory reads can be tracked
+0x80000020,         3,  ecx,       6,    bmec_all_dirty_victims , Dirty QoS victims to all types of memory can be tracked
+
+# Leaf 80000021H
+# AMD extended features enumeration 2
+
+0x80000021,         0,  eax,       0,    no_nested_data_bp      , No nested data breakpoints
+0x80000021,         0,  eax,       1,    fsgs_non_serializing   , WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing
+0x80000021,         0,  eax,       2,    lfence_rdtsc           , LFENCE always serializing / synchronizes RDTSC
+0x80000021,         0,  eax,       3,    smm_page_cfg_lock      , SMM paging configuration lock is supported
+0x80000021,         0,  eax,       6,    null_sel_clr_base      , Null selector clears base
+0x80000021,         0,  eax,       7,    upper_addr_ignore      , EFER MSR Upper Address Ignore Enable bit supported
+0x80000021,         0,  eax,       8,    autoibrs               , EFER MSR Automatic IBRS enable bit supported
+0x80000021,         0,  eax,       9,    no_smm_ctl_msr         , SMM_CTL MSR (0xc0010116) is not present
+0x80000021,         0,  eax,      10,    fsrs_supported         , Fast Short Rep Stosb (FSRS) is supported
+0x80000021,         0,  eax,      11,    fsrc_supported         , Fast Short Repe Cmpsb (FSRC) is supported
+0x80000021,         0,  eax,      13,    prefetch_ctl_msr       , Prefetch control MSR is supported
+0x80000021,         0,  eax,      17,    user_cpuid_disable     , #GP when executing CPUID at CPL > 0 is supported
+0x80000021,         0,  eax,      18,    epsf_supported         , Enhanced Predictive Store Forwarding (EPSF) is supported
+0x80000021,         0,  ebx,    11:0,    microcode_patch_size   , Size of microcode patch, in 16-byte units
+
+# Leaf 80000022H
+# AMD Performance Monitoring v2 enumeration
+
+0x80000022,         0,  eax,       0,    perfmon_v2             , Performance monitoring v2 supported
+0x80000022,         0,  eax,       1,    lbr_v2                 , Last Branch Record v2 extensions (LBR Stack)
+0x80000022,         0,  eax,       2,    lbr_pmc_freeze         , Freezing core performance counters / LBR Stack supported
+0x80000022,         0,  ebx,     3:0,    n_pmc_core             , Number of core performance counters
+0x80000022,         0,  ebx,     9:4,    lbr_v2_stack_size      , Number of available LBR stack entries
+0x80000022,         0,  ebx,   15:10,    n_pmc_northbridge      , Number of available northbridge (data fabric) performance counters
+0x80000022,         0,  ebx,   21:16,    n_pmc_umc              , Number of available UMC performance counters
+0x80000022,         0,  ecx,    31:0,    active_umc_bitmask     , Active UMCs bitmask
+
+# Leaf 80000023H
+# AMD Secure Multi-key Encryption enumeration
+
+0x80000023,         0,  eax,       0,    mem_hmk_mode           , MEM-HMK encryption mode is supported
+0x80000023,         0,  ebx,    15:0,    mem_hmk_avail_keys     , MEM-HMK mode: total num of available encryption keys
+
+# Leaf 80000026H
+# AMD extended topology enumeration v2
+
+0x80000026,       3:0,  eax,     4:0,    x2apic_id_shift        , Bit width of this level (previous levels inclusive)
+0x80000026,       3:0,  eax,      29,    core_has_pwreff_ranking, This core has a power efficiency ranking
+0x80000026,       3:0,  eax,      30,    domain_has_hybrid_cores, This domain level has hybrid (E, P) cores
+0x80000026,       3:0,  eax,      31,    domain_core_count_asymm, The 'Core' domain has an asymmetric core count
+0x80000026,       3:0,  ebx,    15:0,    domain_lcpus_count     , Number of logical CPUs at this domain instance
+0x80000026,       3:0,  ebx,   23:16,    core_pwreff_ranking    , This core's static power efficiency ranking
+0x80000026,       3:0,  ebx,   27:24,    core_native_model_id   , This core's native model ID
+0x80000026,       3:0,  ebx,   31:28,    core_type              , This core's type
+0x80000026,       3:0,  ecx,     7:0,    domain_level           , This domain level (subleaf ID)
+0x80000026,       3:0,  ecx,    15:8,    domain_type            , This domain type
+0x80000026,       3:0,  edx,    31:0,    x2apic_id              , x2APIC ID of current logical CPU
diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c
index 24b7d017ec2c..1b25c0a95d3f 100644
--- a/tools/arch/x86/kcpuid/kcpuid.c
+++ b/tools/arch/x86/kcpuid/kcpuid.c
@@ -7,7 +7,8 @@
 #include <string.h>
 #include <getopt.h>
 
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define ARRAY_SIZE(x)	(sizeof(x) / sizeof((x)[0]))
+#define min(a, b)	(((a) < (b)) ? (a) : (b))
 
 typedef unsigned int u32;
 typedef unsigned long long u64;
@@ -76,7 +77,6 @@ struct cpuid_range {
  */
 struct cpuid_range *leafs_basic, *leafs_ext;
 
-static int num_leafs;
 static bool is_amd;
 static bool show_details;
 static bool show_raw;
@@ -98,27 +98,17 @@ static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
 
 static inline bool has_subleafs(u32 f)
 {
-	if (f == 0x7 || f == 0xd)
-		return true;
-
-	if (is_amd) {
-		if (f == 0x8000001d)
+	u32 with_subleaves[] = {
+		0x4,  0x7,  0xb,  0xd,  0xf,  0x10, 0x12,
+		0x14, 0x17, 0x18, 0x1b, 0x1d, 0x1f, 0x23,
+		0x8000001d, 0x80000020, 0x80000026,
+	};
+
+	for (unsigned i = 0; i < ARRAY_SIZE(with_subleaves); i++)
+		if (f == with_subleaves[i])
 			return true;
-		return false;
-	}
 
-	switch (f) {
-	case 0x4:
-	case 0xb:
-	case 0xf:
-	case 0x10:
-	case 0x14:
-	case 0x18:
-	case 0x1f:
-		return true;
-	default:
-		return false;
-	}
+	return false;
 }
 
 static void leaf_print_raw(struct subleaf *leaf)
@@ -204,15 +194,12 @@ static void raw_dump_range(struct cpuid_range *range)
 	}
 }
 
-#define MAX_SUBLEAF_NUM		32
+#define MAX_SUBLEAF_NUM		64
 struct cpuid_range *setup_cpuid_range(u32 input_eax)
 {
-	u32 max_func, idx_func;
-	int subleaf;
+	u32 max_func, idx_func, subleaf, max_subleaf;
+	u32 eax, ebx, ecx, edx, f = input_eax;
 	struct cpuid_range *range;
-	u32 eax, ebx, ecx, edx;
-	u32 f = input_eax;
-	int max_subleaf;
 	bool allzero;
 
 	eax = input_eax;
@@ -246,7 +233,6 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
 		allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx);
 		if (allzero)
 			continue;
-		num_leafs++;
 
 		if (!has_subleafs(f))
 			continue;
@@ -257,11 +243,18 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
 		 * Some can provide the exact number of subleafs,
 		 * others have to be tried (0xf)
 		 */
-		if (f == 0x7 || f == 0x14 || f == 0x17 || f == 0x18)
-			max_subleaf = (eax & 0xff) + 1;
-
+		if (f == 0x7 || f == 0x14 || f == 0x17 || f == 0x18 || f == 0x1d)
+			max_subleaf = min((eax & 0xff) + 1, max_subleaf);
 		if (f == 0xb)
 			max_subleaf = 2;
+		if (f == 0x1f)
+			max_subleaf = 6;
+		if (f == 0x23)
+			max_subleaf = 4;
+		if (f == 0x80000020)
+			max_subleaf = 4;
+		if (f == 0x80000026)
+			max_subleaf = 5;
 
 		for (subleaf = 1; subleaf < max_subleaf; subleaf++) {
 			eax = f;
@@ -272,7 +265,6 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
 						eax, ebx, ecx, edx);
 			if (allzero)
 				continue;
-			num_leafs++;
 		}
 
 	}
@@ -313,6 +305,8 @@ static int parse_line(char *line)
 	struct bits_desc *bdesc;
 	int reg_index;
 	char *start, *end;
+	u32 subleaf_start, subleaf_end;
+	unsigned bit_start, bit_end;
 
 	/* Skip comments and NULL line */
 	if (line[0] == '#' || line[0] == '\n')
@@ -351,13 +345,25 @@ static int parse_line(char *line)
 		return 0;
 
 	/* subleaf */
-	sub = strtoul(tokens[1], NULL, 0);
-	if ((int)sub > func->nr)
-		return -1;
+	buf = tokens[1];
+	end = strtok(buf, ":");
+	start = strtok(NULL, ":");
+	subleaf_end = strtoul(end, NULL, 0);
+
+	/* A subleaf range is given? */
+	if (start) {
+		subleaf_start = strtoul(start, NULL, 0);
+		subleaf_end = min(subleaf_end, (u32)(func->nr - 1));
+		if (subleaf_start > subleaf_end)
+			return 0;
+	} else {
+		subleaf_start = subleaf_end;
+		if (subleaf_start > (u32)(func->nr - 1))
+			return 0;
+	}
 
-	leaf = &func->leafs[sub];
+	/* register */
 	buf = tokens[2];
-
 	if (strcasestr(buf, "EAX"))
 		reg_index = R_EAX;
 	else if (strcasestr(buf, "EBX"))
@@ -369,23 +375,23 @@ static int parse_line(char *line)
 	else
 		goto err_exit;
 
-	reg = &leaf->info[reg_index];
-	bdesc = &reg->descs[reg->nr++];
-
 	/* bit flag or bits field */
 	buf = tokens[3];
-
 	end = strtok(buf, ":");
-	bdesc->end = strtoul(end, NULL, 0);
-	bdesc->start = bdesc->end;
-
-	/* start != NULL means it is bit fields */
 	start = strtok(NULL, ":");
-	if (start)
-		bdesc->start = strtoul(start, NULL, 0);
-
-	strcpy(bdesc->simp, tokens[4]);
-	strcpy(bdesc->detail, tokens[5]);
+	bit_end = strtoul(end, NULL, 0);
+	bit_start = (start) ? strtoul(start, NULL, 0) : bit_end;
+
+	for (sub = subleaf_start; sub <= subleaf_end; sub++) {
+		leaf = &func->leafs[sub];
+		reg = &leaf->info[reg_index];
+		bdesc = &reg->descs[reg->nr++];
+
+		bdesc->end = bit_end;
+		bdesc->start = bit_start;
+		strcpy(bdesc->simp, strtok(tokens[4], " \t"));
+		strcpy(bdesc->detail, tokens[5]);
+	}
 	return 0;
 
 err_exit:
@@ -452,8 +458,9 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg)
 		if (start == end) {
 			/* single bit flag */
 			if (value & (1 << start))
-				printf("\t%-20s %s%s\n",
+				printf("\t%-20s %s%s%s\n",
 					bdesc->simp,
+				        show_flags_only ? "" : "\t\t\t",
 					show_details ? "-" : "",
 					show_details ? bdesc->detail : ""
 					);
Re: [GIT pull] x86/misc for v6.12-rc1
Posted by pr-tracker-bot@kernel.org 2 months, 1 week ago
The pull request you sent on Tue, 17 Sep 2024 10:54:14 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-misc-2024-09-17

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/b50753547453613eb5d0fada99d55583852c42df

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html