[v4] x86: Support for Intel Microcode Staging Feature

[PATCH v4 2/6] x86/microcode/intel: Establish staging control logic

Posted by Chang S. Bae 1 month, 3 weeks ago

When microcode staging is initiated, operations are carried out through
an MMIO interface. Each package has a unique interface specified by the
IA32_MCU_STAGING_MBOX_ADDR MSR, which maps to a set of 32-bit registers.

Prepare staging with the following steps:

  1.  Ensure the microcode image is 32-bit aligned to match the MMIO
      register size.

  2.  Identify each MMIO interface based on its per-package scope.

  3.  Invoke the staging function for each identified interface, which
      will be implemented separately.

Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
consistent use when narrowing down primary threads to locate the
per-package interface.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Tested-by: Anselm Busse <abusse@amazon.de>
Link: https://lore.kernel.org/all/871pznq229.ffs@tglx
---
V2 -> V3:
* Remove a global variable and adjust stage_microcode() (Dave).
* Simplify for_each_cpu() loop control code
* Handle rdmsrl_on_cpu() return code explicitly (Chao)

V1 -> V2:
* Adjust to reference the staging_state struct.
* Add lockdep_assert_cpus_held() (Boris)
---
 arch/x86/include/asm/msr-index.h      |  2 ++
 arch/x86/include/asm/topology.h       |  1 +
 arch/x86/kernel/cpu/microcode/intel.c | 50 +++++++++++++++++++++++++++
 3 files changed, 53 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b65c3ba5fa14..0356155f9264 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -913,6 +913,8 @@
 #define MSR_IA32_UCODE_WRITE		0x00000079
 #define MSR_IA32_UCODE_REV		0x0000008b
 
+#define MSR_IA32_MCU_STAGING_MBOX_ADDR	0x000007a5
+
 /* Intel SGX Launch Enclave Public Key Hash MSRs */
 #define MSR_IA32_SGXLEPUBKEYHASH0	0x0000008C
 #define MSR_IA32_SGXLEPUBKEYHASH1	0x0000008D
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..91b5fc44ca62 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -235,6 +235,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 static inline int topology_max_smt_threads(void) { return 1; }
 static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#define cpu_primary_thread_mask cpu_none_mask
 #endif /* !CONFIG_SMP */
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 371ca6eac00e..468c4d3d5d66 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -299,6 +299,55 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
 	return size ? NULL : patch;
 }
 
+/*
+ * Handle the staging process using the mailbox MMIO interface.
+ * Return the result state.
+ */
+static enum ucode_state do_stage(u64 mmio_pa)
+{
+	pr_debug_once("Staging implementation is pending.\n");
+	return UCODE_ERROR;
+}
+
+static void stage_microcode(void)
+{
+	unsigned int pkg_id = UINT_MAX;
+	enum ucode_state ret;
+	int cpu, err;
+	u64 mmio_pa;
+
+	if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32)))
+		return;
+
+	lockdep_assert_cpus_held();
+
+	/*
+	 * The MMIO address is unique per package, and all the SMT
+	 * primary threads are online here. Find each MMIO space by
+	 * their package ids to avoid duplicate staging.
+	 */
+	for_each_cpu(cpu, cpu_primary_thread_mask) {
+		if (topology_logical_package_id(cpu) == pkg_id)
+			continue;
+		pkg_id = topology_logical_package_id(cpu);
+
+		err = rdmsrl_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa);
+		if (WARN_ON_ONCE(err))
+			return;
+
+		ret = do_stage(mmio_pa);
+		if (ret != UCODE_OK) {
+			pr_err("Error: staging failed with %s for CPU%d at package %u.\n",
+			       ret == UCODE_TIMEOUT ? "timeout" : "error state",
+			       cpu, pkg_id);
+			return;
+		}
+	}
+
+	pr_info("Staging of patch revision 0x%x succeeded.\n",
+		((struct microcode_header_intel *)ucode_patch_late)->rev);
+}
+
 static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
 					  struct microcode_intel *mc,
 					  u32 *cur_rev)
@@ -627,6 +676,7 @@ static struct microcode_ops microcode_intel_ops = {
 	.collect_cpu_info	= collect_cpu_info,
 	.apply_microcode	= apply_microcode_late,
 	.finalize_late_load	= finalize_late_load,
+	.stage_microcode	= stage_microcode,
 	.use_nmi		= IS_ENABLED(CONFIG_X86_64),
 };
 
-- 
2.48.1

Re: [PATCH v4 2/6] x86/microcode/intel: Establish staging control logic

Posted by Dave Hansen 1 month, 3 weeks ago

On 8/13/25 10:26, Chang S. Bae wrote:
> When microcode staging is initiated, operations are carried out through
> an MMIO interface. Each package has a unique interface specified by the
> IA32_MCU_STAGING_MBOX_ADDR MSR, which maps to a set of 32-bit registers.
> 
> Prepare staging with the following steps:
> 
>   1.  Ensure the microcode image is 32-bit aligned to match the MMIO
>       register size.
> 
>   2.  Identify each MMIO interface based on its per-package scope.
> 
>   3.  Invoke the staging function for each identified interface, which
>       will be implemented separately.
> 
> Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
> consistent use when narrowing down primary threads to locate the
> per-package interface.
...
>  static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
> +#define cpu_primary_thread_mask cpu_none_mask
>  #endif /* !CONFIG_SMP */

Isn't 'cpu_none_mask' a mask containing no CPUs? How can that possible
work here:

	for_each_cpu(cpu, cpu_primary_thread_mask) {

? Wouldn't it just not run through the for loop at all on CONFIG_SMP=n?
Is that what we want for some reason? I would have thought that we'd
still want to find the MMIO address for CPU 0, the one and only CPU.

>  static inline void arch_fix_phys_package_id(int num, u32 slot)
> diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
> index 371ca6eac00e..468c4d3d5d66 100644
> --- a/arch/x86/kernel/cpu/microcode/intel.c
> +++ b/arch/x86/kernel/cpu/microcode/intel.c
> @@ -299,6 +299,55 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
>  	return size ? NULL : patch;
>  }
>  
> +/*
> + * Handle the staging process using the mailbox MMIO interface.
> + * Return the result state.
> + */
> +static enum ucode_state do_stage(u64 mmio_pa)
> +{
> +	pr_debug_once("Staging implementation is pending.\n");
> +	return UCODE_ERROR;
> +}
> +
> +static void stage_microcode(void)
> +{
> +	unsigned int pkg_id = UINT_MAX;
> +	enum ucode_state ret;
> +	int cpu, err;
> +	u64 mmio_pa;
> +
> +	if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32)))
> +		return;
> +
> +	lockdep_assert_cpus_held();
> +
> +	/*
> +	 * The MMIO address is unique per package, and all the SMT
> +	 * primary threads are online here. Find each MMIO space by
> +	 * their package ids to avoid duplicate staging.
> +	 */
> +	for_each_cpu(cpu, cpu_primary_thread_mask) {
> +		if (topology_logical_package_id(cpu) == pkg_id)
> +			continue;
> +		pkg_id = topology_logical_package_id(cpu);
> +
> +		err = rdmsrl_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa);
> +		if (WARN_ON_ONCE(err))
> +			return;
> +
> +		ret = do_stage(mmio_pa);
> +		if (ret != UCODE_OK) {
> +			pr_err("Error: staging failed with %s for CPU%d at package %u.\n",
> +			       ret == UCODE_TIMEOUT ? "timeout" : "error state",
> +			       cpu, pkg_id);
> +			return;
> +		}
> +	}
> +
> +	pr_info("Staging of patch revision 0x%x succeeded.\n",
> +		((struct microcode_header_intel *)ucode_patch_late)->rev);
> +}
Hmmm. Consider:

	static struct microcode_intel *ucode_patch_late __read_mostly;

and:

	struct microcode_intel {
	        struct microcode_header_intel   hdr;
	        unsigned int                    bits[];
	};

So isn't this whole ugly cast thing equivalent to:

	ucode_patch_late->hdr.rev

?

Re: [PATCH v4 2/6] x86/microcode/intel: Establish staging control logic

Posted by Chang S. Bae 1 month, 3 weeks ago

On 8/13/2025 11:21 AM, Dave Hansen wrote:
> On 8/13/25 10:26, Chang S. Bae wrote:
>>
>> Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
>> consistent use when narrowing down primary threads to locate the
>> per-package interface.
> ...
>>   static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
>> +#define cpu_primary_thread_mask cpu_none_mask
>>   #endif /* !CONFIG_SMP */
> 
> Isn't 'cpu_none_mask' a mask containing no CPUs? How can that possible
> work here:
> 
> 	for_each_cpu(cpu, cpu_primary_thread_mask) {
> 
> ? Wouldn't it just not run through the for loop at all on CONFIG_SMP=n?
> Is that what we want for some reason? I would have thought that we'd
> still want to find the MMIO address for CPU 0, the one and only CPU.

Yeah, right.

Then, looking at it again, I see this:

config MICROCODE_LATE_LOADING
	bool "Late microcode loading (DANGEROUS)"
	default n
	depends on MICROCODE && SMP

This optimization only applies to the late-loading path. But, today I 
also had to clarify this dependency for myself. At least, my changelog 
could've made it clearer, sorry.

>> +
>> +	pr_info("Staging of patch revision 0x%x succeeded.\n",
>> +		((struct microcode_header_intel *)ucode_patch_late)->rev);
>> +}
> Hmmm. Consider:
> 
> 	static struct microcode_intel *ucode_patch_late __read_mostly;
> 
> and:
> 
> 	struct microcode_intel {
> 	        struct microcode_header_intel   hdr;
> 	        unsigned int                    bits[];
> 	};
> 
> So isn't this whole ugly cast thing equivalent to:
> 
> 	ucode_patch_late->hdr.rev
> 
> ?

Indeed. I must have been blind to that bit of ugliness. Thanks for 
spotting on it!

Chang

Re: [PATCH v4 2/6] x86/microcode/intel: Establish staging control logic

Posted by Dave Hansen 1 month, 3 weeks ago

On 8/13/25 13:46, Chang S. Bae wrote:
>> Isn't 'cpu_none_mask' a mask containing no CPUs? How can that possible
>> work here:
>>
>>     for_each_cpu(cpu, cpu_primary_thread_mask) {
>>
>> ? Wouldn't it just not run through the for loop at all on CONFIG_SMP=n?
>> Is that what we want for some reason? I would have thought that we'd
>> still want to find the MMIO address for CPU 0, the one and only CPU.
> 
> Yeah, right.
> 
> Then, looking at it again, I see this:
> 
> config MICROCODE_LATE_LOADING
>     bool "Late microcode loading (DANGEROUS)"
>     default n
>     depends on MICROCODE && SMP
> 
> This optimization only applies to the late-loading path. But, today I
> also had to clarify this dependency for myself. At least, my changelog
> could've made it clearer, sorry.

I'm not following.

I _think_ you're trying to say that it's a "no harm, no foul" situation
because this user of 'cpu_primary_thread_mask' won't even get compiled
in the buggy !SMP case.

But that's not the problem. The issue is that this line of code:

#define cpu_primary_thread_mask cpu_none_mask

reads as 100% bogus to me. Even on !SMP kernels,
'cpu_primary_thread_mask' should have one CPU in it. Right? The _one_
thread that's present is a primary thread. If this were a mask for
secondary threads, 'cpu_none_mask' would make sense. But it's not.

So could we please make use 'cpu_primary_thread_mask' is getting defined
correctly whether it's really getting compiled into the end image or not?

Re: [PATCH v4 2/6] x86/microcode/intel: Establish staging control logic

Posted by Chang S. Bae 1 month, 3 weeks ago

On 8/13/2025 1:55 PM, Dave Hansen wrote:
> 
> But that's not the problem. The issue is that this line of code:
> 
> #define cpu_primary_thread_mask cpu_none_mask

With CONFIG_SMP=n, on the core side (include/linux/cpu_smt.h), the code 
clarifies there is no SMT:

# define cpu_smt_control               (CPU_SMT_NOT_IMPLEMENTED)

This leads kernel/cpu.c to return an empty mask:

static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
{
	return cpu_none_mask;
}

On the x86 side, the definition is explicit that “primary threads” are 
SMT threads (arch/x86/kernel/smpboot.c):

/* CPUs which are the primary SMT threads */
struct cpumask __cpu_primary_thread_mask __read_mostly;

And via ifdeffery, this mask is only available to SMP kernels.

So it seems I had been subscribing this model -- no primary threads 
without SMP.

> reads as 100% bogus to me. Even on !SMP kernels,
> 'cpu_primary_thread_mask' should have one CPU in it. Right? The _one_
> thread that's present is a primary thread. If this were a mask for
> secondary threads, 'cpu_none_mask' would make sense. But it's not.

Your confidence made me take another look.

Digging into the history, I found that x86 used to have this in the !SMP 
case:

static inline bool topology_is_primary_thread(unsigned int cpu)
{
	return true;
}

That stayed until the recent commit 4b455f59945aa ("cpu/SMT: Provide a 
default topology_is_primary_thread()"), which now defines it in 
include/linux/topology.h with this telling comment:

/*
  * When disabling SMT, the primary thread of the SMT will remain
  * enabled/active. Architectures that have a special primary thread
  * (e.g. x86) need to override this function. ...
  */

This comment basically supports your point.

> So could we please make use 'cpu_primary_thread_mask' is getting defined
> correctly whether it's really getting compiled into the end image or not?

Given that, I’m thinking of simplifying the x86 side a bit -- by making 
the primary thread mask configured and available regardless of 
CONFIG_SMP, matching the behavior of other cpumasks. And its relevant 
helpers are also available, like in the attached diff.

I think the change still aligns x86 with the core code -- especially 
with the note in topology_is_primary_thread(). With that, the user may 
be introduced here.
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..281252af6e9d 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -218,6 +218,12 @@ static inline unsigned int topology_amd_nodes_per_pkg(void)
 	return __amd_nodes_per_pkg;
 }
 
+#else /* CONFIG_SMP */
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#endif /* !CONFIG_SMP */
+
 extern struct cpumask __cpu_primary_thread_mask;
 #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
 
@@ -231,12 +237,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
 }
 #define topology_is_primary_thread topology_is_primary_thread
 
-#else /* CONFIG_SMP */
-static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_smt_threads(void) { return 1; }
-static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
-#endif /* !CONFIG_SMP */
-
 static inline void arch_fix_phys_package_id(int num, u32 slot)
 {
 }
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index e35ccdc84910..946004d7dd1d 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -52,6 +52,9 @@ u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
 /* Bitmaps to mark registered APICs at each topology domain */
 static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
 
+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
 /*
  * Keep track of assigned, disabled and rejected CPUs. Present assigned
  * with 1 as CPU #0 is reserved for the boot CPU.
@@ -75,15 +78,11 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 	return phys_id == (u64)cpuid_to_apicid[cpu];
 }
 
-#ifdef CONFIG_SMP
 static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
 {
 	if (!(apicid & (__max_threads_per_core - 1)))
 		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
 }
-#else
-static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
-#endif
 
 /*
  * Convert the APIC ID to a domain level ID by masking out the low bits
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 33e166f6ab12..7804175d2d87 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -103,9 +103,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
 EXPORT_PER_CPU_SYMBOL(cpu_die_map);
 
-/* CPUs which are the primary SMT threads */
-struct cpumask __cpu_primary_thread_mask __read_mostly;
-
 /* Representing CPUs for which sibling maps can be computed */
 static cpumask_var_t cpu_sibling_setup_mask;

[PATCH] x86/cpu/topology: Make primary thread mask available with SMP=n

Posted by Chang S. Bae 1 month, 1 week ago

cpu_primary_thread_mask is only defined when CONFIG_SMP=y. However, even
in UP kernels there is always exactly one CPU, which can reasonably be
treated as the primary thread.

Historically, topology_is_primary_thread() always returned true with
CONFIG_SMP=n. A recent commit:

  4b455f59945aa ("cpu/SMT: Provide a default topology_is_primary_thread()")

replaced it with a generic implementation with the note:

  "When disabling SMT, the primary thread of the SMT will remain
   enabled/active. Architectures that have a special primary thread (e.g.
   x86) need to override this function. ..."

For consistency and clarity, make the primary thread mask available
regardless of SMP, similar to cpu_possible_mask and cpu_present_mask.

Move __cpu_primary_thread_mask into common code to prevent build issues.
Let cpu_mark_primary_thread() configure the mask even for UP kernels,
alongside other masks. Then, topology_is_primary_thread() can
consistently reference it.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
This improvement was identified from feedback on a feature-enabling
series [*], where a user of this mask is introduced. It is posted here as
a standalone patch for clarity and self-containment. The next revision of
that series will depend on this.

[*] https://lore.kernel.org/lkml/20250813172649.15474-1-chang.seok.bae@intel.com/
---
 arch/x86/include/asm/topology.h       | 12 ++++++------
 arch/x86/kernel/cpu/topology.c        |  4 ----
 arch/x86/kernel/cpu/topology_common.c |  3 +++
 arch/x86/kernel/smpboot.c             |  3 ---
 4 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..281252af6e9d 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -218,6 +218,12 @@ static inline unsigned int topology_amd_nodes_per_pkg(void)
 	return __amd_nodes_per_pkg;
 }
 
+#else /* CONFIG_SMP */
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#endif /* !CONFIG_SMP */
+
 extern struct cpumask __cpu_primary_thread_mask;
 #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
 
@@ -231,12 +237,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
 }
 #define topology_is_primary_thread topology_is_primary_thread
 
-#else /* CONFIG_SMP */
-static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_smt_threads(void) { return 1; }
-static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
-#endif /* !CONFIG_SMP */
-
 static inline void arch_fix_phys_package_id(int num, u32 slot)
 {
 }
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index dcf05c64dd82..6a76caf813d6 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -75,15 +75,11 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 	return phys_id == (u64)cpuid_to_apicid[cpu];
 }
 
-#ifdef CONFIG_SMP
 static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
 {
 	if (!(apicid & (__max_threads_per_core - 1)))
 		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
 }
-#else
-static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
-#endif
 
 /*
  * Convert the APIC ID to a domain level ID by masking out the low bits
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
index b5a5e1411469..71625795d711 100644
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -16,6 +16,9 @@ EXPORT_SYMBOL_GPL(x86_topo_system);
 unsigned int __amd_nodes_per_pkg __ro_after_init;
 EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg);
 
+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
 void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
 		      unsigned int shift, unsigned int ncpus)
 {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 33e166f6ab12..7804175d2d87 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -103,9 +103,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
 EXPORT_PER_CPU_SYMBOL(cpu_die_map);
 
-/* CPUs which are the primary SMT threads */
-struct cpumask __cpu_primary_thread_mask __read_mostly;
-
 /* Representing CPUs for which sibling maps can be computed */
 static cpumask_var_t cpu_sibling_setup_mask;
 
-- 
2.48.1

Re: [PATCH] x86/cpu/topology: Make primary thread mask available with SMP=n

Posted by Chang S. Bae 1 month, 1 week ago

On 8/22/2025 3:39 PM, Chang S. Bae wrote:
> ... The next revision of
> that series will depend on this.
Fold into the V5 posting now:
https://lore.kernel.org/lkml/20250823155214.17465-2-chang.seok.bae@intel.com/

Thanks,
Chang

[PATCH v4a 2/6] x86/microcode/intel: Establish staging control logic

Posted by Chang S. Bae 1 month, 1 week ago

When microcode staging is initiated, operations are carried out through
an MMIO interface. Each package has a unique interface specified by the
IA32_MCU_STAGING_MBOX_ADDR MSR, which maps to a set of 32-bit registers.

Prepare staging with the following steps:

  1.  Ensure the microcode image is 32-bit aligned to match the MMIO
      register size.

  2.  Identify each MMIO interface based on its per-package scope.

  3.  Invoke the staging function for each identified interface, which
      will be implemented separately.

Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
consistent use when narrowing down primary threads to locate the
per-package interface.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Tested-by: Anselm Busse <abusse@amazon.de>
Link: https://lore.kernel.org/all/871pznq229.ffs@tglx
---
V4 -> V4a:
* Rebase on the primary thread cpumask fix (Dave)
* Clean up the rev print code (Dave)
* rdmsrl_on_cpu() -> rdmsrq_on_cpu (Chao)
---
 arch/x86/include/asm/msr-index.h      |  2 ++
 arch/x86/kernel/cpu/microcode/intel.c | 49 +++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b65c3ba5fa14..0356155f9264 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -913,6 +913,8 @@
 #define MSR_IA32_UCODE_WRITE		0x00000079
 #define MSR_IA32_UCODE_REV		0x0000008b
 
+#define MSR_IA32_MCU_STAGING_MBOX_ADDR	0x000007a5
+
 /* Intel SGX Launch Enclave Public Key Hash MSRs */
 #define MSR_IA32_SGXLEPUBKEYHASH0	0x0000008C
 #define MSR_IA32_SGXLEPUBKEYHASH1	0x0000008D
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 371ca6eac00e..d309fb1f058f 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -299,6 +299,54 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
 	return size ? NULL : patch;
 }
 
+/*
+ * Handle the staging process using the mailbox MMIO interface.
+ * Return the result state.
+ */
+static enum ucode_state do_stage(u64 mmio_pa)
+{
+	pr_debug_once("Staging implementation is pending.\n");
+	return UCODE_ERROR;
+}
+
+static void stage_microcode(void)
+{
+	unsigned int pkg_id = UINT_MAX;
+	enum ucode_state ret;
+	int cpu, err;
+	u64 mmio_pa;
+
+	if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32)))
+		return;
+
+	lockdep_assert_cpus_held();
+
+	/*
+	 * The MMIO address is unique per package, and all the SMT
+	 * primary threads are online here. Find each MMIO space by
+	 * their package ids to avoid duplicate staging.
+	 */
+	for_each_cpu(cpu, cpu_primary_thread_mask) {
+		if (topology_logical_package_id(cpu) == pkg_id)
+			continue;
+		pkg_id = topology_logical_package_id(cpu);
+
+		err = rdmsrq_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa);
+		if (WARN_ON_ONCE(err))
+			return;
+
+		ret = do_stage(mmio_pa);
+		if (ret != UCODE_OK) {
+			pr_err("Error: staging failed with %s for CPU%d at package %u.\n",
+			       ret == UCODE_TIMEOUT ? "timeout" : "error state",
+			       cpu, pkg_id);
+			return;
+		}
+	}
+
+	pr_info("Staging of patch revision 0x%x succeeded.\n", ucode_patch_late->hdr.rev);
+}
+
 static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
 					  struct microcode_intel *mc,
 					  u32 *cur_rev)
@@ -627,6 +675,7 @@ static struct microcode_ops microcode_intel_ops = {
 	.collect_cpu_info	= collect_cpu_info,
 	.apply_microcode	= apply_microcode_late,
 	.finalize_late_load	= finalize_late_load,
+	.stage_microcode	= stage_microcode,
 	.use_nmi		= IS_ENABLED(CONFIG_X86_64),
 };
 
-- 
2.48.1

Re: [PATCH v4a 2/6] x86/microcode/intel: Establish staging control logic

Posted by Dave Hansen 1 month, 1 week ago

What's with the "v4a" stuff? Is this an attempt to torture test b4? ;)

Seriously, though, it's been more than a week since the last series.
Just send a new series, please.