When microcode staging is initiated, operations are carried out through
an MMIO interface. Each package has a unique interface specified by the
IA32_MCU_STAGING_MBOX_ADDR MSR, which maps to a set of 32-bit registers.
Prepare staging with the following steps:
1. Ensure the microcode image is 32-bit aligned to match the MMIO
register size.
2. Identify each MMIO interface based on its per-package scope.
3. Invoke the staging function for each identified interface, which
will be implemented separately.
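For illustration only, the access pattern implied by these steps might
look roughly like the sketch below. The real mailbox protocol is
implemented separately in a later patch; MBOX_DATA and the mapping size
here are invented placeholders, not architectural definitions:

  /* Hypothetical sketch, not the actual staging protocol. */
  #define MBOX_DATA	0x8	/* made-up data register offset */

  static enum ucode_state stage_sketch(u64 mmio_pa)
  {
  	unsigned int i, dwords;
  	void __iomem *base;
  	u32 *image;

  	/* Map the per-package mailbox window; the size is assumed. */
  	base = ioremap(mmio_pa, PAGE_SIZE);
  	if (!base)
  		return UCODE_ERROR;

  	/*
  	 * The image is a multiple of 4 bytes (step 1), so it can be
  	 * streamed through the 32-bit data register one dword at a
  	 * time.
  	 */
  	image  = (u32 *)ucode_patch_late;
  	dwords = get_totalsize(&ucode_patch_late->hdr) / sizeof(u32);
  	for (i = 0; i < dwords; i++)
  		writel(image[i], base + MBOX_DATA);

  	iounmap(base);
  	return UCODE_OK;
  }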
Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
consistent use when narrowing down primary threads to locate the
per-package interface.
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Tested-by: Anselm Busse <abusse@amazon.de>
Link: https://lore.kernel.org/all/871pznq229.ffs@tglx
---
V2 -> V3:
* Remove a global variable and adjust stage_microcode() (Dave).
* Simplify for_each_cpu() loop control code
* Handle rdmsrl_on_cpu() return code explicitly (Chao)
V1 -> V2:
* Adjust to reference the staging_state struct.
* Add lockdep_assert_cpus_held() (Boris)
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/include/asm/topology.h | 1 +
arch/x86/kernel/cpu/microcode/intel.c | 50 +++++++++++++++++++++++++++
3 files changed, 53 insertions(+)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b65c3ba5fa14..0356155f9264 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -913,6 +913,8 @@
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
+#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5
+
/* Intel SGX Launch Enclave Public Key Hash MSRs */
#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C
#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..91b5fc44ca62 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -235,6 +235,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#define cpu_primary_thread_mask cpu_none_mask
#endif /* !CONFIG_SMP */
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 371ca6eac00e..468c4d3d5d66 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -299,6 +299,55 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
return size ? NULL : patch;
}
+/*
+ * Handle the staging process using the mailbox MMIO interface.
+ * Return the result state.
+ */
+static enum ucode_state do_stage(u64 mmio_pa)
+{
+ pr_debug_once("Staging implementation is pending.\n");
+ return UCODE_ERROR;
+}
+
+static void stage_microcode(void)
+{
+ unsigned int pkg_id = UINT_MAX;
+ enum ucode_state ret;
+ int cpu, err;
+ u64 mmio_pa;
+
+ if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32)))
+ return;
+
+ lockdep_assert_cpus_held();
+
+ /*
+ * The MMIO address is unique per package, and all the SMT
+ * primary threads are online here. Find each MMIO space by
+ * their package ids to avoid duplicate staging.
+ */
+ for_each_cpu(cpu, cpu_primary_thread_mask) {
+ if (topology_logical_package_id(cpu) == pkg_id)
+ continue;
+ pkg_id = topology_logical_package_id(cpu);
+
+ err = rdmsrl_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa);
+ if (WARN_ON_ONCE(err))
+ return;
+
+ ret = do_stage(mmio_pa);
+ if (ret != UCODE_OK) {
+ pr_err("Error: staging failed with %s for CPU%d at package %u.\n",
+ ret == UCODE_TIMEOUT ? "timeout" : "error state",
+ cpu, pkg_id);
+ return;
+ }
+ }
+
+ pr_info("Staging of patch revision 0x%x succeeded.\n",
+ ((struct microcode_header_intel *)ucode_patch_late)->rev);
+}
+
static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
struct microcode_intel *mc,
u32 *cur_rev)
@@ -627,6 +676,7 @@ static struct microcode_ops microcode_intel_ops = {
.collect_cpu_info = collect_cpu_info,
.apply_microcode = apply_microcode_late,
.finalize_late_load = finalize_late_load,
+ .stage_microcode = stage_microcode,
.use_nmi = IS_ENABLED(CONFIG_X86_64),
};
--
2.48.1
On 8/13/25 10:26, Chang S. Bae wrote:
> When microcode staging is initiated, operations are carried out through
> an MMIO interface. Each package has a unique interface specified by the
> IA32_MCU_STAGING_MBOX_ADDR MSR, which maps to a set of 32-bit registers.
>
> Prepare staging with the following steps:
>
> 1. Ensure the microcode image is 32-bit aligned to match the MMIO
> register size.
>
> 2. Identify each MMIO interface based on its per-package scope.
>
> 3. Invoke the staging function for each identified interface, which
> will be implemented separately.
>
> Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
> consistent use when narrowing down primary threads to locate the
> per-package interface.
...
> static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
> +#define cpu_primary_thread_mask cpu_none_mask
> #endif /* !CONFIG_SMP */
Isn't 'cpu_none_mask' a mask containing no CPUs? How can that possibly
work here:
for_each_cpu(cpu, cpu_primary_thread_mask) {
? Wouldn't it just not run through the for loop at all on CONFIG_SMP=n?
Is that what we want for some reason? I would have thought that we'd
still want to find the MMIO address for CPU 0, the one and only CPU.
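For the record, a tiny user-space analogy of the control flow here --
this is not kernel code, just the shape of iterating an empty mask:

  #include <stdio.h>

  int main(void)
  {
  	unsigned long mask = 0;		/* cpu_none_mask: no bits set */
  	int cpu, iterations = 0;

  	for (cpu = 0; cpu < 64; cpu++) {
  		if (!(mask & (1UL << cpu)))
  			continue;	/* CPU not in the mask */
  		iterations++;		/* loop body, never reached */
  	}

  	printf("iterations = %d\n", iterations);	/* prints 0 */
  	return 0;
  }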
> static inline void arch_fix_phys_package_id(int num, u32 slot)
> diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
> index 371ca6eac00e..468c4d3d5d66 100644
> --- a/arch/x86/kernel/cpu/microcode/intel.c
> +++ b/arch/x86/kernel/cpu/microcode/intel.c
> @@ -299,6 +299,55 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
> return size ? NULL : patch;
> }
>
> +/*
> + * Handle the staging process using the mailbox MMIO interface.
> + * Return the result state.
> + */
> +static enum ucode_state do_stage(u64 mmio_pa)
> +{
> + pr_debug_once("Staging implementation is pending.\n");
> + return UCODE_ERROR;
> +}
> +
> +static void stage_microcode(void)
> +{
> + unsigned int pkg_id = UINT_MAX;
> + enum ucode_state ret;
> + int cpu, err;
> + u64 mmio_pa;
> +
> + if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32)))
> + return;
> +
> + lockdep_assert_cpus_held();
> +
> + /*
> + * The MMIO address is unique per package, and all the SMT
> + * primary threads are online here. Find each MMIO space by
> + * their package ids to avoid duplicate staging.
> + */
> + for_each_cpu(cpu, cpu_primary_thread_mask) {
> + if (topology_logical_package_id(cpu) == pkg_id)
> + continue;
> + pkg_id = topology_logical_package_id(cpu);
> +
> + err = rdmsrl_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa);
> + if (WARN_ON_ONCE(err))
> + return;
> +
> + ret = do_stage(mmio_pa);
> + if (ret != UCODE_OK) {
> + pr_err("Error: staging failed with %s for CPU%d at package %u.\n",
> + ret == UCODE_TIMEOUT ? "timeout" : "error state",
> + cpu, pkg_id);
> + return;
> + }
> + }
> +
> + pr_info("Staging of patch revision 0x%x succeeded.\n",
> + ((struct microcode_header_intel *)ucode_patch_late)->rev);
> +}
Hmmm. Consider:
static struct microcode_intel *ucode_patch_late __read_mostly;
and:
struct microcode_intel {
struct microcode_header_intel hdr;
unsigned int bits[];
};
So isn't this whole ugly cast thing equivalent to:
ucode_patch_late->hdr.rev
?
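Here's a standalone demonstration with stand-in copies of the two
structs; C guarantees a pointer to a struct also points to its first
member, so both expressions read the same field:

  #include <assert.h>
  #include <stdio.h>

  struct microcode_header_intel { unsigned int rev; };

  struct microcode_intel {
  	struct microcode_header_intel hdr;
  	unsigned int bits[];
  };

  int main(void)
  {
  	static struct microcode_intel mc = { .hdr = { .rev = 0x42 } };
  	struct microcode_intel *p = &mc;

  	/* The cast and the plain member access are equivalent. */
  	assert(((struct microcode_header_intel *)p)->rev == p->hdr.rev);
  	printf("rev = 0x%x\n", p->hdr.rev);
  	return 0;
  }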
On 8/13/2025 11:21 AM, Dave Hansen wrote:
> On 8/13/25 10:26, Chang S. Bae wrote:
>>
>> Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
>> consistent use when narrowing down primary threads to locate the
>> per-package interface.
> ...
>> static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
>> +#define cpu_primary_thread_mask cpu_none_mask
>> #endif /* !CONFIG_SMP */
>
> Isn't 'cpu_none_mask' a mask containing no CPUs? How can that possibly
> work here:
>
> for_each_cpu(cpu, cpu_primary_thread_mask) {
>
> ? Wouldn't it just not run through the for loop at all on CONFIG_SMP=n?
> Is that what we want for some reason? I would have thought that we'd
> still want to find the MMIO address for CPU 0, the one and only CPU.
Yeah, right.
Then, looking at it again, I see this:
config MICROCODE_LATE_LOADING
bool "Late microcode loading (DANGEROUS)"
default n
depends on MICROCODE && SMP
This optimization only applies to the late-loading path. But I also had
to remind myself of this dependency today. At least my changelog
could've made it clearer, sorry.
>> +
>> + pr_info("Staging of patch revision 0x%x succeeded.\n",
>> + ((struct microcode_header_intel *)ucode_patch_late)->rev);
>> +}
> Hmmm. Consider:
>
> static struct microcode_intel *ucode_patch_late __read_mostly;
>
> and:
>
> struct microcode_intel {
> struct microcode_header_intel hdr;
> unsigned int bits[];
> };
>
> So isn't this whole ugly cast thing equivalent to:
>
> ucode_patch_late->hdr.rev
>
> ?
Indeed. I must have been blind to that bit of ugliness. Thanks for
spotting it!
Chang
On 8/13/25 13:46, Chang S. Bae wrote:
>> Isn't 'cpu_none_mask' a mask containing no CPUs? How can that possibly
>> work here:
>>
>> for_each_cpu(cpu, cpu_primary_thread_mask) {
>>
>> ? Wouldn't it just not run through the for loop at all on CONFIG_SMP=n?
>> Is that what we want for some reason? I would have thought that we'd
>> still want to find the MMIO address for CPU 0, the one and only CPU.
>
> Yeah, right.
>
> Then, looking at it again, I see this:
>
> config MICROCODE_LATE_LOADING
> bool "Late microcode loading (DANGEROUS)"
> default n
> depends on MICROCODE && SMP
>
> This optimization only applies to the late-loading path. But, today I
> also had to clarify this dependency for myself. At least, my changelog
> could've made it clearer, sorry.
I'm not following.
I _think_ you're trying to say that it's a "no harm, no foul" situation
because this user of 'cpu_primary_thread_mask' won't even get compiled
in the buggy !SMP case.
But that's not the problem. The issue is that this line of code:
#define cpu_primary_thread_mask cpu_none_mask
reads as 100% bogus to me. Even on !SMP kernels,
'cpu_primary_thread_mask' should have one CPU in it. Right? The _one_
thread that's present is a primary thread. If this were a mask for
secondary threads, 'cpu_none_mask' would make sense. But it's not.
So could we please make sure 'cpu_primary_thread_mask' is getting defined
correctly whether it's really getting compiled into the end image or not?
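For illustration, one definition that would match that expectation on a
UP kernel -- a sketch of the idea, not necessarily the right fix (the
follow-up below ends up sharing the real mask instead):

  /* !CONFIG_SMP: the single present CPU is the primary thread. */
  #define cpu_primary_thread_mask	cpumask_of(0)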
On 8/13/2025 1:55 PM, Dave Hansen wrote:
>
> But that's not the problem. The issue is that this line of code:
>
> #define cpu_primary_thread_mask cpu_none_mask
With CONFIG_SMP=n, the core side (include/linux/cpu_smt.h) states that
there is no SMT:
# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED)
This leads kernel/cpu.c to return an empty mask:
static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
{
return cpu_none_mask;
}
On the x86 side, the definition is explicit that “primary threads” are
SMT threads (arch/x86/kernel/smpboot.c):
/* CPUs which are the primary SMT threads */
struct cpumask __cpu_primary_thread_mask __read_mostly;
And via ifdeffery, this mask is only available to SMP kernels.
So it seems I had been subscribing to this model -- no primary threads
without SMP.
> reads as 100% bogus to me. Even on !SMP kernels,
> 'cpu_primary_thread_mask' should have one CPU in it. Right? The _one_
> thread that's present is a primary thread. If this were a mask for
> secondary threads, 'cpu_none_mask' would make sense. But it's not.
Your confidence made me take another look.
Digging into the history, I found that x86 used to have this in the !SMP
case:
static inline bool topology_is_primary_thread(unsigned int cpu)
{
return true;
}
That stayed until the recent commit 4b455f59945aa ("cpu/SMT: Provide a
default topology_is_primary_thread()"), which now defines it in
include/linux/topology.h with this telling comment:
/*
* When disabling SMT, the primary thread of the SMT will remain
* enabled/active. Architectures that have a special primary thread
* (e.g. x86) need to override this function. ...
*/
This comment basically supports your point.
> So could we please make sure 'cpu_primary_thread_mask' is getting defined
> correctly whether it's really getting compiled into the end image or not?
Given that, I'm thinking of simplifying the x86 side a bit -- making
the primary thread mask configured and available regardless of
CONFIG_SMP, matching the behavior of other cpumasks, with its relevant
helpers available as well, as in the attached diff.
I think the change still aligns x86 with the core code -- especially
with the note in topology_is_primary_thread(). With that, the new user
of the mask can then be introduced here.
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..281252af6e9d 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -218,6 +218,12 @@ static inline unsigned int topology_amd_nodes_per_pkg(void)
return __amd_nodes_per_pkg;
}
+#else /* CONFIG_SMP */
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#endif /* !CONFIG_SMP */
+
extern struct cpumask __cpu_primary_thread_mask;
#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
@@ -231,12 +237,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
}
#define topology_is_primary_thread topology_is_primary_thread
-#else /* CONFIG_SMP */
-static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_smt_threads(void) { return 1; }
-static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
-#endif /* !CONFIG_SMP */
-
static inline void arch_fix_phys_package_id(int num, u32 slot)
{
}
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index e35ccdc84910..946004d7dd1d 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -52,6 +52,9 @@ u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
/* Bitmaps to mark registered APICs at each topology domain */
static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
/*
* Keep track of assigned, disabled and rejected CPUs. Present assigned
* with 1 as CPU #0 is reserved for the boot CPU.
@@ -75,15 +78,11 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
return phys_id == (u64)cpuid_to_apicid[cpu];
}
-#ifdef CONFIG_SMP
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
if (!(apicid & (__max_threads_per_core - 1)))
cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
-#else
-static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
-#endif
/*
* Convert the APIC ID to a domain level ID by masking out the low bits
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 33e166f6ab12..7804175d2d87 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -103,9 +103,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);
-/* CPUs which are the primary SMT threads */
-struct cpumask __cpu_primary_thread_mask __read_mostly;
-
/* Representing CPUs for which sibling maps can be computed */
static cpumask_var_t cpu_sibling_setup_mask;
cpu_primary_thread_mask is only defined when CONFIG_SMP=y. However, even
in UP kernels there is always exactly one CPU, which can reasonably be
treated as the primary thread.
Historically, topology_is_primary_thread() always returned true with
CONFIG_SMP=n. A recent commit:
4b455f59945aa ("cpu/SMT: Provide a default topology_is_primary_thread()")
replaced it with a generic implementation, noting:
"When disabling SMT, the primary thread of the SMT will remain
enabled/active. Architectures that have a special primary thread (e.g.
x86) need to override this function. ..."
For consistency and clarity, make the primary thread mask available
regardless of SMP, similar to cpu_possible_mask and cpu_present_mask.
Move __cpu_primary_thread_mask into common code to prevent build issues.
Let cpu_mark_primary_thread() configure the mask even for UP kernels,
alongside other masks. Then, topology_is_primary_thread() can
consistently reference it.
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
This improvement was identified from feedback on a feature-enabling
series [*], where a user of this mask is introduced. It is posted here as
a standalone patch for clarity and self-containment. The next revision of
that series will depend on this.
[*] https://lore.kernel.org/lkml/20250813172649.15474-1-chang.seok.bae@intel.com/
---
arch/x86/include/asm/topology.h | 12 ++++++------
arch/x86/kernel/cpu/topology.c | 4 ----
arch/x86/kernel/cpu/topology_common.c | 3 +++
arch/x86/kernel/smpboot.c | 3 ---
4 files changed, 9 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..281252af6e9d 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -218,6 +218,12 @@ static inline unsigned int topology_amd_nodes_per_pkg(void)
return __amd_nodes_per_pkg;
}
+#else /* CONFIG_SMP */
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#endif /* !CONFIG_SMP */
+
extern struct cpumask __cpu_primary_thread_mask;
#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
@@ -231,12 +237,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
}
#define topology_is_primary_thread topology_is_primary_thread
-#else /* CONFIG_SMP */
-static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_smt_threads(void) { return 1; }
-static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
-#endif /* !CONFIG_SMP */
-
static inline void arch_fix_phys_package_id(int num, u32 slot)
{
}
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index dcf05c64dd82..6a76caf813d6 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -75,15 +75,11 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
return phys_id == (u64)cpuid_to_apicid[cpu];
}
-#ifdef CONFIG_SMP
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
if (!(apicid & (__max_threads_per_core - 1)))
cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
-#else
-static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
-#endif
/*
* Convert the APIC ID to a domain level ID by masking out the low bits
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
index b5a5e1411469..71625795d711 100644
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -16,6 +16,9 @@ EXPORT_SYMBOL_GPL(x86_topo_system);
unsigned int __amd_nodes_per_pkg __ro_after_init;
EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg);
+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
unsigned int shift, unsigned int ncpus)
{
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 33e166f6ab12..7804175d2d87 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -103,9 +103,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);
-/* CPUs which are the primary SMT threads */
-struct cpumask __cpu_primary_thread_mask __read_mostly;
-
/* Representing CPUs for which sibling maps can be computed */
static cpumask_var_t cpu_sibling_setup_mask;
--
2.48.1
On 8/22/2025 3:39 PM, Chang S. Bae wrote:
> ... The next revision of
> that series will depend on this.

Folded into the V5 posting now:

https://lore.kernel.org/lkml/20250823155214.17465-2-chang.seok.bae@intel.com/

Thanks,
Chang
When microcode staging is initiated, operations are carried out through
an MMIO interface. Each package has a unique interface specified by the
IA32_MCU_STAGING_MBOX_ADDR MSR, which maps to a set of 32-bit registers.
Prepare staging with the following steps:
1. Ensure the microcode image is 32-bit aligned to match the MMIO
register size.
2. Identify each MMIO interface based on its per-package scope.
3. Invoke the staging function for each identified interface, which
will be implemented separately.
Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
consistent use when narrowing down primary threads to locate the
per-package interface.
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Tested-by: Anselm Busse <abusse@amazon.de>
Link: https://lore.kernel.org/all/871pznq229.ffs@tglx
---
V4 -> V4a:
* Rebase on the primary thread cpumask fix (Dave)
* Clean up the rev print code (Dave)
* rdmsrl_on_cpu() -> rdmsrq_on_cpu() (Chao)
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/microcode/intel.c | 49 +++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b65c3ba5fa14..0356155f9264 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -913,6 +913,8 @@
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
+#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5
+
/* Intel SGX Launch Enclave Public Key Hash MSRs */
#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C
#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 371ca6eac00e..d309fb1f058f 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -299,6 +299,54 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
return size ? NULL : patch;
}
+/*
+ * Handle the staging process using the mailbox MMIO interface.
+ * Return the result state.
+ */
+static enum ucode_state do_stage(u64 mmio_pa)
+{
+ pr_debug_once("Staging implementation is pending.\n");
+ return UCODE_ERROR;
+}
+
+static void stage_microcode(void)
+{
+ unsigned int pkg_id = UINT_MAX;
+ enum ucode_state ret;
+ int cpu, err;
+ u64 mmio_pa;
+
+ if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32)))
+ return;
+
+ lockdep_assert_cpus_held();
+
+ /*
+ * The MMIO address is unique per package, and all the SMT
+ * primary threads are online here. Find each MMIO space by
+ * their package ids to avoid duplicate staging.
+ */
+ for_each_cpu(cpu, cpu_primary_thread_mask) {
+ if (topology_logical_package_id(cpu) == pkg_id)
+ continue;
+ pkg_id = topology_logical_package_id(cpu);
+
+ err = rdmsrq_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa);
+ if (WARN_ON_ONCE(err))
+ return;
+
+ ret = do_stage(mmio_pa);
+ if (ret != UCODE_OK) {
+ pr_err("Error: staging failed with %s for CPU%d at package %u.\n",
+ ret == UCODE_TIMEOUT ? "timeout" : "error state",
+ cpu, pkg_id);
+ return;
+ }
+ }
+
+ pr_info("Staging of patch revision 0x%x succeeded.\n", ucode_patch_late->hdr.rev);
+}
+
static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
struct microcode_intel *mc,
u32 *cur_rev)
@@ -627,6 +675,7 @@ static struct microcode_ops microcode_intel_ops = {
.collect_cpu_info = collect_cpu_info,
.apply_microcode = apply_microcode_late,
.finalize_late_load = finalize_late_load,
+ .stage_microcode = stage_microcode,
.use_nmi = IS_ENABLED(CONFIG_X86_64),
};
--
2.48.1