When microcode staging is initiated, operations are carried out through
an MMIO interface. Each package has a unique interface specified by the
IA32_MCU_STAGING_MBOX_ADDR MSR, which maps to a set of 32-bit registers.
Prepare staging with the following steps:
1. Ensure the microcode image is 32-bit aligned to match the MMIO
   register size (see the sketch after this list).
2. Identify each MMIO interface based on its per-package scope.
3. Invoke the staging function for each identified interface, which
will be implemented separately.
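As a rough sketch of why step 1 matters (hypothetical code, not part of
this patch; 'mbox_data' and 'totalsize' are assumed names): the mailbox
registers are 32 bits wide, so the image would be transferred one dword
at a time, and a size that is not a multiple of 4 would leave a partial
trailing word:

	/*
	 * Hypothetical transfer loop: 'mbox_data' stands in for the
	 * staging mailbox data register that a later patch implements.
	 */
	u32 *image = (u32 *)ucode_patch_late;
	unsigned int i, dwords = totalsize / sizeof(u32);

	for (i = 0; i < dwords; i++)
		writel(image[i], mbox_data);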
Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
consistent use when narrowing down primary threads to locate the
per-package interface.
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Tested-by: Anselm Busse <abusse@amazon.de>
Link: https://lore.kernel.org/all/871pznq229.ffs@tglx
---
V2 -> V3:
* Remove a global variable and adjust stage_microcode() (Dave).
* Simplify for_each_cpu() loop control code
* Handle rdmsrl_on_cpu() return code explicitly (Chao)
V1 -> V2:
* Adjust to reference the staging_state struct.
* Add lockdep_assert_cpus_held() (Boris)
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/include/asm/topology.h | 1 +
arch/x86/kernel/cpu/microcode/intel.c | 50 +++++++++++++++++++++++++++
3 files changed, 53 insertions(+)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b65c3ba5fa14..0356155f9264 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -913,6 +913,8 @@
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
+#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5
+
/* Intel SGX Launch Enclave Public Key Hash MSRs */
#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C
#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..91b5fc44ca62 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -235,6 +235,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#define cpu_primary_thread_mask cpu_none_mask
#endif /* !CONFIG_SMP */
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 371ca6eac00e..468c4d3d5d66 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -299,6 +299,55 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
return size ? NULL : patch;
}
+/*
+ * Handle the staging process using the mailbox MMIO interface.
+ * Return the result state.
+ */
+static enum ucode_state do_stage(u64 mmio_pa)
+{
+ pr_debug_once("Staging implementation is pending.\n");
+ return UCODE_ERROR;
+}
+
+static void stage_microcode(void)
+{
+ unsigned int pkg_id = UINT_MAX;
+ enum ucode_state ret;
+ int cpu, err;
+ u64 mmio_pa;
+
+ if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32)))
+ return;
+
+ lockdep_assert_cpus_held();
+
+ /*
+ * The MMIO address is unique per package, and all the SMT
+ * primary threads are online here. Find each MMIO space by
+ * their package ids to avoid duplicate staging.
+ */
+ for_each_cpu(cpu, cpu_primary_thread_mask) {
+ if (topology_logical_package_id(cpu) == pkg_id)
+ continue;
+ pkg_id = topology_logical_package_id(cpu);
+
+ err = rdmsrl_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa);
+ if (WARN_ON_ONCE(err))
+ return;
+
+ ret = do_stage(mmio_pa);
+ if (ret != UCODE_OK) {
+ pr_err("Error: staging failed with %s for CPU%d at package %u.\n",
+ ret == UCODE_TIMEOUT ? "timeout" : "error state",
+ cpu, pkg_id);
+ return;
+ }
+ }
+
+ pr_info("Staging of patch revision 0x%x succeeded.\n",
+ ((struct microcode_header_intel *)ucode_patch_late)->rev);
+}
+
static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
struct microcode_intel *mc,
u32 *cur_rev)
@@ -627,6 +676,7 @@ static struct microcode_ops microcode_intel_ops = {
.collect_cpu_info = collect_cpu_info,
.apply_microcode = apply_microcode_late,
.finalize_late_load = finalize_late_load,
+ .stage_microcode = stage_microcode,
.use_nmi = IS_ENABLED(CONFIG_X86_64),
};
--
2.48.1
On 8/13/25 10:26, Chang S. Bae wrote:
> When microcode staging is initiated, operations are carried out through
> an MMIO interface. Each package has a unique interface specified by the
> IA32_MCU_STAGING_MBOX_ADDR MSR, which maps to a set of 32-bit registers.
>
> Prepare staging with the following steps:
>
> 1. Ensure the microcode image is 32-bit aligned to match the MMIO
>    register size.
>
> 2. Identify each MMIO interface based on its per-package scope.
>
> 3. Invoke the staging function for each identified interface, which
>    will be implemented separately.
>
> Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
> consistent use when narrowing down primary threads to locate the
> per-package interface.
...
>  static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
> +#define cpu_primary_thread_mask cpu_none_mask
>  #endif /* !CONFIG_SMP */

Isn't 'cpu_none_mask' a mask containing no CPUs? How can that possibly
work here:

	for_each_cpu(cpu, cpu_primary_thread_mask) {

? Wouldn't it just not run through the for loop at all on CONFIG_SMP=n?
Is that what we want for some reason? I would have thought that we'd
still want to find the MMIO address for CPU 0, the one and only CPU.

>  static inline void arch_fix_phys_package_id(int num, u32 slot)
> diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
> index 371ca6eac00e..468c4d3d5d66 100644
> --- a/arch/x86/kernel/cpu/microcode/intel.c
> +++ b/arch/x86/kernel/cpu/microcode/intel.c
> @@ -299,6 +299,55 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
>  	return size ? NULL : patch;
>  }
>
> +/*
> + * Handle the staging process using the mailbox MMIO interface.
> + * Return the result state.
> + */
> +static enum ucode_state do_stage(u64 mmio_pa)
> +{
> +	pr_debug_once("Staging implementation is pending.\n");
> +	return UCODE_ERROR;
> +}
> +
> +static void stage_microcode(void)
> +{
> +	unsigned int pkg_id = UINT_MAX;
> +	enum ucode_state ret;
> +	int cpu, err;
> +	u64 mmio_pa;
> +
> +	if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32)))
> +		return;
> +
> +	lockdep_assert_cpus_held();
> +
> +	/*
> +	 * The MMIO address is unique per package, and all the SMT
> +	 * primary threads are online here. Find each MMIO space by
> +	 * their package ids to avoid duplicate staging.
> +	 */
> +	for_each_cpu(cpu, cpu_primary_thread_mask) {
> +		if (topology_logical_package_id(cpu) == pkg_id)
> +			continue;
> +		pkg_id = topology_logical_package_id(cpu);
> +
> +		err = rdmsrl_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa);
> +		if (WARN_ON_ONCE(err))
> +			return;
> +
> +		ret = do_stage(mmio_pa);
> +		if (ret != UCODE_OK) {
> +			pr_err("Error: staging failed with %s for CPU%d at package %u.\n",
> +			       ret == UCODE_TIMEOUT ? "timeout" : "error state",
> +			       cpu, pkg_id);
> +			return;
> +		}
> +	}
> +
> +	pr_info("Staging of patch revision 0x%x succeeded.\n",
> +		((struct microcode_header_intel *)ucode_patch_late)->rev);
> +}

Hmmm. Consider:

	static struct microcode_intel *ucode_patch_late __read_mostly;

and:

	struct microcode_intel {
		struct microcode_header_intel	hdr;
		unsigned int			bits[];
	};

So isn't this whole ugly cast thing equivalent to:

	ucode_patch_late->hdr.rev

?
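(A minimal sketch of the equivalence in question, with the struct
layouts abbreviated for illustration: 'hdr' is the first member of
struct microcode_intel, so the cast and the plain member access read
the same field:)

	struct microcode_header_intel { unsigned int hdrver, rev; /* ... */ };
	struct microcode_intel {
		struct microcode_header_intel	hdr;	/* first member */
		unsigned int			bits[];
	};

	static struct microcode_intel *ucode_patch_late;

	/* Both expressions denote the same object: */
	u32 a = ((struct microcode_header_intel *)ucode_patch_late)->rev;
	u32 b = ucode_patch_late->hdr.rev;		/* a == b */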
On 8/13/2025 11:21 AM, Dave Hansen wrote:
> On 8/13/25 10:26, Chang S. Bae wrote:
>>
>> Also, define cpu_primary_thread_mask for the CONFIG_SMP=n case, allowing
>> consistent use when narrowing down primary threads to locate the
>> per-package interface.
> ...
>>  static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
>> +#define cpu_primary_thread_mask cpu_none_mask
>>  #endif /* !CONFIG_SMP */
>
> Isn't 'cpu_none_mask' a mask containing no CPUs? How can that possibly
> work here:
>
> 	for_each_cpu(cpu, cpu_primary_thread_mask) {
>
> ? Wouldn't it just not run through the for loop at all on CONFIG_SMP=n?
> Is that what we want for some reason? I would have thought that we'd
> still want to find the MMIO address for CPU 0, the one and only CPU.

Yeah, right.

Then, looking at it again, I see this:

	config MICROCODE_LATE_LOADING
		bool "Late microcode loading (DANGEROUS)"
		default n
		depends on MICROCODE && SMP

This optimization only applies to the late-loading path. But, today I
also had to clarify this dependency for myself. At least, my changelog
could've made it clearer, sorry.

>> +
>> +	pr_info("Staging of patch revision 0x%x succeeded.\n",
>> +		((struct microcode_header_intel *)ucode_patch_late)->rev);
>> +}
> Hmmm. Consider:
>
> 	static struct microcode_intel *ucode_patch_late __read_mostly;
>
> and:
>
> 	struct microcode_intel {
> 		struct microcode_header_intel	hdr;
> 		unsigned int			bits[];
> 	};
>
> So isn't this whole ugly cast thing equivalent to:
>
> 	ucode_patch_late->hdr.rev
>
> ?

Indeed. I must have been blind to that bit of ugliness. Thanks for
spotting it!

Chang
On 8/13/25 13:46, Chang S. Bae wrote:
>> Isn't 'cpu_none_mask' a mask containing no CPUs? How can that possibly
>> work here:
>>
>> 	for_each_cpu(cpu, cpu_primary_thread_mask) {
>>
>> ? Wouldn't it just not run through the for loop at all on CONFIG_SMP=n?
>> Is that what we want for some reason? I would have thought that we'd
>> still want to find the MMIO address for CPU 0, the one and only CPU.
>
> Yeah, right.
>
> Then, looking at it again, I see this:
>
> 	config MICROCODE_LATE_LOADING
> 		bool "Late microcode loading (DANGEROUS)"
> 		default n
> 		depends on MICROCODE && SMP
>
> This optimization only applies to the late-loading path. But, today I
> also had to clarify this dependency for myself. At least, my changelog
> could've made it clearer, sorry.

I'm not following. I _think_ you're trying to say that it's a "no harm,
no foul" situation because this user of 'cpu_primary_thread_mask' won't
even get compiled in the buggy !SMP case.

But that's not the problem. The issue is that this line of code:

	#define cpu_primary_thread_mask cpu_none_mask

reads as 100% bogus to me. Even on !SMP kernels,
'cpu_primary_thread_mask' should have one CPU in it. Right? The _one_
thread that's present is a primary thread. If this were a mask for
secondary threads, 'cpu_none_mask' would make sense. But it's not.

So could we please make sure 'cpu_primary_thread_mask' is getting
defined correctly whether it's really getting compiled into the end
image or not?
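(Concretely, the invariant being asked for here, expressed as a
hypothetical assertion rather than code from this thread, is that even
an UP kernel has its one CPU marked as a primary thread:)

	/* Expected to hold even with CONFIG_SMP=n: */
	WARN_ON(!cpumask_test_cpu(0, cpu_primary_thread_mask));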
On 8/13/2025 1:55 PM, Dave Hansen wrote:
>
> But that's not the problem. The issue is that this line of code:
>
> 	#define cpu_primary_thread_mask cpu_none_mask

With CONFIG_SMP=n, on the core side (include/linux/cpu_smt.h), the code
clarifies there is no SMT:

	# define cpu_smt_control	(CPU_SMT_NOT_IMPLEMENTED)

This leads kernel/cpu.c to return an empty mask:

	static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
	{
		return cpu_none_mask;
	}

On the x86 side, the definition is explicit that "primary threads" are
SMT threads (arch/x86/kernel/smpboot.c):

	/* CPUs which are the primary SMT threads */
	struct cpumask __cpu_primary_thread_mask __read_mostly;

And via ifdeffery, this mask is only available to SMP kernels. So it
seems I had been subscribing to this model -- no primary threads
without SMP.

> reads as 100% bogus to me. Even on !SMP kernels,
> 'cpu_primary_thread_mask' should have one CPU in it. Right? The _one_
> thread that's present is a primary thread. If this were a mask for
> secondary threads, 'cpu_none_mask' would make sense. But it's not.

Your confidence made me take another look. Digging into the history, I
found that x86 used to have this in the !SMP case:

	static inline bool topology_is_primary_thread(unsigned int cpu)
	{
		return true;
	}

That stayed until the recent commit 4b455f59945aa ("cpu/SMT: Provide a
default topology_is_primary_thread()"), which now defines it in
include/linux/topology.h with this telling comment:

	/*
	 * When disabling SMT, the primary thread of the SMT will remain
	 * enabled/active. Architectures that have a special primary thread
	 * (e.g. x86) need to override this function.
	 * ...
	 */

This comment basically supports your point.

> So could we please make sure 'cpu_primary_thread_mask' is getting
> defined correctly whether it's really getting compiled into the end
> image or not?

Given that, I'm thinking of simplifying the x86 side a bit -- by making
the primary thread mask configured and available regardless of
CONFIG_SMP, matching the behavior of other cpumasks. And its relevant
helpers are also available, like in the attached diff. I think the
change still aligns x86 with the core code -- especially with the note
in topology_is_primary_thread().

With that, the user of this mask may be introduced here.
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..281252af6e9d 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -218,6 +218,12 @@ static inline unsigned int topology_amd_nodes_per_pkg(void)
 	return __amd_nodes_per_pkg;
 }
 
+#else /* CONFIG_SMP */
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#endif /* !CONFIG_SMP */
+
 extern struct cpumask __cpu_primary_thread_mask;
 #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
 
@@ -231,12 +237,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
 }
 #define topology_is_primary_thread topology_is_primary_thread
 
-#else /* CONFIG_SMP */
-static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_smt_threads(void) { return 1; }
-static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
-#endif /* !CONFIG_SMP */
-
 static inline void arch_fix_phys_package_id(int num, u32 slot)
 {
 }
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index e35ccdc84910..946004d7dd1d 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -52,6 +52,9 @@ u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
 /* Bitmaps to mark registered APICs at each topology domain */
 static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
 
+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
 /*
  * Keep track of assigned, disabled and rejected CPUs. Present assigned
  * with 1 as CPU #0 is reserved for the boot CPU.
@@ -75,15 +78,11 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 	return phys_id == (u64)cpuid_to_apicid[cpu];
 }
 
-#ifdef CONFIG_SMP
 static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
 {
 	if (!(apicid & (__max_threads_per_core - 1)))
 		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
 }
-#else
-static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
-#endif
 
 /*
  * Convert the APIC ID to a domain level ID by masking out the low bits
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 33e166f6ab12..7804175d2d87 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -103,9 +103,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
 EXPORT_PER_CPU_SYMBOL(cpu_die_map);
 
-/* CPUs which are the primary SMT threads */
-struct cpumask __cpu_primary_thread_mask __read_mostly;
-
 /* Representing CPUs for which sibling maps can be computed */
 static cpumask_var_t cpu_sibling_setup_mask;
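With this diff applied, cpu_mark_primary_thread() should populate the
mask during topology enumeration on UP kernels as well, so the
for_each_cpu() walk in the staging patch would visit CPU 0 even when
CONFIG_SMP=n.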
cpu_primary_thread_mask is only defined when CONFIG_SMP=y. However, even
in UP kernels there is always exactly one CPU, which can reasonably be
treated as the primary thread.
Historically, topology_is_primary_thread() always returned true with
CONFIG_SMP=n. A recent commit:
4b455f59945aa ("cpu/SMT: Provide a default topology_is_primary_thread()")
replaced it with a generic implementation with the note:
"When disabling SMT, the primary thread of the SMT will remain
enabled/active. Architectures that have a special primary thread (e.g.
x86) need to override this function. ..."
For consistency and clarity, make the primary thread mask available
regardless of SMP, similar to cpu_possible_mask and cpu_present_mask.
Move __cpu_primary_thread_mask into common code to prevent build issues.
Let cpu_mark_primary_thread() configure the mask even for UP kernels,
alongside other masks. Then, topology_is_primary_thread() can
consistently reference it.
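For reference, the x86 override of that helper already just reads the
mask (as in the current tree), so it keeps working unchanged:

	static inline bool topology_is_primary_thread(unsigned int cpu)
	{
		return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
	}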
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
This improvement was identified from feedback on a feature-enabling
series [*], where a user of this mask is introduced. It is posted here as
a standalone patch for clarity and self-containment. The next revision of
that series will depend on this.
[*] https://lore.kernel.org/lkml/20250813172649.15474-1-chang.seok.bae@intel.com/
---
arch/x86/include/asm/topology.h | 12 ++++++------
arch/x86/kernel/cpu/topology.c | 4 ----
arch/x86/kernel/cpu/topology_common.c | 3 +++
arch/x86/kernel/smpboot.c | 3 ---
4 files changed, 9 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..281252af6e9d 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -218,6 +218,12 @@ static inline unsigned int topology_amd_nodes_per_pkg(void)
return __amd_nodes_per_pkg;
}
+#else /* CONFIG_SMP */
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#endif /* !CONFIG_SMP */
+
extern struct cpumask __cpu_primary_thread_mask;
#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
@@ -231,12 +237,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
}
#define topology_is_primary_thread topology_is_primary_thread
-#else /* CONFIG_SMP */
-static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_smt_threads(void) { return 1; }
-static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
-#endif /* !CONFIG_SMP */
-
static inline void arch_fix_phys_package_id(int num, u32 slot)
{
}
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index dcf05c64dd82..6a76caf813d6 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -75,15 +75,11 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
return phys_id == (u64)cpuid_to_apicid[cpu];
}
-#ifdef CONFIG_SMP
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
if (!(apicid & (__max_threads_per_core - 1)))
cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
-#else
-static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
-#endif
/*
* Convert the APIC ID to a domain level ID by masking out the low bits
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
index b5a5e1411469..71625795d711 100644
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -16,6 +16,9 @@ EXPORT_SYMBOL_GPL(x86_topo_system);
unsigned int __amd_nodes_per_pkg __ro_after_init;
EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg);
+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
unsigned int shift, unsigned int ncpus)
{
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 33e166f6ab12..7804175d2d87 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -103,9 +103,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);
-/* CPUs which are the primary SMT threads */
-struct cpumask __cpu_primary_thread_mask __read_mostly;
-
/* Representing CPUs for which sibling maps can be computed */
static cpumask_var_t cpu_sibling_setup_mask;
--
2.48.1
On 8/22/2025 3:39 PM, Chang S. Bae wrote:
> ... The next revision of
> that series will depend on this.

Folded into the V5 posting now:

	https://lore.kernel.org/lkml/20250823155214.17465-2-chang.seok.bae@intel.com/

Thanks,
Chang
When microcode staging is initiated, operations are carried out through
an MMIO interface. Each package has a unique interface specified by the
IA32_MCU_STAGING_MBOX_ADDR MSR, which maps to a set of 32-bit registers.
Prepare staging with the following steps:
1. Ensure the microcode image is 32-bit aligned to match the MMIO
register size.
2. Identify each MMIO interface based on its per-package scope (see the
   walk-through after this list).
3. Invoke the staging function for each identified interface, which
will be implemented separately.
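As an illustration of the per-package dedup in step 2, a hypothetical
walk-through (CPU numbers and package ids invented for this example):

	/*
	 * Hypothetical: primary threads {0, 2, 4, 6} with package ids
	 * {0, 0, 1, 1}; pkg_id starts at UINT_MAX.
	 *
	 *   cpu 0: pkg 0 != UINT_MAX -> read MSR, stage package 0
	 *   cpu 2: pkg 0 == pkg_id   -> skip
	 *   cpu 4: pkg 1 != pkg_id   -> read MSR, stage package 1
	 *   cpu 6: pkg 1 == pkg_id   -> skip
	 */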
The per-package walk relies on cpu_primary_thread_mask which, as of the
preceding cpumask fix, is defined regardless of CONFIG_SMP, allowing
consistent use when narrowing down primary threads to locate the
per-package interface.
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Tested-by: Anselm Busse <abusse@amazon.de>
Link: https://lore.kernel.org/all/871pznq229.ffs@tglx
---
V4 -> V4a:
* Rebase on the primary thread cpumask fix (Dave)
* Clean up the rev print code (Dave)
* rdmsrl_on_cpu() -> rdmsrq_on_cpu() (Chao)
---
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/microcode/intel.c | 49 +++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b65c3ba5fa14..0356155f9264 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -913,6 +913,8 @@
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
+#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5
+
/* Intel SGX Launch Enclave Public Key Hash MSRs */
#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C
#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 371ca6eac00e..d309fb1f058f 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -299,6 +299,54 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
return size ? NULL : patch;
}
+/*
+ * Handle the staging process using the mailbox MMIO interface.
+ * Return the result state.
+ */
+static enum ucode_state do_stage(u64 mmio_pa)
+{
+ pr_debug_once("Staging implementation is pending.\n");
+ return UCODE_ERROR;
+}
+
+static void stage_microcode(void)
+{
+ unsigned int pkg_id = UINT_MAX;
+ enum ucode_state ret;
+ int cpu, err;
+ u64 mmio_pa;
+
+ if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32)))
+ return;
+
+ lockdep_assert_cpus_held();
+
+ /*
+ * The MMIO address is unique per package, and all the SMT
+ * primary threads are online here. Find each MMIO space by
+ * their package ids to avoid duplicate staging.
+ */
+ for_each_cpu(cpu, cpu_primary_thread_mask) {
+ if (topology_logical_package_id(cpu) == pkg_id)
+ continue;
+ pkg_id = topology_logical_package_id(cpu);
+
+ err = rdmsrq_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa);
+ if (WARN_ON_ONCE(err))
+ return;
+
+ ret = do_stage(mmio_pa);
+ if (ret != UCODE_OK) {
+ pr_err("Error: staging failed with %s for CPU%d at package %u.\n",
+ ret == UCODE_TIMEOUT ? "timeout" : "error state",
+ cpu, pkg_id);
+ return;
+ }
+ }
+
+ pr_info("Staging of patch revision 0x%x succeeded.\n", ucode_patch_late->hdr.rev);
+}
+
static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
struct microcode_intel *mc,
u32 *cur_rev)
@@ -627,6 +675,7 @@ static struct microcode_ops microcode_intel_ops = {
.collect_cpu_info = collect_cpu_info,
.apply_microcode = apply_microcode_late,
.finalize_late_load = finalize_late_load,
+ .stage_microcode = stage_microcode,
.use_nmi = IS_ENABLED(CONFIG_X86_64),
};
--
2.48.1