Intel microcode adds some meta-data to report a minimum required revision
before this new microcode can be safely late loaded. There is no generic
mechanism to declare support for all vendors.
Add generic support to the microcode core to declare such support; this allows
late-loading to be permitted on those architectures that report support
for safe late loading.
Late loading has added support for
- New images declaring a required minimum base version before a late-load
is performed.
Tainting only happens on architectures that don't support minimum required
version reporting.
Add a new variable in microcode_ops to allow an architecture to declare
support for safe microcode late loading.
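
For illustration, a minimal stand-alone sketch of the gate this adds (ordinary
user-space C, not kernel code; everything except the safe_late_load flag and
the microcode_ops naming is made up for the example). A vendor driver that can
enforce a minimum base version sets the flag; the core refuses the late load
otherwise:

	#include <stdbool.h>
	#include <stdio.h>

	/* Minimal stand-in for the relevant part of struct microcode_ops. */
	struct ops_model {
		bool safe_late_load;	/* vendor can enforce a minimum base version */
	};

	/* Model of the new gate in reload_store(): refuse when unsupported. */
	static int late_load_permitted(const struct ops_model *ops)
	{
		if (!ops->safe_late_load) {
			fprintf(stderr, "late load refused: vendor cannot verify a minimum base version\n");
			return -1;	/* stands in for -EINVAL */
		}
		return 0;
	}

	int main(void)
	{
		struct ops_model with_minrev = { .safe_late_load = true };
		struct ops_model without_minrev = { .safe_late_load = false };

		printf("with min-rev support:    %d\n", late_load_permitted(&with_minrev));
		printf("without min-rev support: %d\n", late_load_permitted(&without_minrev));
		return 0;
	}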
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: x86 <x86@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Thomas Gleixner (Intel) <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Stefan Talpalaru <stefantalpalaru@yahoo.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>
---
arch/x86/include/asm/microcode.h | 2 ++
arch/x86/kernel/cpu/microcode/core.c | 25 ++++++++++++++++++++-----
arch/x86/kernel/cpu/microcode/intel.c | 1 +
arch/x86/Kconfig | 7 ++++---
4 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index d5a58bde091c..3d48143e84a9 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -33,6 +33,8 @@ enum ucode_state {
};
struct microcode_ops {
+ bool safe_late_load;
+
enum ucode_state (*request_microcode_fw) (int cpu, struct device *);
void (*microcode_fini_cpu) (int cpu);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index c361882baf63..446ddf3fcc29 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -472,6 +472,7 @@ static ssize_t reload_store(struct device *dev,
enum ucode_state tmp_ret = UCODE_OK;
int bsp = boot_cpu_data.cpu_index;
unsigned long val;
+ bool safe_late_load = false;
ssize_t ret = 0;
ret = kstrtoul(buf, 0, &val);
@@ -487,13 +488,22 @@ static ssize_t reload_store(struct device *dev,
if (ret)
goto put;
+ safe_late_load = microcode_ops->safe_late_load;
+
+ /*
+ * If safe loading indication isn't present, bail out.
+ */
+ if (!safe_late_load) {
+ pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
+ pr_err("You should switch to early loading, if possible.\n");
+ ret = -EINVAL;
+ goto put;
+ }
+
tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev);
if (tmp_ret != UCODE_NEW)
goto put;
- pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
- pr_err("You should switch to early loading, if possible.\n");
-
mutex_lock(&microcode_mutex);
ret = microcode_reload_late();
mutex_unlock(&microcode_mutex);
@@ -501,11 +511,16 @@ static ssize_t reload_store(struct device *dev,
put:
cpus_read_unlock();
+ /*
+ * Only taint if a successful load and vendor doesn't support
+ * safe_late_load
+ */
+ if (!(ret && safe_late_load))
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+
if (ret == 0)
ret = size;
- add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
-
return ret;
}
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 6046f90a47b2..eba4f463ef1c 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -806,6 +806,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
}
static struct microcode_ops microcode_intel_ops = {
+ .safe_late_load = true,
.request_microcode_fw = request_microcode_fw,
.collect_cpu_info = collect_cpu_info,
.apply_microcode = apply_microcode_intel,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3604074a878b..ddc4130e6f8c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1352,15 +1352,16 @@ config MICROCODE_AMD
processors will be enabled.
config MICROCODE_LATE_LOADING
- bool "Late microcode loading (DANGEROUS)"
- default n
+ bool "Late microcode loading"
+ default y
depends on MICROCODE
help
Loading microcode late, when the system is up and executing instructions
is a tricky business and should be avoided if possible. Just the sequence
of synchronizing all cores and SMT threads is one fragile dance which does
not guarantee that cores might not softlock after the loading. Therefore,
- use this at your own risk. Late loading taints the kernel too.
+ use this at your own risk. Late loading taints the kernel, if it
+ doesn't support a minimum required base version before an update.
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
--
2.34.1
Ashok!

On Fri, Jan 13 2023 at 09:29, Ashok Raj wrote:
> Intel microcode adds some meta-data to report a minimum required revision
> before this new microcode can be safely late loaded. There are no generic

s/this new microcode/a new microcode revision/

Changelogs are not restricted by twitter posting rules.

> mechanism to declare support for all vendors.
>
> Add generic support to microcode core to declare such support, this allows
> late-loading to be permitted in those architectures that report support
> for safe late loading.
>
> Late loading has added support for
>
> - New images declaring a required minimum base version before a late-load
>   is performed.
>
> Tainting only happens on architectures that don't support minimum required
> version reporting.
>
> Add a new variable in microcode_ops to allow an architecture to declare
> support for safe microcode late loading.

> @@ -487,13 +488,22 @@ static ssize_t reload_store(struct device *dev,
>  	if (ret)
>  		goto put;
>
> +	safe_late_load = microcode_ops->safe_late_load;
> +
> +	/*
> +	 * If safe loading indication isn't present, bail out.
> +	 */
> +	if (!safe_late_load) {
> +		pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
> +		pr_err("You should switch to early loading, if possible.\n");
> +		ret = -EINVAL;
> +		goto put;
> +	}
> +
>  	tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev);
>  	if (tmp_ret != UCODE_NEW)
>  		goto put;
>
> -	pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
> -	pr_err("You should switch to early loading, if possible.\n");
> -

Why are you not moving the pr_err()s right away (in 1/5) to the place
where you move it now?

>  	mutex_lock(&microcode_mutex);
>  	ret = microcode_reload_late();
>  	mutex_unlock(&microcode_mutex);
> @@ -501,11 +511,16 @@ static ssize_t reload_store(struct device *dev,
>  put:
>  	cpus_read_unlock();
>
> +	/*
> +	 * Only taint if a successful load and vendor doesn't support
> +	 * safe_late_load
> +	 */
> +	if (!(ret && safe_late_load))
> +		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);

The resulting code is undecodable garbage. What's worse is that the
existing logic in this code is broken already.

#1

	ssize_t ret = 0;

This 'ret = 0' assignment is pointless as ret is immediately overwritten
by the next line:

	ret = kstrtoul(buf, 0, &val);
	if (ret)
		return ret;

	if (val != 1)
		return size;

Now this is really useful. If the value is invalid, i.e. it causes the
function to abort immediately, it returns 'size' which means the write
was successful. Oh well.

Now let's look at a few lines further down:

#2

	ssize_t ret = 0;
	...
	ret = check_online_cpus();
	if (ret)
		goto put;
	...
	put:
	...
	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
	...
	return ret;

Why are we tainting the kernel when there was absolutely ZERO action
done here? All that check_online_cpus() figured out was that not enough
CPUs were online, right? That justifies an error return, but the taint
is bogus, no?

The next bogosity is:

	ssize_t ret = 0;
	...
	tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev);
	if (tmp_ret != UCODE_NEW)
		goto put;
	...
	put:
	...
	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);

	if (ret == 0)
		ret = size;

	return ret;

IOW, the microcode request can fail for whatever reason and the return
value is unconditionally 'size' which means the write to the sysfs file
is successful.

#3

Not to talk about the completely broken error handling in the actual
microcode loading case in the __reload_late()::wait_for_siblings code
path.

Maybe more #...
How does any of this make sense and allow sensible scripting of this
interface?

Surely you spent several orders of magnitude more time to stare at this
code than I did during this review, no?

Now instead of noticing and fixing any of this nonsense you are duct
taping this whole safe_late_load handling into that mess to make it even
more incomprehensible.

If you expected an alternative patch here, then I have to disappoint you.
I'm not presenting you the proper solution this time on a silver tablet
because I'm in the process of taming my 'let me fix this for you' reflex
to prepare for my retirement some years down the road.

But you should have enough hints to fix all of this for real, right?

Thanks,

        tglx
On Fri, Jan 20, 2023 at 01:15:04AM +0100, Thomas Gleixner wrote:
> Ashok!

I know I'm in trouble when it starts like this :-(

> On Fri, Jan 13 2023 at 09:29, Ashok Raj wrote:
> > Intel microcode adds some meta-data to report a minimum required revision
> > before this new microcode can be safely late loaded. There are no generic
>
> s/this new microcode/a new microcode revision/
>
> Changelogs are not restricted by twitter posting rules.

:-)

> > mechanism to declare support for all vendors.
> >
> > Add generic support to microcode core to declare such support, this allows
> > late-loading to be permitted in those architectures that report support
> > for safe late loading.
> >
> > Late loading has added support for
> >
> > - New images declaring a required minimum base version before a late-load
> >   is performed.
> >
> > Tainting only happens on architectures that don't support minimum required
> > version reporting.
> >
> > Add a new variable in microcode_ops to allow an architecture to declare
> > support for safe microcode late loading.

> > @@ -487,13 +488,22 @@ static ssize_t reload_store(struct device *dev,
> >  	if (ret)
> >  		goto put;
> >
> > +	safe_late_load = microcode_ops->safe_late_load;
> > +
> > +	/*
> > +	 * If safe loading indication isn't present, bail out.
> > +	 */
> > +	if (!safe_late_load) {
> > +		pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
> > +		pr_err("You should switch to early loading, if possible.\n");
> > +		ret = -EINVAL;
> > +		goto put;
> > +	}
> > +
> >  	tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev);
> >  	if (tmp_ret != UCODE_NEW)
> >  		goto put;
> >
> > -	pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
> > -	pr_err("You should switch to early loading, if possible.\n");
> > -
>
> Why are you not moving the pr_err()s right away (in 1/5) to the place
> where you move it now?

Could have, didn't occur then. But I can move them to the proper place in
patch1.

> >  	mutex_lock(&microcode_mutex);
> >  	ret = microcode_reload_late();
> >  	mutex_unlock(&microcode_mutex);
> > @@ -501,11 +511,16 @@ static ssize_t reload_store(struct device *dev,
> >  put:
> >  	cpus_read_unlock();
> >
> > +	/*
> > +	 * Only taint if a successful load and vendor doesn't support
> > +	 * safe_late_load
> > +	 */
> > +	if (!(ret && safe_late_load))
> > +		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
>
> The resulting code is undecodable garbage. What's worse is that the
> existing logic in this code is broken already.

Yes, I agree, it's hard to comprehend. I'll open it up a little so it
makes sense:

	if successfully loaded, and !safe_late_load
		add_taint()

> #1
> 	ssize_t ret = 0;
>
> This 'ret = 0' assignment is pointless as ret is immediately overwritten
> by the next line:

This was existing code, but I can certainly remove the unneeded
initialization.

> 	ret = kstrtoul(buf, 0, &val);
> 	if (ret)
> 		return ret;
>
> 	if (val != 1)
> 		return size;
>
> Now this is really useful. If the value is invalid, i.e. it causes the
> function to abort immediately, it returns 'size' which means the write
> was successful. Oh well.

Yes, it's a bit awkward. This is how it's been forever. I wasn't sure if
the purpose was that values other than 1 don't throw an error, so it could
be used to accommodate some extended functionality, say "echo X", in the
future. I'm not suggesting such use :-), but thought that maybe that was
the reason to not report an error.

If it's acceptable to return something like -EINVAL we could do that, so
there is some error the user can catch in user space.

> Now let's look at a few lines further down:
>
> #2
>
> 	ssize_t ret = 0;
> 	...
> 	ret = check_online_cpus();
> 	if (ret)
> 		goto put;
> 	...
> 	put:
> 	...
> 	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
> 	...
> 	return ret;
>
> Why are we tainting the kernel when there was absolutely ZERO action
> done here? All that check_online_cpus() figured out was that not enough
> CPUs were online, right? That justifies an error return, but the taint
> is bogus, no?

Agree! This was the code that was introduced in 5.19 when we turned off
late-loading in the kernel. We try to fix it here, i.e. only taint if the
loading was successful and safe_late_load wasn't set.

Should it change after this patch? Or maybe you meant fix it to not taint
always, before doing this change?

> The next bogosity is:
>
> 	ssize_t ret = 0;
> 	...
> 	tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev);
> 	if (tmp_ret != UCODE_NEW)
> 		goto put;
> 	...
> 	put:
> 	...
> 	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
>
> 	if (ret == 0)
> 		ret = size;
>
> 	return ret;
>
> IOW, the microcode request can fail for whatever reason and the return
> value is unconditionally 'size' which means the write to the sysfs file
> is successful.

Loading can fail for some known reasons:

- No file found
- File is either the same or an older rev than what is loaded

Should we return proper codes? Certainly we can, but since this has been
around all this time, I'm worried someone who depends on this working this
way will now see failures when it didn't in the past.

> #3
>
> Not to talk about the completely broken error handling in the actual
> microcode loading case in the __reload_late()::wait_for_siblings code
> path.
>
> Maybe more #...

I'll need to stare at it more than I have... If it's busted, it's not
popping out. It's a path that all CPUs go through for the exit rendezvous.
We let the secondary also do an apply_microcode(), just to update the
revision in the per-cpu structures.

I could be wrong, but if we didn't update the per-cpu rev, /proc/cpuinfo
was reporting the old values. I remember doing this way back in 2018
Spectre time. Guess a multiple choice might be useful :-).. I'll keep
looking though!

> How does any of this make sense and allow sensible scripting of this
> interface?
>
> Surely you spent several orders of magnitude more time to stare at this
> code than I did during this review, no?

Sadly yes!

> Now instead of noticing and fixing any of this nonsense you are duct
> taping this whole safe_late_load handling into that mess to make it even
> more incomprehensible.

safe_late_load didn't change any of the old algorithms for late-loading
itself. All I used it for was as a mechanism to inform the core that the
vendor supports some way to tell that the minrev is comprehended. This
doesn't change any of the code paths we take for late-load.

When safe_late_load is supported,

- we don't issue a warning, or taint the kernel.
- the vendor provides a way to check that the new microcode has the proper
  meta-data, and honors that.

Did you have something more in mind?

> If you expected an alternative patch here, then I have to disappoint
> you.

Disappointed.. No.. I'm glad this is coming up after 4 years. The next
Part3 that has the NMI handling has something similar to hold HT siblings
in NMI before the update completes on the primary.

Better now than late.. Thanks for all the direction.

> I'm not presenting you the proper solution this time on a silver tablet
> because I'm in the process of taming my 'let me fix this for you' reflex
> to prepare for my retirement some years down the road.
>
> But you should have enough hints to fix all of this for real, right?

Yes, once I can spot all those holes :-)

Cheers,
Ashok
Hi Thomas and Boris,

This mini series is in response to Thomas's feedback here[1].

I hope this addresses all/most of the concerns you raised in this thread.
Sorry if I missed any; drop more hints and I'll fix them up.

Patch3 needs an AMD change as well. I wasn't confident looking at the
source whether it was returning the patch id vs reading from the real MSR.
I'll check with Boris to confirm before I submit this again.

[1] https://lore.kernel.org/lkml/87y1pygiyf.ffs@tglx/

Now that the other patches are staged in tip/x86/microcode, this series
applies on top of that.

Cheers,
Ashok

Ashok Raj (4):
  x86/microcode: Taint kernel only if microcode loading was successful
  x86/microcode: Report invalid writes to reload sysfs file
  x86/microcode/intel: Fix collect_cpu_info() to reflect current microcode
  x86/microcode: Do not call apply_microcode() on sibling threads

 arch/x86/kernel/cpu/microcode/core.c  | 28 ++++++++++++++-------------
 arch/x86/kernel/cpu/microcode/intel.c | 10 +++++++++-
 2 files changed, 24 insertions(+), 14 deletions(-)

Cc: LKML <linux-kernel@vger.kernel.org>
Cc: x86 <x86@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Thomas Gleixner (Intel) <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Stefan Talpalaru <stefantalpalaru@yahoo.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Martin Pohlack <mpohlack@amazon.de>
--
2.34.1
Hi Thomas,

On Sat, Jan 21, 2023 at 01:35:08PM -0800, Ashok Raj wrote:
> Hi Thomas and Boris,
>
> This mini series is in response to Thomas's feedback here[1].
>
> I hope this addresses all/most of the concerns you raised in this thread.
> Sorry if I missed any; drop more hints and I'll fix them up.
>
> Patch3 needs an AMD change as well. I wasn't confident looking at the
> source whether it was returning the patch id vs reading from the real MSR.
> I'll check with Boris to confirm before I submit this again.
>
> [1] https://lore.kernel.org/lkml/87y1pygiyf.ffs@tglx/

I can send you a new series with these cleanups. If you think I left
anything out please let me know.

Boris: Do you have any feedback on this Part2 of the patch series? I can
wrap those as well with the ones that address Thomas's feedback, if
possible.

Thanks,
Ashok
On Thu, Jan 26, 2023 at 08:52:30AM -0800, Ashok Raj wrote:
> Boris: Do you have any feedback on this Part2 of the patch series? I can
> wrap those as well with the ones that address Thomas's feedback, if
> possible.

I have no clue what I need to look at first - the part 2 or the cleanup?
And what goes where.

I think you should send a proper, tested(!), new revision of the whole set
for review.

Thx.

--
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Microcode updates are applied at the core level, so both HT sibling threads
of a core observe the update.
During late-load, after the primary has updated the microcode, it also
reflects that in the per-cpu structure (cpuinfo_x86) holding the current
revision.
Since the sibling hasn't had a chance to update the per-cpu revision,
the current code calls apply_microcode() just as a way to verify and also
update the per-cpu revision number.
But in the odd case where the primary returned with an error, the secondary
will still try to perform a patch load even though the primary has already
been released back to the system. This could be problematic.
Replace apply_microcode() with a call to collect_cpu_info() and let that
call also update the per-cpu structure instead of returning the previously
cached values.
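
As a rough stand-alone illustration (plain user-space C, not the kernel code;
only the primary/sibling split and the apply vs. collect naming mirror the
description above, everything else is made up): the primary thread of each
core performs the actual update, while its sibling merely re-reads the
already-updated revision into its per-CPU copy.

	#include <stdio.h>

	#define NR_CPUS 4

	/* Model: CPUs 0/1 are HT siblings of core 0, CPUs 2/3 of core 1. */
	static int primary_of(int cpu) { return cpu & ~1; }

	static unsigned int core_hw_rev[NR_CPUS / 2];	/* per-core "hardware" revision */
	static unsigned int percpu_rev[NR_CPUS];	/* cached per-CPU revision */

	/* Primary thread: actually install the new microcode revision. */
	static void apply_microcode_model(int cpu, unsigned int new_rev)
	{
		core_hw_rev[cpu / 2] = new_rev;
		percpu_rev[cpu] = new_rev;
	}

	/* Sibling thread: only refresh the cached value from the "hardware". */
	static void collect_cpu_info_model(int cpu)
	{
		percpu_rev[cpu] = core_hw_rev[cpu / 2];
	}

	int main(void)
	{
		for (int cpu = 0; cpu < NR_CPUS; cpu++) {
			if (primary_of(cpu) == cpu)
				apply_microcode_model(cpu, 0x2b);	/* primary updates */
			else
				collect_cpu_info_model(cpu);		/* sibling only reads */
		}

		for (int cpu = 0; cpu < NR_CPUS; cpu++)
			printf("cpu%d revision 0x%x\n", cpu, percpu_rev[cpu]);
		return 0;
	}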
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: x86 <x86@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Thomas Gleixner (Intel) <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Stefan Talpalaru <stefantalpalaru@yahoo.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Martin Pohlack <mpohlack@amazon.de>
---
arch/x86/kernel/cpu/microcode/core.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 6ade3d59c404..089636b1643f 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -386,6 +386,7 @@ static int __wait_for_cpus(atomic_t *t, long long timeout)
*/
static int __reload_late(void *info)
{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
int cpu = smp_processor_id();
enum ucode_state err;
int ret = 0;
@@ -422,12 +423,11 @@ static int __reload_late(void *info)
/*
* At least one thread has completed update on each core.
- * For others, simply call the update to make sure the
- * per-cpu cpuinfo can be updated with right microcode
- * revision.
+ * For siblings, collect the cpuinfo and update the
+ * per-cpu cpuinfo with the current microcode revision.
*/
if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
- err = microcode_ops->apply_microcode(cpu);
+ microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
return ret;
}
--
2.34.1
On Sat, Jan 21, 2023 at 01:35:12PM -0800, Ashok Raj wrote:

[snip]

> ---
>  arch/x86/kernel/cpu/microcode/core.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
> index 6ade3d59c404..089636b1643f 100644
> --- a/arch/x86/kernel/cpu/microcode/core.c
> +++ b/arch/x86/kernel/cpu/microcode/core.c
> @@ -386,6 +386,7 @@ static int __wait_for_cpus(atomic_t *t, long long timeout)
>   */
>  static int __reload_late(void *info)
>  {
> +	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;

In a quest to keep the christmas tree effect, screwed up the ordering. I
fixed it before I resent the next time. Modified diff below.

>  	int cpu = smp_processor_id();
>  	enum ucode_state err;
>  	int ret = 0;
> @@ -422,12 +423,11 @@ static int __reload_late(void *info)
>
>  	/*
>  	 * At least one thread has completed update on each core.
> -	 * For others, simply call the update to make sure the
> -	 * per-cpu cpuinfo can be updated with right microcode
> -	 * revision.
> +	 * For siblings, collect the cpuinfo and update the
> +	 * per-cpu cpuinfo with the current microcode revision.
>  	 */
>  	if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
> -		err = microcode_ops->apply_microcode(cpu);
> +		microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
>
>  	return ret;
>  }
> --
> 2.34.1
>

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 6ade3d59c404..07764c1a2dd3 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -387,6 +387,7 @@ static int __wait_for_cpus(atomic_t *t, long long timeout)
 static int __reload_late(void *info)
 {
 	int cpu = smp_processor_id();
+	struct ucode_cpu_info *uci;
 	enum ucode_state err;
 	int ret = 0;
@@ -422,12 +423,13 @@ static int __reload_late(void *info)

 	/*
 	 * At least one thread has completed update on each core.
-	 * For others, simply call the update to make sure the
-	 * per-cpu cpuinfo can be updated with right microcode
-	 * revision.
+	 * For siblings, collect the cpuinfo and update the
+	 * per-cpu cpuinfo with the current microcode revision.
 	 */
-	if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
-		err = microcode_ops->apply_microcode(cpu);
+	if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu) {
+		uci = ucode_cpu_info + cpu;
+		microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
+	}

 	return ret;
 }
Currently, when late loading is aborted because check_online_cpus() fails,
the kernel still ends up being tainted.
Taint only when microcode loading was successful.
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: x86 <x86@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Thomas Gleixner (Intel) <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Stefan Talpalaru <stefantalpalaru@yahoo.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Martin Pohlack <mpohlack@amazon.de>
---
arch/x86/kernel/cpu/microcode/core.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index d7cbc83df9b6..c5d80ff00b4e 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -475,6 +475,7 @@ static ssize_t reload_store(struct device *dev,
enum ucode_state tmp_ret = UCODE_OK;
int bsp = boot_cpu_data.cpu_index;
unsigned long val;
+ int load_ret = -1;
ssize_t ret = 0;
ret = kstrtoul(buf, 0, &val);
@@ -495,16 +496,20 @@ static ssize_t reload_store(struct device *dev,
goto put;
mutex_lock(&microcode_mutex);
- ret = microcode_reload_late();
+ load_ret = microcode_reload_late();
mutex_unlock(&microcode_mutex);
put:
cpus_read_unlock();
- if (ret == 0)
+ /*
+ * Taint only when loading was successful
+ */
+ if (load_ret == 0) {
ret = size;
-
- add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ pr_warn("Microcode late loading tainted the kernel\n");
+ }
return ret;
}
--
2.34.1
Currently the microcode reload sysfs file only accepts a value of 1. But when
the user writes any other value, no error is reported; the write is treated
as a silent success without performing a load.
Report such erroneous writes back to the user as an error.
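
To make the behavioral change concrete, here is a small stand-alone sketch
(plain C, not the kernel implementation; kstrtoul() is modeled with strtoul()
and the input strings are made up) of the new "anything other than 1 is
-EINVAL" handling:

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Model of the new check: anything other than a clean "1" is -EINVAL. */
	static long reload_store_model(const char *buf, long size)
	{
		char *end;
		unsigned long val;

		errno = 0;
		val = strtoul(buf, &end, 0);
		if (errno || end == buf || (*end && strcmp(end, "\n")) || val != 1)
			return -EINVAL;	/* old code silently returned 'size' here */

		/* ... the late load would run here ... */
		return size;		/* success: whole write consumed */
	}

	int main(void)
	{
		const char *inputs[] = { "1", "2", "frob" };

		for (unsigned int i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++)
			printf("write %-5s -> %ld\n", inputs[i],
			       reload_store_model(inputs[i], (long)strlen(inputs[i])));
		return 0;
	}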
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: x86 <x86@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Thomas Gleixner (Intel) <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Stefan Talpalaru <stefantalpalaru@yahoo.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Martin Pohlack <mpohlack@amazon.de>
---
arch/x86/kernel/cpu/microcode/core.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index c5d80ff00b4e..6ade3d59c404 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -479,11 +479,8 @@ static ssize_t reload_store(struct device *dev,
ssize_t ret = 0;
ret = kstrtoul(buf, 0, &val);
- if (ret)
- return ret;
-
- if (val != 1)
- return size;
+ if (ret || val != 1)
+ return -EINVAL;
cpus_read_lock();
--
2.34.1
Currently collect_cpu_info() is only returning what was cached earlier
instead of reading the current revision from the proper MSR.
Collect the current revision and report that value instead of reflecting
what was cached in the past.
[TBD:
Need to change microcode/amd.c. I didn't quite follow the logic since
it reports the patch id from the patch file instead of reading the real
PATCH_LEVEL MSR.
]
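
For reference, the revision in question lives in MSR 0x8B (IA32_BIOS_SIGN_ID,
MSR_IA32_UCODE_REV), with the update signature in the upper 32 bits. The
following user-space sketch is not part of the patch; it is only a sanity
check, assuming root privileges and the msr driver loaded ('modprobe msr').
The in-kernel helper additionally performs the documented wrmsr(0)+CPUID
sequence before reading; a plain read from user space normally still reflects
the current revision because CPUID has been executed since the last update.

	#include <fcntl.h>
	#include <inttypes.h>
	#include <stdio.h>
	#include <unistd.h>

	#define MSR_IA32_UCODE_REV 0x8b

	int main(void)
	{
		/* The msr driver exposes one file per CPU; CPU 0 is used here. */
		int fd = open("/dev/cpu/0/msr", O_RDONLY);
		uint64_t val;

		if (fd < 0) {
			perror("open /dev/cpu/0/msr (need root and 'modprobe msr')");
			return 1;
		}

		/* pread() at the MSR number returns the raw 64-bit MSR value. */
		if (pread(fd, &val, sizeof(val), MSR_IA32_UCODE_REV) != sizeof(val)) {
			perror("pread");
			close(fd);
			return 1;
		}
		close(fd);

		/* The microcode update signature sits in the upper 32 bits. */
		printf("cpu0 microcode revision: 0x%" PRIx64 "\n", val >> 32);
		return 0;
	}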
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: x86 <x86@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Thomas Gleixner (Intel) <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Stefan Talpalaru <stefantalpalaru@yahoo.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Martin Pohlack <mpohlack@amazon.de>
---
arch/x86/kernel/cpu/microcode/intel.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 4471d418f28a..be830944178c 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -543,6 +543,13 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
unsigned int val[2];
+ int rev;
+
+ /*
+ * intel_get_microcode_revision() reads a per-core MSR
+ * to read the revision (MSR_IA32_UCODE_REV).
+ */
+ WARN_ON_ONCE(cpu_num != smp_processor_id());
memset(csig, 0, sizeof(*csig));
@@ -554,7 +561,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
csig->pf = 1 << ((val[1] >> 18) & 7);
}
- csig->rev = c->microcode;
+ rev = intel_get_microcode_revision();
+ csig->rev = c->microcode = rev;
return 0;
}
--
2.34.1