Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new
extable fixup type, EX_TYPE_EFAULT_REG, and later patches updated the
extable fixup type for copy-from-user operations, changing it from
EX_TYPE_UACCESS to EX_TYPE_EFAULT_REG.
Specifically, commit 99641e094d6c ("x86/uaccess: Remove .fixup usage")
altered the extable fixup type for the get_user family, while commit
4c132d1d844a ("x86/futex: Remove .fixup usage") addressed the futex
operations. This change inadvertently caused a regression where the error
context for some copy-from-user operations no longer functions as an
in-kernel recovery context, leading to kernel panics with the message:
"Machine check: Data load in unrecoverable area of kernel."
To fix the regression, add EX_TYPE_EFAULT_REG as a in-kernel recovery
context for copy-from-user operations.
Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
Fixes: 4c132d1d844a ("x86/futex: Remove .fixup usage")
Cc: stable@vger.kernel.org
---
arch/x86/kernel/cpu/mce/severity.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dac4d64dfb2a..14c2d71c3ce1 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -16,6 +16,7 @@
#include <asm/traps.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
+#include <linux/extable.h>
#include "internal.h"
@@ -285,7 +286,8 @@ static bool is_copy_from_user(struct pt_regs *regs)
*/
static noinstr int error_context(struct mce *m, struct pt_regs *regs)
{
- int fixup_type;
+ const struct exception_table_entry *e;
+ int fixup_type, imm;
bool copy_user;
if ((m->cs & 3) == 3)
@@ -294,9 +296,14 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
if (!mc_recoverable(m->mcgstatus))
return IN_KERNEL;
+ e = search_exception_tables(m->ip);
+ if (!e)
+ return IN_KERNEL;
+
/* Allow instrumentation around external facilities usage. */
instrumentation_begin();
- fixup_type = ex_get_fixup_type(m->ip);
+ fixup_type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
+ imm = FIELD_GET(EX_DATA_IMM_MASK, e->data);
copy_user = is_copy_from_user(regs);
instrumentation_end();
@@ -304,9 +311,13 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
case EX_TYPE_UACCESS:
if (!copy_user)
return IN_KERNEL;
- m->kflags |= MCE_IN_KERNEL_COPYIN;
- fallthrough;
-
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
+ case EX_TYPE_IMM_REG:
+ if (!copy_user || imm != -EFAULT)
+ return IN_KERNEL;
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
--
2.39.3
On Mon, Feb 17, 2025 at 02:33:33PM +0800, Shuai Xue wrote:
> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
> index dac4d64dfb2a..14c2d71c3ce1 100644
> --- a/arch/x86/kernel/cpu/mce/severity.c
> +++ b/arch/x86/kernel/cpu/mce/severity.c
> @@ -16,6 +16,7 @@
> #include <asm/traps.h>
> #include <asm/insn.h>
> #include <asm/insn-eval.h>
> +#include <linux/extable.h>
>
> #include "internal.h"
>
> @@ -285,7 +286,8 @@ static bool is_copy_from_user(struct pt_regs *regs)
> */
> static noinstr int error_context(struct mce *m, struct pt_regs *regs)
> {
> - int fixup_type;
> + const struct exception_table_entry *e;
> + int fixup_type, imm;
> bool copy_user;
>
> if ((m->cs & 3) == 3)
> @@ -294,9 +296,14 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
> if (!mc_recoverable(m->mcgstatus))
> return IN_KERNEL;
>
> + e = search_exception_tables(m->ip);
> + if (!e)
> + return IN_KERNEL;
You didn't actually build this, did you? Or did you ignore the extra
noinstr warnings?
> /* Allow instrumentation around external facilities usage. */
> instrumentation_begin();
> - fixup_type = ex_get_fixup_type(m->ip);
> + fixup_type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
> + imm = FIELD_GET(EX_DATA_IMM_MASK, e->data);
> copy_user = is_copy_from_user(regs);
> instrumentation_end();
>
> @@ -304,9 +311,13 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
> case EX_TYPE_UACCESS:
> if (!copy_user)
> return IN_KERNEL;
> - m->kflags |= MCE_IN_KERNEL_COPYIN;
> - fallthrough;
> -
> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
> + return IN_KERNEL_RECOV;
> + case EX_TYPE_IMM_REG:
> + if (!copy_user || imm != -EFAULT)
> + return IN_KERNEL;
> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
> + return IN_KERNEL_RECOV;
Maybe I'm justnot understanding things, but what's wrong with something
like the below; why do we care about the ex-type if we know its a MOV
reading from userspace?
The less we muck about with the extable here, the better.
---
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dac4d64dfb2a..cb021058165f 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
copy_user = is_copy_from_user(regs);
instrumentation_end();
- switch (fixup_type) {
- case EX_TYPE_UACCESS:
- if (!copy_user)
- return IN_KERNEL;
- m->kflags |= MCE_IN_KERNEL_COPYIN;
- fallthrough;
+ if (copy_user) {
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_COPYIN;
+ return IN_KERNEL_RECOV
+ }
+ switch (fixup_type) {
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
Hi, Peter
在 2025/2/18 20:54, Peter Zijlstra 写道:
> On Mon, Feb 17, 2025 at 02:33:33PM +0800, Shuai Xue wrote:
>
>> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
>> index dac4d64dfb2a..14c2d71c3ce1 100644
>> --- a/arch/x86/kernel/cpu/mce/severity.c
>> +++ b/arch/x86/kernel/cpu/mce/severity.c
>> @@ -16,6 +16,7 @@
>> #include <asm/traps.h>
>> #include <asm/insn.h>
>> #include <asm/insn-eval.h>
>> +#include <linux/extable.h>
>>
>> #include "internal.h"
>>
>> @@ -285,7 +286,8 @@ static bool is_copy_from_user(struct pt_regs *regs)
>> */
>> static noinstr int error_context(struct mce *m, struct pt_regs *regs)
>> {
>> - int fixup_type;
>> + const struct exception_table_entry *e;
>> + int fixup_type, imm;
>> bool copy_user;
>>
>> if ((m->cs & 3) == 3)
>> @@ -294,9 +296,14 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
>> if (!mc_recoverable(m->mcgstatus))
>> return IN_KERNEL;
>>
>> + e = search_exception_tables(m->ip);
>> + if (!e)
>> + return IN_KERNEL;
>
> You didn't actually build this, did you? Or did you ignore the extra
> noinstr warnings?
I did build and test this patch set on it. But I did not find any warnings.
Could you provide more details?
>
>> /* Allow instrumentation around external facilities usage. */
>> instrumentation_begin();
>> - fixup_type = ex_get_fixup_type(m->ip);
>> + fixup_type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
>> + imm = FIELD_GET(EX_DATA_IMM_MASK, e->data);
>> copy_user = is_copy_from_user(regs);
>> instrumentation_end();
>>
>> @@ -304,9 +311,13 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
>> case EX_TYPE_UACCESS:
>> if (!copy_user)
>> return IN_KERNEL;
>> - m->kflags |= MCE_IN_KERNEL_COPYIN;
>> - fallthrough;
>> -
>> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
>> + return IN_KERNEL_RECOV;
>> + case EX_TYPE_IMM_REG:
>> + if (!copy_user || imm != -EFAULT)
>> + return IN_KERNEL;
>> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
>> + return IN_KERNEL_RECOV;
>
> Maybe I'm justnot understanding things, but what's wrong with something
> like the below; why do we care about the ex-type if we know its a MOV
> reading from userspace?
>
> The less we muck about with the extable here, the better.
We need to make sure that we have register a fixup handler for the copy_user
case. If no fixup handler found, the PC accessing posion will trigger #MCE
again and again resulting a hardlock up.
>
> ---
> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
> index dac4d64dfb2a..cb021058165f 100644
> --- a/arch/x86/kernel/cpu/mce/severity.c
> +++ b/arch/x86/kernel/cpu/mce/severity.c
> @@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
> copy_user = is_copy_from_user(regs);
> instrumentation_end();
>
> - switch (fixup_type) {
> - case EX_TYPE_UACCESS:
> - if (!copy_user)
> - return IN_KERNEL;
> - m->kflags |= MCE_IN_KERNEL_COPYIN;
> - fallthrough;
> + if (copy_user) {
> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_COPYIN;
> + return IN_KERNEL_RECOV
> + }
>
> + switch (fixup_type) {
> case EX_TYPE_FAULT_MCE_SAFE:
> case EX_TYPE_DEFAULT_MCE_SAFE:
> m->kflags |= MCE_IN_KERNEL_RECOV;
Thanks.
Shuai
On Tue, Feb 18, 2025 at 09:28:33PM +0800, Shuai Xue wrote:
> I did build and test this patch set on it. But I did not find any warnings.
> Could you provide more details?
NOINSTR_VALIDATION=y helps
> > > /* Allow instrumentation around external facilities usage. */
> > > instrumentation_begin();
> > > - fixup_type = ex_get_fixup_type(m->ip);
> > > + fixup_type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
> > > + imm = FIELD_GET(EX_DATA_IMM_MASK, e->data);
> > > copy_user = is_copy_from_user(regs);
> > > instrumentation_end();
> > > @@ -304,9 +311,13 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
> > > case EX_TYPE_UACCESS:
> > > if (!copy_user)
> > > return IN_KERNEL;
> > > - m->kflags |= MCE_IN_KERNEL_COPYIN;
> > > - fallthrough;
> > > -
> > > + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
> > > + return IN_KERNEL_RECOV;
> > > + case EX_TYPE_IMM_REG:
> > > + if (!copy_user || imm != -EFAULT)
> > > + return IN_KERNEL;
> > > + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
> > > + return IN_KERNEL_RECOV;
> >
> > Maybe I'm justnot understanding things, but what's wrong with something
> > like the below; why do we care about the ex-type if we know its a MOV
> > reading from userspace?
> >
> > The less we muck about with the extable here, the better.
>
> We need to make sure that we have register a fixup handler for the copy_user
> case. If no fixup handler found, the PC accessing posion will trigger #MCE
> again and again resulting a hardlock up.
Well, then write it like so. Afaict, you don't care what the actual
exception type is, just that there is one, for the copy_user case.
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dac4d64dfb2a..cfdae25eacd7 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -301,18 +301,19 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
instrumentation_end();
switch (fixup_type) {
- case EX_TYPE_UACCESS:
- if (!copy_user)
- return IN_KERNEL;
- m->kflags |= MCE_IN_KERNEL_COPYIN;
- fallthrough;
-
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
default:
+ if (copy_user) {
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
+ }
+ fallthrough;
+
+ case EX_TYPE_NONE:
return IN_KERNEL;
}
}
On Tue, Feb 18, 2025 at 03:15:35PM +0100, Peter Zijlstra wrote:
> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
> index dac4d64dfb2a..cfdae25eacd7 100644
> --- a/arch/x86/kernel/cpu/mce/severity.c
> +++ b/arch/x86/kernel/cpu/mce/severity.c
> @@ -301,18 +301,19 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
> instrumentation_end();
>
> switch (fixup_type) {
> - case EX_TYPE_UACCESS:
> - if (!copy_user)
> - return IN_KERNEL;
> - m->kflags |= MCE_IN_KERNEL_COPYIN;
> - fallthrough;
> -
> case EX_TYPE_FAULT_MCE_SAFE:
> case EX_TYPE_DEFAULT_MCE_SAFE:
> m->kflags |= MCE_IN_KERNEL_RECOV;
> return IN_KERNEL_RECOV;
>
> default:
> + if (copy_user) {
As said on chat, if we can make is_copy_from_user() *always* correctly detect
user access, then sure but I'm afraid EX_TYPE_UACCESS being generated at the
handful places where we do user memory access is there for a reason as it
makes it pretty explicit.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Feb 18, 2025 at 05:48:00PM +0100, Borislav Petkov wrote:
> On Tue, Feb 18, 2025 at 03:15:35PM +0100, Peter Zijlstra wrote:
> > diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
> > index dac4d64dfb2a..cfdae25eacd7 100644
> > --- a/arch/x86/kernel/cpu/mce/severity.c
> > +++ b/arch/x86/kernel/cpu/mce/severity.c
> > @@ -301,18 +301,19 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
> > instrumentation_end();
> >
> > switch (fixup_type) {
> > - case EX_TYPE_UACCESS:
> > - if (!copy_user)
> > - return IN_KERNEL;
> > - m->kflags |= MCE_IN_KERNEL_COPYIN;
> > - fallthrough;
> > -
> > case EX_TYPE_FAULT_MCE_SAFE:
> > case EX_TYPE_DEFAULT_MCE_SAFE:
> > m->kflags |= MCE_IN_KERNEL_RECOV;
> > return IN_KERNEL_RECOV;
> >
> > default:
> > + if (copy_user) {
>
> As said on chat, if we can make is_copy_from_user() *always* correctly detect
> user access, then sure but I'm afraid EX_TYPE_UACCESS being generated at the
> handful places where we do user memory access is there for a reason as it
> makes it pretty explicit.
Thing is, we have copy routines that do not know if its user or not.
is_copy_from_user() must be reliable.
Anyway, if you all really want to go all funny, try the below.
Someone has to go and stick some EX_FLAG_USER on things, but I just
really don't believe that's doing to be useful. Because while you're
doing that, you should also audit if is_copy_from_user() will catch it
and if it does, you don't need the tag.
See how much tags you end up with..
---
diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
index 906b0d5541e8..1d6c6ff51d28 100644
--- a/arch/x86/include/asm/extable_fixup_types.h
+++ b/arch/x86/include/asm/extable_fixup_types.h
@@ -31,6 +31,9 @@
#define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2)
#define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3)
+#define EX_FLAG_USER EX_DATA_FLAG(4)
+#define EX_FLAG_MCE EX_DATA_FLAG(8)
+
/* types */
#define EX_TYPE_NONE 0
#define EX_TYPE_DEFAULT 1
@@ -46,8 +49,6 @@
#define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */
#define EX_TYPE_WRMSR_IN_MCE 12
#define EX_TYPE_RDMSR_IN_MCE 13
-#define EX_TYPE_DEFAULT_MCE_SAFE 14
-#define EX_TYPE_FAULT_MCE_SAFE 15
#define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */
#define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0))
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dac4d64dfb2a..86a32fa020d2 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -300,21 +300,20 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
copy_user = is_copy_from_user(regs);
instrumentation_end();
- switch (fixup_type) {
- case EX_TYPE_UACCESS:
- if (!copy_user)
- return IN_KERNEL;
- m->kflags |= MCE_IN_KERNEL_COPYIN;
- fallthrough;
-
- case EX_TYPE_FAULT_MCE_SAFE:
- case EX_TYPE_DEFAULT_MCE_SAFE:
+ if (fixup_type == EX_TYPE_NONE)
+ return IN_KERNEL;
+
+ if (fixup_type & EX_FLAG_MCE) {
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
+ }
- default:
- return IN_KERNEL;
+ if ((fixup_type & EX_FLAG_USER) || copy_user) {
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
}
+
+ return IN_KERNEL;
}
/* See AMD PPR(s) section Machine Check Error Handling. */
diff --git a/arch/x86/kernel/fpu/legacy.h b/arch/x86/kernel/fpu/legacy.h
index 098f367bb8a7..3f6036840d65 100644
--- a/arch/x86/kernel/fpu/legacy.h
+++ b/arch/x86/kernel/fpu/legacy.h
@@ -24,7 +24,7 @@ static inline void ldmxcsr(u32 mxcsr)
asm volatile(ASM_STAC "\n" \
"1: " #insn "\n" \
"2: " ASM_CLAC "\n" \
- _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT | EX_FLAG_MCE) \
: [err] "=a" (err), output \
: "0"(0), input); \
err; \
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index aa16f1a1bbcf..eef534091105 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -115,7 +115,7 @@ static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 ma
asm volatile("1:" op "\n\t" \
"xor %[err], %[err]\n" \
"2:\n\t" \
- _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT | EX_FLAG_MCE) \
: [err] "=a" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
index c859a8a09860..7977689ad46e 100644
--- a/arch/x86/lib/copy_mc_64.S
+++ b/arch/x86/lib/copy_mc_64.S
@@ -103,9 +103,9 @@ SYM_FUNC_START(copy_mc_fragile)
movl %ecx, %edx
jmp copy_mc_fragile_handle_tail
- _ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
- _ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT_MCE_SAFE)
- _ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
+ _ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT | EX_FLAG_MCE)
+ _ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT | EX_FLAG_MCE)
+ _ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT | EX_FLAG_MCE)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
@@ -143,7 +143,7 @@ SYM_FUNC_START(copy_mc_enhanced_fast_string)
movq %rcx, %rax
RET
- _ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT_MCE_SAFE)
+ _ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT | EX_FLAG_MCE)
SYM_FUNC_END(copy_mc_enhanced_fast_string)
#endif /* !CONFIG_UML */
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 51986e8a9d35..7358bf10baba 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -293,8 +293,10 @@ static bool ex_handler_eretu(const struct exception_table_entry *fixup,
int ex_get_fixup_type(unsigned long ip)
{
const struct exception_table_entry *e = search_exception_tables(ip);
+ if (!e)
+ return EX_TYPE_NONE;
- return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE;
+ return FIELD_GET(EX_DATA_TYPE_MASK, e->data) | (e->data & (EX_FLAG_USER | EX_FLAG_MCE));
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
@@ -327,10 +329,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
switch (type) {
case EX_TYPE_DEFAULT:
- case EX_TYPE_DEFAULT_MCE_SAFE:
return ex_handler_default(e, regs);
case EX_TYPE_FAULT:
- case EX_TYPE_FAULT_MCE_SAFE:
return ex_handler_fault(e, regs, trapnr);
case EX_TYPE_UACCESS:
return ex_handler_uaccess(e, regs, trapnr, fault_addr);
在 2025/2/19 18:40, Peter Zijlstra 写道:
> On Tue, Feb 18, 2025 at 05:48:00PM +0100, Borislav Petkov wrote:
>> On Tue, Feb 18, 2025 at 03:15:35PM +0100, Peter Zijlstra wrote:
>>> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
>>> index dac4d64dfb2a..cfdae25eacd7 100644
>>> --- a/arch/x86/kernel/cpu/mce/severity.c
>>> +++ b/arch/x86/kernel/cpu/mce/severity.c
>>> @@ -301,18 +301,19 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
>>> instrumentation_end();
>>>
>>> switch (fixup_type) {
>>> - case EX_TYPE_UACCESS:
>>> - if (!copy_user)
>>> - return IN_KERNEL;
>>> - m->kflags |= MCE_IN_KERNEL_COPYIN;
>>> - fallthrough;
>>> -
>>> case EX_TYPE_FAULT_MCE_SAFE:
>>> case EX_TYPE_DEFAULT_MCE_SAFE:
>>> m->kflags |= MCE_IN_KERNEL_RECOV;
>>> return IN_KERNEL_RECOV;
>>>
>>> default:
>>> + if (copy_user) {
>>
>> As said on chat, if we can make is_copy_from_user() *always* correctly detect
>> user access, then sure but I'm afraid EX_TYPE_UACCESS being generated at the
>> handful places where we do user memory access is there for a reason as it
>> makes it pretty explicit.
>
> Thing is, we have copy routines that do not know if its user or not.
> is_copy_from_user() must be reliable.
>
> Anyway, if you all really want to go all funny, try the below.
>
> Someone has to go and stick some EX_FLAG_USER on things, but I just
> really don't believe that's doing to be useful. Because while you're
> doing that, you should also audit if is_copy_from_user() will catch it
> and if it does, you don't need the tag.
>
> See how much tags you end up with..
Agreed, I think the key point whether the error context is in a read from user
memory. We do not care about the ex-type if we know its a MOV
reading from userspace.
is_copy_from_user() return true when both of the following two checks are
true:
- the current instruction is copy
- source address is user memory
If copy_user is true, we set
m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
Then do_machine_check will try fixup_exception first.
/*
* Handle an MCE which has happened in kernel space but from
* which the kernel can recover: ex_has_fault_handler() has
* already verified that the rIP at which the error happened is
* a rIP from which the kernel can recover (by jumping to
* recovery code specified in _ASM_EXTABLE_FAULT()) and the
* corresponding exception handler which would do that is the
* proper one.
*/
if (m->kflags & MCE_IN_KERNEL_RECOV) {
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &err, msg);
}
if (m->kflags & MCE_IN_KERNEL_COPYIN)
queue_task_work(&err, msg, kill_me_never);
So Peter's code is fine to me.
---
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dac4d64dfb2a..cb021058165f 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
copy_user = is_copy_from_user(regs);
instrumentation_end();
- switch (fixup_type) {
- case EX_TYPE_UACCESS:
- if (!copy_user)
- return IN_KERNEL;
- m->kflags |= MCE_IN_KERNEL_COPYIN;
- fallthrough;
+ if (copy_user) {
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_COPYIN;
+ return IN_KERNEL_RECOV
+ }
+ switch (fixup_type) {
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
Is that ok? Please correct me if I missed anyting.
Thanks.
Shuai
On Tue, Feb 18, 2025 at 01:54:08PM +0100, Peter Zijlstra wrote:
> ---
> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
> index dac4d64dfb2a..cb021058165f 100644
> --- a/arch/x86/kernel/cpu/mce/severity.c
> +++ b/arch/x86/kernel/cpu/mce/severity.c
> @@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
> copy_user = is_copy_from_user(regs);
> instrumentation_end();
>
> - switch (fixup_type) {
> - case EX_TYPE_UACCESS:
> - if (!copy_user)
> - return IN_KERNEL;
> - m->kflags |= MCE_IN_KERNEL_COPYIN;
> - fallthrough;
> + if (copy_user) {
> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_COPYIN;
Typing is hard, obviously that second should'be been _RECOV.
> + return IN_KERNEL_RECOV
But why are we having that bit *and* a return value saying the same
thing?
> + }
>
> + switch (fixup_type) {
> case EX_TYPE_FAULT_MCE_SAFE:
> case EX_TYPE_DEFAULT_MCE_SAFE:
> m->kflags |= MCE_IN_KERNEL_RECOV;
在 2025/2/18 21:02, Peter Zijlstra 写道:
> On Tue, Feb 18, 2025 at 01:54:08PM +0100, Peter Zijlstra wrote:
>
>> ---
>> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
>> index dac4d64dfb2a..cb021058165f 100644
>> --- a/arch/x86/kernel/cpu/mce/severity.c
>> +++ b/arch/x86/kernel/cpu/mce/severity.c
>> @@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
>> copy_user = is_copy_from_user(regs);
>> instrumentation_end();
>>
>> - switch (fixup_type) {
>> - case EX_TYPE_UACCESS:
>> - if (!copy_user)
>> - return IN_KERNEL;
>> - m->kflags |= MCE_IN_KERNEL_COPYIN;
>> - fallthrough;
>> + if (copy_user) {
>> + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_COPYIN;
>
> Typing is hard, obviously that second should'be been _RECOV.
Hah, it doesn't matter, I got your point.
>
>> + return IN_KERNEL_RECOV
>
> But why are we having that bit *and* a return value saying the same
> thing?
Yes, it is rather redundant. I can refactor this out if @Borislav is
happy with that.
>
>> + }
>>
>> + switch (fixup_type) {
>> case EX_TYPE_FAULT_MCE_SAFE:
>> case EX_TYPE_DEFAULT_MCE_SAFE:
>> m->kflags |= MCE_IN_KERNEL_RECOV;
Thanks.
Shuai
© 2016 - 2025 Red Hat, Inc.