From: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
Inline KASAN on x86 should report tag mismatches by passing the
metadata through the UD1 instruction and the faulty address through RDI,
a scheme that's already used by UBSan and is easy to extend.

LLVM currently passes software tag-based KASAN metadata using the INT3
instruction. That should change because it doesn't align with how the
kernel already handles UD1 for similar use cases. Since inline software
tag-based KASAN doesn't work on x86 yet due to missing compiler support,
the compiler side can be fixed and INT3 changed to UD1 at the same time.

Add a kasan component to the #UD decoding and handling functions.

Make the part of that hook which decides whether to die or recover from
a tag mismatch arch-independent, to avoid duplicating a long comment on
both x86 and arm64.

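As an illustration of the metadata layout, here is how a made-up value
would decode with the KASAN_ECX_* macros this patch adds (the value is
only an example, not taken from a real report):

	/* Example only: assume the compiler put 0x13 in ECX at the UD1 site. */
	int metadata = 0x13;
	size_t size  = KASAN_ECX_SIZE(metadata);      /* 1 << (0x13 & 0x0f) = 8 bytes */
	bool write   = metadata & KASAN_ECX_WRITE;    /* 0x10 set: a write access */
	bool recover = metadata & KASAN_ECX_RECOVER;  /* 0x20 clear: die() on mismatch */
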
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
---
Changelog v6:
- Change the whole patch from using INT3 to UD1.
Changelog v5:
- Add die to argument list of kasan_inline_recover() in
arch/arm64/kernel/traps.c.
Changelog v4:
- Make kasan_handler() a stub in a header file. Remove #ifdef from
traps.c.
- Consolidate the "recover" comment into one place.
- Make small changes to the patch message.
 MAINTAINERS                  |  2 +-
 arch/x86/include/asm/bug.h   |  1 +
 arch/x86/include/asm/kasan.h | 20 ++++++++++++++++++++
 arch/x86/kernel/traps.c      |  8 ++++++++
 arch/x86/mm/Makefile         |  2 ++
 arch/x86/mm/kasan_inline.c   | 21 +++++++++++++++++++++
 include/linux/kasan.h        | 23 +++++++++++++++++++++++
 7 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/mm/kasan_inline.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 53cbc7534911..a6e3cc2f3cc5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13422,7 +13422,7 @@ S: Maintained
B: https://bugzilla.kernel.org/buglist.cgi?component=Sanitizers&product=Memory%20Management
F: Documentation/dev-tools/kasan.rst
F: arch/*/include/asm/*kasan*.h
-F: arch/*/mm/kasan_init*
+F: arch/*/mm/kasan_*
F: include/linux/kasan*.h
F: lib/Kconfig.kasan
F: mm/kasan/
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 880ca15073ed..428c8865b995 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -31,6 +31,7 @@
#define BUG_UD2 0xfffe
#define BUG_UD1 0xfffd
#define BUG_UD1_UBSAN 0xfffc
+#define BUG_UD1_KASAN 0xfffb
#define BUG_UDB 0xffd6
#define BUG_LOCK 0xfff0
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 396071832d02..375651d9b114 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -6,6 +6,24 @@
#include <linux/kasan-tags.h>
#include <linux/types.h>
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
+/*
+ * LLVM ABI for reporting tag mismatches in inline KASAN mode.
+ * On x86 the UD1 instruction is used to carry metadata in the ECX register
+ * to the KASAN report. ECX is used to differentiate KASAN from UBSan when
+ * decoding the UD1 instruction.
+ *
+ * SIZE refers to how many bytes the faulty memory access
+ * requested.
+ * WRITE bit, when set, indicates the access was a write, otherwise
+ * it was a read.
+ * RECOVER bit, when set, should allow the kernel to carry on after
+ * a tag mismatch. Otherwise die() is called.
+ */
+#define KASAN_ECX_RECOVER 0x20
+#define KASAN_ECX_WRITE 0x10
+#define KASAN_ECX_SIZE_MASK 0x0f
+#define KASAN_ECX_SIZE(ecx) (1 << ((ecx) & KASAN_ECX_SIZE_MASK))
#define KASAN_SHADOW_SCALE_SHIFT 3
/*
@@ -34,10 +52,12 @@
#define __tag_shifted(tag) FIELD_PREP(GENMASK_ULL(60, 57), tag)
#define __tag_reset(addr) (sign_extend64((u64)(addr), 56))
#define __tag_get(addr) ((u8)FIELD_GET(GENMASK_ULL(60, 57), (u64)addr))
+void kasan_inline_handler(struct pt_regs *regs);
#else
#define __tag_shifted(tag) 0UL
#define __tag_reset(addr) (addr)
#define __tag_get(addr) 0
+static inline void kasan_inline_handler(struct pt_regs *regs) { }
#endif /* CONFIG_KASAN_SW_TAGS */
#ifdef CONFIG_64BIT
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6b22611e69cc..40fefd306c76 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -179,6 +179,9 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
 	if (X86_MODRM_REG(v) == 0)	/* EAX */
 		return BUG_UD1_UBSAN;
 
+	if (X86_MODRM_REG(v) == 1)	/* ECX */
+		return BUG_UD1_KASAN;
+
 	return BUG_UD1;
 }
 
@@ -357,6 +360,11 @@ static noinstr bool handle_bug(struct pt_regs *regs)
 		}
 		break;
 
+	case BUG_UD1_KASAN:
+		kasan_inline_handler(regs);
+		handled = true;
+		break;
+
 	default:
 		break;
 	}
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 5b9908f13dcf..1dc18090cbe7 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -36,7 +36,9 @@ obj-$(CONFIG_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o
KASAN_SANITIZE_kasan_init_$(BITS).o := n
+KASAN_SANITIZE_kasan_inline.o := n
obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o
+obj-$(CONFIG_KASAN_SW_TAGS) += kasan_inline.o
KMSAN_SANITIZE_kmsan_shadow.o := n
obj-$(CONFIG_KMSAN) += kmsan_shadow.o
diff --git a/arch/x86/mm/kasan_inline.c b/arch/x86/mm/kasan_inline.c
new file mode 100644
index 000000000000..65641557c294
--- /dev/null
+++ b/arch/x86/mm/kasan_inline.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kasan.h>
+#include <linux/kdebug.h>
+
+void kasan_inline_handler(struct pt_regs *regs)
+{
+	int metadata = regs->cx;
+	u64 addr = regs->di;
+	u64 pc = regs->ip;
+	bool recover = metadata & KASAN_ECX_RECOVER;
+	bool write = metadata & KASAN_ECX_WRITE;
+	size_t size = KASAN_ECX_SIZE(metadata);
+
+	if (user_mode(regs))
+		return;
+
+	if (!kasan_report((void *)addr, size, write, pc))
+		return;
+
+	kasan_die_unless_recover(recover, "Oops - KASAN", regs, metadata, die);
+}
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 3c0c60ed5d5c..9bd1b1ebd674 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -679,4 +679,27 @@ void kasan_non_canonical_hook(unsigned long addr);
static inline void kasan_non_canonical_hook(unsigned long addr) { }
#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
+#ifdef CONFIG_KASAN_SW_TAGS
+/*
+ * The instrumentation allows to control whether we can proceed after
+ * a crash was detected. This is done by passing the -recover flag to
+ * the compiler. Disabling recovery allows to generate more compact
+ * code.
+ *
+ * Unfortunately disabling recovery doesn't work for the kernel right
+ * now. KASAN reporting is disabled in some contexts (for example when
+ * the allocator accesses slab object metadata; this is controlled by
+ * current->kasan_depth). All these accesses are detected by the tool,
+ * even though the reports for them are not printed.
+ *
+ * This is something that might be fixed at some point in the future.
+ */
+static inline void kasan_die_unless_recover(bool recover, char *msg, struct pt_regs *regs,
+		unsigned long err, void die_fn(const char *str, struct pt_regs *regs, long err))
+{
+	if (!recover)
+		die_fn(msg, regs, err);
+}
+#endif
+
#endif /* LINUX_KASAN_H */
--
2.51.0
On Wed, Oct 29, 2025 at 08:09:51PM +0000, Maciej Wieczor-Retman wrote:
> From: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
>
> Inline KASAN on x86 should do tag mismatch reports by passing the
> metadata through the UD1 instruction and the faulty address through RDI,
> a scheme that's already used by UBSan and is easy to extend.
>
> The current LLVM way of passing KASAN software tag mode metadata is done
> using the INT3 instruction. However that should be changed because it
> doesn't align to how the kernel already handles UD1 for similar use
> cases. Since inline software tag-based KASAN doesn't work on x86 due to
> missing compiler support it can be fixed and the INT3 can be changed to
> UD1 at the same time.
>
> Add a kasan component to the #UD decoding and handling functions.
>
> Make part of that hook - which decides whether to die or recover from a
> tag mismatch - arch independent to avoid duplicating a long comment on
> both x86 and arm64 architectures.
>
> diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
> index 396071832d02..375651d9b114 100644
> --- a/arch/x86/include/asm/kasan.h
> +++ b/arch/x86/include/asm/kasan.h
> @@ -6,6 +6,24 @@
> #include <linux/kasan-tags.h>
> #include <linux/types.h>
> #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
> +
> +/*
> + * LLVM ABI for reporting tag mismatches in inline KASAN mode.
> + * On x86 the UD1 instruction is used to carry metadata in the ECX register
> + * to the KASAN report. ECX is used to differentiate KASAN from UBSan when
> + * decoding the UD1 instruction.
> + *
> + * SIZE refers to how many bytes the faulty memory access
> + * requested.
> + * WRITE bit, when set, indicates the access was a write, otherwise
> + * it was a read.
> + * RECOVER bit, when set, should allow the kernel to carry on after
> + * a tag mismatch. Otherwise die() is called.
> + */
> +#define KASAN_ECX_RECOVER 0x20
> +#define KASAN_ECX_WRITE 0x10
> +#define KASAN_ECX_SIZE_MASK 0x0f
> +#define KASAN_ECX_SIZE(ecx) (1 << ((ecx) & KASAN_ECX_SIZE_MASK))
> #define KASAN_SHADOW_SCALE_SHIFT 3
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 6b22611e69cc..40fefd306c76 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -179,6 +179,9 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
> if (X86_MODRM_REG(v) == 0) /* EAX */
> return BUG_UD1_UBSAN;
>
> + if (X86_MODRM_REG(v) == 1) /* ECX */
> + return BUG_UD1_KASAN;
> +
> return BUG_UD1;
> }
>
> @@ -357,6 +360,11 @@ static noinstr bool handle_bug(struct pt_regs *regs)
> }
> break;
>
> + case BUG_UD1_KASAN:
> + kasan_inline_handler(regs);
> + handled = true;
> + break;
> +
> default:
> break;
> }
> +void kasan_inline_handler(struct pt_regs *regs)
> +{
> + int metadata = regs->cx;
> + u64 addr = regs->di;
> + u64 pc = regs->ip;
> + bool recover = metadata & KASAN_ECX_RECOVER;
> + bool write = metadata & KASAN_ECX_WRITE;
> + size_t size = KASAN_ECX_SIZE(metadata);
> +
> + if (user_mode(regs))
> + return;
> +
> + if (!kasan_report((void *)addr, size, write, pc))
> + return;
> +
> + kasan_die_unless_recover(recover, "Oops - KASAN", regs, metadata, die);
> +}
I'm confused. Going by the ARM64 code, the meta-data is constant per
site -- it is encoded in the break immediate.

And I suggested you do the same on x86 by using the single byte
displacement instruction encoding.

	ud1 0xFF(%ecx), %ecx

Also, we don't have to use a fixed register for the address, you can do:

	ud1 0xFF(%ecx), %reg

and have %reg tell us what register the address is in.

Then you can recover the meta-data from the displacement immediate and
the address from whatever register is denoted.

This avoids the 'callsite' from having to clobber cx and move the address
into di.

What you have here will work, and I don't suppose we care about code
density with KASAN much, but it could've been so much better :/
On 2025-11-11 at 11:27:19 +0100, Peter Zijlstra wrote:
>On Wed, Oct 29, 2025 at 08:09:51PM +0000, Maciej Wieczor-Retman wrote:
>> From: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
>>
>> Inline KASAN on x86 should do tag mismatch reports by passing the
>> metadata through the UD1 instruction and the faulty address through RDI,
>> a scheme that's already used by UBSan and is easy to extend.
>>
>> The current LLVM way of passing KASAN software tag mode metadata is done
>> using the INT3 instruction. However that should be changed because it
>> doesn't align to how the kernel already handles UD1 for similar use
>> cases. Since inline software tag-based KASAN doesn't work on x86 due to
>> missing compiler support it can be fixed and the INT3 can be changed to
>> UD1 at the same time.
>>
>> Add a kasan component to the #UD decoding and handling functions.
>>
>> Make part of that hook - which decides whether to die or recover from a
>> tag mismatch - arch independent to avoid duplicating a long comment on
>> both x86 and arm64 architectures.
>>
>
>> diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
>> index 396071832d02..375651d9b114 100644
>> --- a/arch/x86/include/asm/kasan.h
>> +++ b/arch/x86/include/asm/kasan.h
>> @@ -6,6 +6,24 @@
>> #include <linux/kasan-tags.h>
>> #include <linux/types.h>
>> #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
>> +
>> +/*
>> + * LLVM ABI for reporting tag mismatches in inline KASAN mode.
>> + * On x86 the UD1 instruction is used to carry metadata in the ECX register
>> + * to the KASAN report. ECX is used to differentiate KASAN from UBSan when
>> + * decoding the UD1 instruction.
>> + *
>> + * SIZE refers to how many bytes the faulty memory access
>> + * requested.
>> + * WRITE bit, when set, indicates the access was a write, otherwise
>> + * it was a read.
>> + * RECOVER bit, when set, should allow the kernel to carry on after
>> + * a tag mismatch. Otherwise die() is called.
>> + */
>> +#define KASAN_ECX_RECOVER 0x20
>> +#define KASAN_ECX_WRITE 0x10
>> +#define KASAN_ECX_SIZE_MASK 0x0f
>> +#define KASAN_ECX_SIZE(ecx) (1 << ((ecx) & KASAN_ECX_SIZE_MASK))
>> #define KASAN_SHADOW_SCALE_SHIFT 3
>
>> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
>> index 6b22611e69cc..40fefd306c76 100644
>> --- a/arch/x86/kernel/traps.c
>> +++ b/arch/x86/kernel/traps.c
>> @@ -179,6 +179,9 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
>> if (X86_MODRM_REG(v) == 0) /* EAX */
>> return BUG_UD1_UBSAN;
>>
>> + if (X86_MODRM_REG(v) == 1) /* ECX */
>> + return BUG_UD1_KASAN;
>> +
>> return BUG_UD1;
>> }
>>
>> @@ -357,6 +360,11 @@ static noinstr bool handle_bug(struct pt_regs *regs)
>> }
>> break;
>>
>> + case BUG_UD1_KASAN:
>> + kasan_inline_handler(regs);
>> + handled = true;
>> + break;
>> +
>> default:
>> break;
>> }
>
>> +void kasan_inline_handler(struct pt_regs *regs)
>> +{
>> + int metadata = regs->cx;
>> + u64 addr = regs->di;
>> + u64 pc = regs->ip;
>> + bool recover = metadata & KASAN_ECX_RECOVER;
>> + bool write = metadata & KASAN_ECX_WRITE;
>> + size_t size = KASAN_ECX_SIZE(metadata);
>> +
>> + if (user_mode(regs))
>> + return;
>> +
>> + if (!kasan_report((void *)addr, size, write, pc))
>> + return;
>> +
>> + kasan_die_unless_recover(recover, "Oops - KASAN", regs, metadata, die);
>> +}
>
>I'm confused. Going by the ARM64 code, the meta-data is constant per
>site -- it is encoded in the break immediate.
>
>And I suggested you do the same on x86 by using the single byte
>displacement instruction encoding.
>
> ud1 0xFF(%ecx), %ecx
>
>Also, we don't have to use a fixed register for the address, you can do:
>
> ud1 0xFF(%ecx), %reg
>
>and have %reg tell us what register the address is in.
>
>Then you can recover the meta-data from the displacement immediate and
>the address from whatever register is denoted.
>
>This avoids the 'callsite' from having to clobber cx and move the address
>into di.
>
>What you have here will work, and I don't suppose we care about code
>density with KASAN much, but it could've been so much better :/
Thanks for checking the patch out, maybe I got too focused on just
getting clang to work. You're right, I'll try using the displacement
encoding.

I was attempting a few different encodings because clang was fussy about
putting data where I wanted it. The one in the patch worked fine and I
thought it'd be consistent with the form that UBSan uses. But yeah, I'll
work on it more.

I'll also go and rebase my series onto your WARN() hackery one since
there are a lot of changes to traps.c.
On Mon, Nov 17, 2025 at 09:47:20AM +0000, Maciej Wieczór-Retman wrote:
> >> +void kasan_inline_handler(struct pt_regs *regs)
> >> +{
> >> + int metadata = regs->cx;
> >> + u64 addr = regs->di;
> >> + u64 pc = regs->ip;
> >> + bool recover = metadata & KASAN_ECX_RECOVER;
> >> + bool write = metadata & KASAN_ECX_WRITE;
> >> + size_t size = KASAN_ECX_SIZE(metadata);
> >> +
> >> + if (user_mode(regs))
> >> + return;
> >> +
> >> + if (!kasan_report((void *)addr, size, write, pc))
> >> + return;
> >> +
> >> + kasan_die_unless_recover(recover, "Oops - KASAN", regs, metadata, die);
> >> +}
> >
> >I'm confused. Going by the ARM64 code, the meta-data is constant per
> >site -- it is encoded in the break immediate.
> >
> >And I suggested you do the same on x86 by using the single byte
> >displacement instruction encoding.
> >
> > ud1 0xFF(%ecx), %ecx
> >
> >Also, we don't have to use a fixed register for the address, you can do:
> >
> > ud1 0xFF(%ecx), %reg
> >
> >and have %reg tell us what register the address is in.
> >
> >Then you can recover the meta-data from the displacement immediate and
> >the address from whatever register is denoted.
> >
> >This avoids the 'callsite' from having to clobber cx and move the address
> >into di.
> >
> >What you have here will work, and I don't suppose we care about code
> >density with KASAN much, but it could've been so much better :/
>
> Thanks for checking the patch out, maybe I got too focused on just
> getting clang to work. You're right, I'll try using the displacement
> encoding.
>
> I was attempting a few different encodings because clang was fussy about
> putting data where I wanted it. The one in the patch worked fine and I
> thought it'd be consistent with the form that UBSan uses. But yeah, I'll
> work on it more.
>
> I'll also go and rebase my series onto your WARN() hackery one since
> there are a lot of changes to traps.c.
Thanks!
> +++ b/arch/x86/mm/kasan_inline.c

The name kasan_inline.c is confusing: a reader may assume that this file
is used for CONFIG_KASAN_INLINE, or that it contains inline functions,
while neither is true. I suggest renaming it to something like
kasan_sw_tags.c.