From: Hao Ge <gehao@kylinos.cn>
When CONFIG_KASAN is enabled but CONFIG_KASAN_VMALLOC is not,
we may hit a panic during system boot, because no pages have been
allocated and mapped for the shadow memory covering the module_tags
region, unlike what is done for execmem_vmalloc.

Since module_tags memory is allocated on demand, the corresponding
shadow memory also has to be allocated on demand, while still
respecting the MODULE_ALIGN requirement.

Here is the panic log:
[ 18.349421] BUG: unable to handle page fault for address: fffffbfff8092000
[ 18.350016] #PF: supervisor read access in kernel mode
[ 18.350459] #PF: error_code(0x0000) - not-present page
[ 18.350904] PGD 20fe52067 P4D 219dc8067 PUD 219dc4067 PMD 102495067 PTE 0
[ 18.351484] Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
[ 18.351961] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 6.13.0-rc1+ #3
[ 18.352533] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 18.353494] RIP: 0010:kasan_check_range+0xba/0x1b0
[ 18.353931] Code: 8d 5a 07 4c 0f 49 da 49 c1 fb 03 45 85 db 0f 84 dd 00 00 00 45 89 db 4a 8d 14 d8 eb 0d 48 83 c0 08 48 39 c2 0f 84 c1 00 00 00 <48> 83 38 00 74 ed 48 8d 50 08 eb 0d 48 83 c0 01 48 39 d0 0f 84 90
[ 18.355484] RSP: 0018:ff11000101877958 EFLAGS: 00010206
[ 18.355937] RAX: fffffbfff8092000 RBX: fffffbfff809201e RCX: ffffffff82a7ceac
[ 18.356542] RDX: fffffbfff8092018 RSI: 00000000000000f0 RDI: ffffffffc0490000
[ 18.357153] RBP: fffffbfff8092000 R08: 0000000000000001 R09: fffffbfff809201d
[ 18.357756] R10: ffffffffc04900ef R11: 0000000000000003 R12: ffffffffc0490000
[ 18.358365] R13: ff11000101877b48 R14: ffffffffc0490000 R15: 000000000000002c
[ 18.358968] FS: 00007f9bd13c5940(0000) GS:ff110001eb480000(0000) knlGS:0000000000000000
[ 18.359648] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 18.360178] CR2: fffffbfff8092000 CR3: 0000000109214004 CR4: 0000000000771ef0
[ 18.360790] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 18.361404] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 18.362020] PKRU: 55555554
[ 18.362261] Call Trace:
[ 18.362481] <TASK>
[ 18.362671] ? __die+0x23/0x70
[ 18.362964] ? page_fault_oops+0xc2/0x160
[ 18.363318] ? exc_page_fault+0xad/0xc0
[ 18.363680] ? asm_exc_page_fault+0x26/0x30
[ 18.364056] ? move_module+0x3cc/0x8a0
[ 18.364398] ? kasan_check_range+0xba/0x1b0
[ 18.364755] __asan_memcpy+0x3c/0x60
[ 18.365074] move_module+0x3cc/0x8a0
[ 18.365386] layout_and_allocate.constprop.0+0x3d5/0x720
[ 18.365841] ? early_mod_check+0x3dc/0x510
[ 18.366195] load_module+0x72/0x1850
[ 18.366509] ? __pfx_kernel_read_file+0x10/0x10
[ 18.366918] ? vm_mmap_pgoff+0x21c/0x2d0
[ 18.367262] init_module_from_file+0xd1/0x130
[ 18.367638] ? __pfx_init_module_from_file+0x10/0x10
[ 18.368073] ? __pfx__raw_spin_lock+0x10/0x10
[ 18.368456] ? __pfx_cred_has_capability.isra.0+0x10/0x10
[ 18.368938] idempotent_init_module+0x22c/0x790
[ 18.369332] ? simple_getattr+0x6f/0x120
[ 18.369676] ? __pfx_idempotent_init_module+0x10/0x10
[ 18.370110] ? fdget+0x58/0x3a0
[ 18.370393] ? security_capable+0x64/0xf0
[ 18.370745] __x64_sys_finit_module+0xc2/0x140
[ 18.371136] do_syscall_64+0x7d/0x160
[ 18.371459] ? fdget_pos+0x1c8/0x4c0
[ 18.371784] ? ksys_read+0xfd/0x1d0
[ 18.372106] ? syscall_exit_to_user_mode+0x10/0x1f0
[ 18.372525] ? do_syscall_64+0x89/0x160
[ 18.372860] ? do_syscall_64+0x89/0x160
[ 18.373194] ? do_syscall_64+0x89/0x160
[ 18.373527] ? syscall_exit_to_user_mode+0x10/0x1f0
[ 18.373952] ? do_syscall_64+0x89/0x160
[ 18.374283] ? syscall_exit_to_user_mode+0x10/0x1f0
[ 18.374701] ? do_syscall_64+0x89/0x160
[ 18.375037] ? do_user_addr_fault+0x4a8/0xa40
[ 18.375416] ? clear_bhb_loop+0x25/0x80
[ 18.375748] ? clear_bhb_loop+0x25/0x80
[ 18.376119] ? clear_bhb_loop+0x25/0x80
[ 18.376450] entry_SYSCALL_64_after_hwframe+0x76/0x7e
Fixes: 233e89322cbe ("alloc_tag: fix module allocation tags populated area calculation")
Reported-by: Ben Greear <greearb@candelatech.com>
Closes: https://lore.kernel.org/all/1ba0cc57-e2ed-caa2-1241-aa5615bee01f@candelatech.com/
Signed-off-by: Hao Ge <gehao@kylinos.cn>
---
v2: Add comments to make the code easier to understand.
Align nr << PAGE_SHIFT up to MODULE_ALIGN; kasan_alloc_module_shadow
already handles this internally, but the explicit round_up makes the
code more readable.

commit 233e89322cbe ("alloc_tag: fix module allocation
tags populated area calculation") is currently in the
mm-hotfixes-unstable branch, so this patch is based on
the mm-hotfixes-unstable branch.
---
lib/alloc_tag.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index f942408b53ef..bd3ee57ea13f 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -10,6 +10,7 @@
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
+#include <linux/math.h>
#define ALLOCINFO_FILE_NAME "allocinfo"
#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag))
@@ -422,6 +423,17 @@ static int vm_module_tags_populate(void)
return -ENOMEM;
}
vm_module_tags->nr_pages += nr;
+
+ /*
+ * Kasan allocates 1 byte of shadow for every 8 bytes of data.
+ * When kasan_alloc_module_shadow allocates shadow memory,
+ * it does so in units of pages.
+ * Therefore, here we need to align to MODULE_ALIGN.
+ */
+ if ((phys_end & (MODULE_ALIGN - 1)) == 0)
+ kasan_alloc_module_shadow((void *)phys_end,
+ round_up(nr << PAGE_SHIFT, MODULE_ALIGN),
+ GFP_KERNEL);
}
/*
--
2.25.1
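For reference, the alignment arithmetic that the comment added in the hunk
above relies on can be modelled in a few lines of plain C. This is only an
illustrative userspace sketch: the constants mirror the x86 defaults
discussed later in the thread (PAGE_SIZE 4096, KASAN_SHADOW_SCALE_SHIFT 3),
and kasan_alloc_module_shadow() itself is not called here.

	/*
	 * Standalone sketch of the alignment check the patch adds.
	 * Nothing here is kernel API; it only models the arithmetic.
	 */
	#include <stdio.h>

	#define PAGE_SHIFT		 12
	#define PAGE_SIZE		 (1UL << PAGE_SHIFT)
	#define KASAN_SHADOW_SCALE_SHIFT 3
	/* With CONFIG_KASAN_VMALLOC disabled: */
	#define MODULE_ALIGN		 (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)

	int main(void)
	{
		unsigned long phys_end;

		/* One shadow byte covers 8 data bytes, so one 4 KiB shadow
		 * page covers MODULE_ALIGN == 32 KiB of module_tags data. */
		printf("MODULE_ALIGN = 0x%lx\n", MODULE_ALIGN);

		/* The patch only calls kasan_alloc_module_shadow() when
		 * phys_end sits on a MODULE_ALIGN boundary, i.e. when a full
		 * shadow page's worth of data has been reached. */
		for (phys_end = 0x8000; phys_end <= 0x10000; phys_end += PAGE_SIZE)
			printf("phys_end 0x%lx -> %s\n", phys_end,
			       (phys_end & (MODULE_ALIGN - 1)) == 0 ?
			       "allocate shadow" : "skip");
		return 0;
	}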
On Mon, Dec 9, 2024 at 10:53 PM Hao Ge <hao.ge@linux.dev> wrote:
> From: Hao Ge <gehao@kylinos.cn>
>
> [...]
>
> +       /*
> +        * Kasan allocates 1 byte of shadow for every 8 bytes of data.
> +        * When kasan_alloc_module_shadow allocates shadow memory,
> +        * it does so in units of pages.
> +        * Therefore, here we need to align to MODULE_ALIGN.
> +        */
> +       if ((phys_end & (MODULE_ALIGN - 1)) == 0)

phys_end is calculated as:

        unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) +
                                 (vm_module_tags->nr_pages << PAGE_SHIFT);

and therefore is always PAGE_SIZE-aligned. PAGE_SIZE is always a
multiple of MODULE_ALIGN, therefore phys_end is always
MODULE_ALIGN-aligned and the above condition is not needed.

> +               kasan_alloc_module_shadow((void *)phys_end,
> +                                         round_up(nr << PAGE_SHIFT, MODULE_ALIGN),

Here again, (nr << PAGE_SHIFT) is PAGE_SIZE-aligned and PAGE_SIZE is a
multiple of MODULE_ALIGN, therefore (nr << PAGE_SHIFT) is always
multiple of MODULE_ALIGN and there is no need for round_up().

IOW, I think this patch should simply add one line:

        vm_module_tags->nr_pages += nr;
+       kasan_alloc_module_shadow((void *)phys_end, nr << PAGE_SHIFT, GFP_KERNEL);

Am I missing something?

> +                                         GFP_KERNEL);
> }
On 12/10/24 09:55, Suren Baghdasaryan wrote:
> On Mon, Dec 9, 2024 at 10:53 PM Hao Ge <hao.ge@linux.dev> wrote:
>> From: Hao Ge <gehao@kylinos.cn>
>>
>> [...]

Hello,

I applied this patch:

https://lore.kernel.org/all/20241130001423.1114965-1-surenb@google.com/raw

as well as the v2 patch in this email thread, on top of today's Linus
kernel (plus local patches that should be unrelated to this particular
issue), and now my system boots when my current mix of kernel debugging
options is enabled.

Thanks for the fix!

--Ben

--
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com
Hi Suren,

Thanks for your review.

On 12/11/24 01:55, Suren Baghdasaryan wrote:
> On Mon, Dec 9, 2024 at 10:53 PM Hao Ge <hao.ge@linux.dev> wrote:
>> [...]
>>
>> +       if ((phys_end & (MODULE_ALIGN - 1)) == 0)
> phys_end is calculated as:
>
>         unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) +
>                                  (vm_module_tags->nr_pages << PAGE_SHIFT);
>
> and therefore is always PAGE_SIZE-aligned. PAGE_SIZE is always a
> multiple of MODULE_ALIGN, therefore phys_end is always
> MODULE_ALIGN-aligned and the above condition is not needed.

When CONFIG_KASAN_VMALLOC is not enabled:

        #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)

https://elixir.bootlin.com/linux/v6.13-rc2/source/include/linux/execmem.h#L11

and on x86, KASAN_SHADOW_SCALE_SHIFT is set to 3:

https://elixir.bootlin.com/linux/v6.13-rc2/source/arch/x86/include/asm/kasan.h#L7

As mentioned in my comment, KASAN allocates 1 byte of shadow for every
8 bytes of data, so one shadow page allocated through
kasan_alloc_module_shadow corresponds to eight physical pages in our
system. That is why we need MODULE_ALIGN to ensure proper alignment
when allocating shadow memory for modules using KASAN.

Let's look at kasan_alloc_module_shadow again. Assuming phys_end is 0
for the sake of this example, a single shadow page can only represent
the address range [0, 0x7FFF]. So it is incorrect to call
kasan_alloc_module_shadow every time a page is allocated, as it can
trigger warnings in the system:

https://elixir.bootlin.com/linux/v6.13-rc2/source/mm/kasan/shadow.c#L599

Thanks

Best Regards
Hao
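To make the 1:8 coverage ratio described above concrete, here is a small
illustrative sketch. The shadow-offset math only mimics KASAN generic mode
(addr >> KASAN_SHADOW_SCALE_SHIFT, ignoring KASAN_SHADOW_OFFSET for
simplicity); it is not the real kernel helper.

	#include <stdio.h>

	#define PAGE_SIZE		 4096UL
	#define KASAN_SHADOW_SCALE_SHIFT 3

	int main(void)
	{
		unsigned long data_start = 0x0, data_size = PAGE_SIZE; /* one data page */
		unsigned long shadow_start = data_start >> KASAN_SHADOW_SCALE_SHIFT;
		unsigned long shadow_end = (data_start + data_size) >> KASAN_SHADOW_SCALE_SHIFT;

		/* Shadow is mapped in whole pages, so round the shadow range out. */
		unsigned long mapped_start = shadow_start & ~(PAGE_SIZE - 1);
		unsigned long mapped_end = (shadow_end + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);

		/* The rounded-out shadow page describes far more data than was
		 * actually populated: [0x0, 0x7fff] instead of [0x0, 0xfff]. */
		printf("shadow bytes needed : %lu\n", shadow_end - shadow_start);
		printf("shadow bytes mapped : %lu\n", mapped_end - mapped_start);
		printf("data range described: [0x%lx, 0x%lx]\n",
		       mapped_start << KASAN_SHADOW_SCALE_SHIFT,
		       (mapped_end << KASAN_SHADOW_SCALE_SHIFT) - 1);
		return 0;
	}

This is why mapping shadow one data page at a time would repeatedly touch
the same shadow page, which is the overlap that triggers the warning linked
above.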
On Tue, Dec 10, 2024 at 10:46 AM Hao Ge <hao.ge@linux.dev> wrote:
> [...]
>
> When CONFIG_KASAN_VMALLOC is not enabled:
>
>         #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)

Ah, sorry, I misread this as (PAGE_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
and assumed PAGE_SIZE is always a multiple of MODULE_ALIGN. Now it
makes more sense. However, I'm still not sure about this condition:

        if ((phys_end & (MODULE_ALIGN - 1)) == 0)

What if phys_end is not MODULE_ALIGN-aligned? We will be skipping
kasan_alloc_module_shadow().

For example, say module_tags.start_addr == 0x1018 (4096+24): the
original phys_end will be 0x1000 (4096), and say we allocated one page
(nr == 1), so the tags area is [0x1000-0x2000]. phys_end is not
MODULE_ALIGN-aligned and we will skip kasan_alloc_module_shadow().
IIUC, this is already incorrect.

Now, say the next time we allocate 8 pages. phys_end this time is
0x2000 and the new tags area spans [0x1000-0xA000]; we skip
kasan_alloc_module_shadow() again. Next time we allocate pages,
phys_end is 0xA000 and it again is not MODULE_ALIGN-aligned, so we skip
again. You see my point?
Hi Suren,

On 12/11/24 03:20, Suren Baghdasaryan wrote:
> [...]
>
> However, I'm still not sure about this condition:
>
>         if ((phys_end & (MODULE_ALIGN - 1)) == 0)
>
> What if phys_end is not MODULE_ALIGN-aligned? We will be skipping
> kasan_alloc_module_shadow().

Theoretically, this scenario does not exist.

Please refer to the following:

https://elixir.bootlin.com/linux/v6.13-rc2/source/arch/x86/mm/init.c#L1072

They would all comply with MODULE_ALIGN.

> For example, say module_tags.start_addr == 0x1018 (4096+24) [...]
On Tue, Dec 10, 2024 at 11:37 AM Hao Ge <hao.ge@linux.dev> wrote:
> [...]
>
>> What if phys_end is not MODULE_ALIGN-aligned? We will be skipping
>> kasan_alloc_module_shadow().
>
> Theoretically, this scenario does not exist.
>
> Please refer to the following:
>
> https://elixir.bootlin.com/linux/v6.13-rc2/source/arch/x86/mm/init.c#L1072
>
> They would all comply with MODULE_ALIGN.

Well, not all. The original execmem_vmap() called from
alloc_mod_tags_mem() will indeed return a MODULE_ALIGN-aligned address,
therefore the original phys_end is MODULE_ALIGN-aligned. But as
phys_end grows it can become misaligned. Let's modify my example:

module_tags.start_addr = 0x8000;  (returned by execmem_vmap())

// we need to allocate 1 page (nr = 1)
phys_end = 0x8000;  // MODULE_ALIGN'ed, so we allocate a shadow page
// tags covered area is [0x8000-0x9000]
// our shadow memory represents the area [0x8000-0x10000]

// now we allocate 8 more pages (nr = 8)
phys_end = 0x9000;  // not MODULE_ALIGN'ed, we skip allocating shadow pages
// tags covered area is [0x8000-0x11000]
// but our shadow memory still represents the area [0x8000-0x10000]
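A tiny userspace simulation of this walk-through is shown below. The
start_addr of 0x8000 and the 1-page / 8-page populate sizes are taken
straight from the example; only the phys_end bookkeeping and the patch's
alignment check are modelled, nothing from the real kernel is called.

	#include <stdio.h>

	#define PAGE_SHIFT		 12
	#define PAGE_SIZE		 (1UL << PAGE_SHIFT)
	#define KASAN_SHADOW_SCALE_SHIFT 3
	#define MODULE_ALIGN		 (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)

	int main(void)
	{
		unsigned long start_addr = 0x8000, nr_pages = 0;
		unsigned long shadow_covered_end = start_addr;
		unsigned long steps[] = { 1, 8 };	/* pages populated per call */

		for (int i = 0; i < 2; i++) {
			unsigned long nr = steps[i];
			unsigned long phys_end = start_addr + (nr_pages << PAGE_SHIFT);

			nr_pages += nr;
			if ((phys_end & (MODULE_ALIGN - 1)) == 0) {
				/* round_up(nr << PAGE_SHIFT, MODULE_ALIGN) */
				unsigned long sz = (nr << PAGE_SHIFT) + MODULE_ALIGN - 1;

				sz &= ~(MODULE_ALIGN - 1);
				shadow_covered_end = phys_end + sz;
			}
			printf("step %d: tags end 0x%lx, shadow covers up to 0x%lx%s\n",
			       i + 1, start_addr + (nr_pages << PAGE_SHIFT),
			       shadow_covered_end,
			       start_addr + (nr_pages << PAGE_SHIFT) > shadow_covered_end ?
			       "  <-- gap!" : "");
		}
		return 0;
	}

Running it reproduces the gap: after the second populate the tags area ends
at 0x11000 while shadow coverage still stops at 0x10000.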
Hi Suren On 12/11/24 04:04, Suren Baghdasaryan wrote: > On Tue, Dec 10, 2024 at 11:37 AM Hao Ge <hao.ge@linux.dev> wrote: >> Hi Suren >> >> >> On 12/11/24 03:20, Suren Baghdasaryan wrote: >>> On Tue, Dec 10, 2024 at 10:46 AM Hao Ge <hao.ge@linux.dev> wrote: >>>> Hi Suren >>>> >>>> >>>> Thanks for your review. >>>> >>>> >>>> On 12/11/24 01:55, Suren Baghdasaryan wrote: >>>>> On Mon, Dec 9, 2024 at 10:53 PM Hao Ge <hao.ge@linux.dev> wrote: >>>>>> From: Hao Ge <gehao@kylinos.cn> >>>>>> >>>>>> When CONFIG_KASAN is enabled but CONFIG_KASAN_VMALLOC >>>>>> is not enabled, we may encounter a panic during system boot. >>>>>> >>>>>> Because we haven't allocated pages and created mappings >>>>>> for the shadow memory corresponding to module_tags region, >>>>>> similar to how it is done for execmem_vmalloc. >>>>>> >>>>>> The difference is that our module_tags are allocated on demand, >>>>>> so similarly,we also need to allocate shadow memory regions on demand. >>>>>> However, we still need to adhere to the MODULE_ALIGN principle. >>>>>> >>>>>> Here is the log for panic: >>>>>> >>>>>> [ 18.349421] BUG: unable to handle page fault for address: fffffbfff8092000 >>>>>> [ 18.350016] #PF: supervisor read access in kernel mode >>>>>> [ 18.350459] #PF: error_code(0x0000) - not-present page >>>>>> [ 18.350904] PGD 20fe52067 P4D 219dc8067 PUD 219dc4067 PMD 102495067 PTE 0 >>>>>> [ 18.351484] Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI >>>>>> [ 18.351961] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 6.13.0-rc1+ #3 >>>>>> [ 18.352533] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 >>>>>> [ 18.353494] RIP: 0010:kasan_check_range+0xba/0x1b0 >>>>>> [ 18.353931] Code: 8d 5a 07 4c 0f 49 da 49 c1 fb 03 45 85 db 0f 84 dd 00 00 00 45 89 db 4a 8d 14 d8 eb 0d 48 83 c0 08 48 39 c2 0f 84 c1 00 00 00 <48> 83 38 00 74 ed 48 8d 50 08 eb 0d 48 83 c0 01 48 39 d0 0f 84 90 >>>>>> [ 18.355484] RSP: 0018:ff11000101877958 EFLAGS: 00010206 >>>>>> [ 18.355937] RAX: fffffbfff8092000 RBX: fffffbfff809201e RCX: ffffffff82a7ceac >>>>>> [ 18.356542] RDX: fffffbfff8092018 RSI: 00000000000000f0 RDI: ffffffffc0490000 >>>>>> [ 18.357153] RBP: fffffbfff8092000 R08: 0000000000000001 R09: fffffbfff809201d >>>>>> [ 18.357756] R10: ffffffffc04900ef R11: 0000000000000003 R12: ffffffffc0490000 >>>>>> [ 18.358365] R13: ff11000101877b48 R14: ffffffffc0490000 R15: 000000000000002c >>>>>> [ 18.358968] FS: 00007f9bd13c5940(0000) GS:ff110001eb480000(0000) knlGS:0000000000000000 >>>>>> [ 18.359648] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >>>>>> [ 18.360178] CR2: fffffbfff8092000 CR3: 0000000109214004 CR4: 0000000000771ef0 >>>>>> [ 18.360790] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 >>>>>> [ 18.361404] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 >>>>>> [ 18.362020] PKRU: 55555554 >>>>>> [ 18.362261] Call Trace: >>>>>> [ 18.362481] <TASK> >>>>>> [ 18.362671] ? __die+0x23/0x70 >>>>>> [ 18.362964] ? page_fault_oops+0xc2/0x160 >>>>>> [ 18.363318] ? exc_page_fault+0xad/0xc0 >>>>>> [ 18.363680] ? asm_exc_page_fault+0x26/0x30 >>>>>> [ 18.364056] ? move_module+0x3cc/0x8a0 >>>>>> [ 18.364398] ? kasan_check_range+0xba/0x1b0 >>>>>> [ 18.364755] __asan_memcpy+0x3c/0x60 >>>>>> [ 18.365074] move_module+0x3cc/0x8a0 >>>>>> [ 18.365386] layout_and_allocate.constprop.0+0x3d5/0x720 >>>>>> [ 18.365841] ? early_mod_check+0x3dc/0x510 >>>>>> [ 18.366195] load_module+0x72/0x1850 >>>>>> [ 18.366509] ? __pfx_kernel_read_file+0x10/0x10 >>>>>> [ 18.366918] ? 
vm_mmap_pgoff+0x21c/0x2d0 >>>>>> [ 18.367262] init_module_from_file+0xd1/0x130 >>>>>> [ 18.367638] ? __pfx_init_module_from_file+0x10/0x10 >>>>>> [ 18.368073] ? __pfx__raw_spin_lock+0x10/0x10 >>>>>> [ 18.368456] ? __pfx_cred_has_capability.isra.0+0x10/0x10 >>>>>> [ 18.368938] idempotent_init_module+0x22c/0x790 >>>>>> [ 18.369332] ? simple_getattr+0x6f/0x120 >>>>>> [ 18.369676] ? __pfx_idempotent_init_module+0x10/0x10 >>>>>> [ 18.370110] ? fdget+0x58/0x3a0 >>>>>> [ 18.370393] ? security_capable+0x64/0xf0 >>>>>> [ 18.370745] __x64_sys_finit_module+0xc2/0x140 >>>>>> [ 18.371136] do_syscall_64+0x7d/0x160 >>>>>> [ 18.371459] ? fdget_pos+0x1c8/0x4c0 >>>>>> [ 18.371784] ? ksys_read+0xfd/0x1d0 >>>>>> [ 18.372106] ? syscall_exit_to_user_mode+0x10/0x1f0 >>>>>> [ 18.372525] ? do_syscall_64+0x89/0x160 >>>>>> [ 18.372860] ? do_syscall_64+0x89/0x160 >>>>>> [ 18.373194] ? do_syscall_64+0x89/0x160 >>>>>> [ 18.373527] ? syscall_exit_to_user_mode+0x10/0x1f0 >>>>>> [ 18.373952] ? do_syscall_64+0x89/0x160 >>>>>> [ 18.374283] ? syscall_exit_to_user_mode+0x10/0x1f0 >>>>>> [ 18.374701] ? do_syscall_64+0x89/0x160 >>>>>> [ 18.375037] ? do_user_addr_fault+0x4a8/0xa40 >>>>>> [ 18.375416] ? clear_bhb_loop+0x25/0x80 >>>>>> [ 18.375748] ? clear_bhb_loop+0x25/0x80 >>>>>> [ 18.376119] ? clear_bhb_loop+0x25/0x80 >>>>>> [ 18.376450] entry_SYSCALL_64_after_hwframe+0x76/0x7e >>>>>> >>>>>> Fixes: 233e89322cbe ("alloc_tag: fix module allocation tags populated area calculation") >>>>>> Reported-by: Ben Greear <greearb@candelatech.com> >>>>>> Closes: https://lore.kernel.org/all/1ba0cc57-e2ed-caa2-1241-aa5615bee01f@candelatech.com/ >>>>>> Signed-off-by: Hao Ge <gehao@kylinos.cn> >>>>>> --- >>>>>> v2: Add comments to facilitate understanding of the code. >>>>>> Add align nr << PAGE_SHIFT to MODULE_ALIGN,even though kasan_alloc_module_shadow >>>>>> already handles this internally,but to make the code more readable and user-friendly >>>>>> >>>>>> commit 233e89322cbe ("alloc_tag: fix module allocation >>>>>> tags populated area calculation") is currently in the >>>>>> mm-hotfixes-unstable branch, so this patch is >>>>>> developed based on the mm-hotfixes-unstable branch. >>>>>> --- >>>>>> lib/alloc_tag.c | 12 ++++++++++++ >>>>>> 1 file changed, 12 insertions(+) >>>>>> >>>>>> diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c >>>>>> index f942408b53ef..bd3ee57ea13f 100644 >>>>>> --- a/lib/alloc_tag.c >>>>>> +++ b/lib/alloc_tag.c >>>>>> @@ -10,6 +10,7 @@ >>>>>> #include <linux/seq_buf.h> >>>>>> #include <linux/seq_file.h> >>>>>> #include <linux/vmalloc.h> >>>>>> +#include <linux/math.h> >>>>>> >>>>>> #define ALLOCINFO_FILE_NAME "allocinfo" >>>>>> #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) >>>>>> @@ -422,6 +423,17 @@ static int vm_module_tags_populate(void) >>>>>> return -ENOMEM; >>>>>> } >>>>>> vm_module_tags->nr_pages += nr; >>>>>> + >>>>>> + /* >>>>>> + * Kasan allocates 1 byte of shadow for every 8 bytes of data. >>>>>> + * When kasan_alloc_module_shadow allocates shadow memory, >>>>>> + * it does so in units of pages. >>>>>> + * Therefore, here we need to align to MODULE_ALIGN. >>>>>> + */ >>>>>> + if ((phys_end & (MODULE_ALIGN - 1)) == 0) >>>>> phys_end is calculated as: >>>>> >>>>> unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) + >>>>> (vm_module_tags->nr_pages >>>>> << PAGE_SHIFT); >>>>> >>>>> and therefore is always PAGE_SIZE-aligned. 
PAGE_SIZE is always a >>>>> multiple of MODULE_ALIGN, therefore phys_end is always >>>> When CONFIG_KASAN_VMALLOC is not enabled >>>> >>>> #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) >>> Ah, sorry, I misread this as (PAGE_SIZE >> KASAN_SHADOW_SCALE_SHIFT) >>> and assumed MODULE_ALIGN is always multiple of PAGE_SIZE. Now it makes >>> more sense. However I'm still not sure about this condition: >>> >>> if ((phys_end & (MODULE_ALIGN - 1)) == 0) >>> >>> What if page_end is not MODULE_ALIGN-aligned. We will be skipping >>> kasan_alloc_module_shadow(). >> >> Theoretically, this scenario does not exist. >> >> >> Please refer to the following: >> >> https://elixir.bootlin.com/linux/v6.13-rc2/source/arch/x86/mm/init.c#L1072 >> >> They would all comply with MODULE_ALIGN. > Well, not all. The original execmem_vmap() called from > alloc_mod_tags_mem() will indeed return MODULE_ALIGN-aligned address, > therefore the original phys_end is MODULE_ALIGN-aligned. But as > phys_end grows it can become misaligned. Let's modify my example: > > module_tags.start_addr = 0x8000; (returned by execmem_vmap()) > // we need to allocate 1 page (nr = 1) > phys_end = 0x8000; // MODULE_ALIGN'ed, so we allocate a shadow page > // tags covered area is [0x8000-0x9000] > // our shadows memory represents the area [0x8000-0x10000] > > // now we allocate 8 more pages (nr = 8) > phys_end = 0x9000; // not MODULE_ALIGN'ed, we skip allocating shadow pages > // tags covered area is [0x8000-0x11000] > // but our shadows memory still represents the area [0x8000-0x10000] I think I need a cup of coffee at this late hour – I completely forgot about that logic! Thank you so much for your detailed explanation and pointing it out. I'll update next version to address this issue. > > >> >>> For example, say module_tags.start_addr == 0x1018 (4096+24), original >>> phys_end will be 0x1000 (4096) and say we allocated one page (nr == >>> 1), tags area is [0x1000-0x2000]. phys_end is not MODULE_ALIGN-aligned >>> and we will skip kasan_alloc_module_shadow(). IIUC, this is already >>> incorrect. >>> Now, say the next time we allocate 8 pages. phys_end this time is >>> 0x2000 and the new tags area spans [0x1000-0xA000], we skip >>> kasan_alloc_module_shadow() again. Next time we allocate pages, >>> phys_end is 0xA000 and it again is not MODULE_ALIGN-aligned, we skip >>> again. You see my point? >>> >>>> https://elixir.bootlin.com/linux/v6.13-rc2/source/include/linux/execmem.h#L11 >>>> >>>> and On x86, KASAN_SHADOW_SCALE_SHIFT is set to 3 >>>> >>>> https://elixir.bootlin.com/linux/v6.13-rc2/source/arch/x86/include/asm/kasan.h#L7 >>>> >>>> As mentioned in my comment, Kasan allocates 1 byte of shadow for every 8 >>>> bytes of data >>>> >>>> So, when you allocate a shadow page through kasan_alloc_module_shadow, >>>> it corresponds to eight physical pages in our system. >>>> >>>> So, we need MODULE_ALIGN to ensure proper alignment when allocating >>>> shadow memory for modules using KASAN. >>>> >>>> Let's take a look at the kasan_alloc_module_shadow function again >>>> >>>> As I mentioned earlier,Kasan allocates 1 byte of shadow for every 8 >>>> bytes of data. >>>> >>>> Assuming phys_end is set to 0 for the sake of this example, if you >>>> allocate a single shadow page, >>>> >>>> the corresponding address range it can represent would be [0, 0x7FFFF]. >>>> >>>> So, it is incorrect to call kasan_alloc_module_shadow every time a page >>>> is allocated, as it can trigger warnings in the system. 
>>>> >>>> https://elixir.bootlin.com/linux/v6.13-rc2/source/mm/kasan/shadow.c#L599 >>>> >>>> Thanks >>>> >>>> Best Regards Hao >>>> >>>>> MODULE_ALIGN-aligned and the above condition is not needed. >>>>> >>>>>> + kasan_alloc_module_shadow((void *)phys_end, >>>>>> + round_up(nr << PAGE_SHIFT, MODULE_ALIGN), >>>>> Here again, (nr << PAGE_SHIFT) is PAGE_SIZE-aligned and PAGE_SIZE is a >>>>> multiple of MODULE_ALIGN, therefore (nr << PAGE_SHIFT) is always >>>>> multiple of MODULE_ALIGN and there is no need for round_up(). >>>>> >>>>> IOW, I think this patch should simply add one line: >>>>> >>>>> vm_module_tags->nr_pages += nr; >>>>> + kasan_alloc_module_shadow((void *)phys_end, nr << >>>>> PAGE_SHIFT, GFP_KERNEL); >>>>> >>>>> Am I missing something? >>>>> >>>>>> + GFP_KERNEL); >>>>>> } >>>>>> >>>>>> /* >>>>>> -- >>>>>> 2.25.1 >>>>>>
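To make the misalignment Suren describes above concrete, here is a minimal user-space sketch (not part of the thread) that replays the two populate steps from his example. It assumes 4 KiB pages and the generic-KASAN KASAN_SHADOW_SCALE_SHIFT of 3, so MODULE_ALIGN is 32 KiB and one shadow page covers eight tag pages; the rounded-up allocation is modelled as simply covering everything up to the next MODULE_ALIGN boundary.

#include <stdio.h>

#define PAGE_SIZE                4096UL
#define KASAN_SHADOW_SCALE_SHIFT 3
#define MODULE_ALIGN             (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)

/* shadow pages needed to cover the tags area [start, end) */
static unsigned long shadow_pages_needed(unsigned long start, unsigned long end)
{
	return (end - start + MODULE_ALIGN - 1) / MODULE_ALIGN;
}

int main(void)
{
	unsigned long start = 0x8000;          /* MODULE_ALIGN'ed start_addr */
	unsigned long phys_end = start;        /* end of the populated tags */
	unsigned long shadow_pages = 0;        /* shadow pages allocated so far */
	unsigned long step_pages[] = { 1, 8 }; /* the two populate calls */

	for (int i = 0; i < 2; i++) {
		unsigned long new_end = phys_end + step_pages[i] * PAGE_SIZE;

		/* v2 condition: grow the shadow only when phys_end is MODULE_ALIGN-aligned */
		if ((phys_end & (MODULE_ALIGN - 1)) == 0)
			/* model the rounded-up allocation as covering up to the next boundary */
			shadow_pages = shadow_pages_needed(start, new_end);

		printf("step %d: tags [0x%lx-0x%lx) have %lu shadow page(s), need %lu\n",
		       i, start, new_end, shadow_pages,
		       shadow_pages_needed(start, new_end));
		phys_end = new_end;
	}
	return 0;
}

With those assumptions the second step ends up with one shadow page where two are needed, which is exactly the uncovered region described above.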
From: Hao Ge <gehao@kylinos.cn>
When CONFIG_KASAN is enabled but CONFIG_KASAN_VMALLOC
is not enabled, we may encounter a panic during system boot.
This is because we haven't allocated pages and created mappings
for the shadow memory corresponding to the module_tags region,
the way it is done for execmem_vmalloc.
The difference is that our module_tags are allocated on demand,
so, similarly, we also need to allocate the shadow memory regions on demand.
However, we still need to adhere to the MODULE_ALIGN principle.
Here is the log for panic:
[ 18.349421] BUG: unable to handle page fault for address: fffffbfff8092000
[ 18.350016] #PF: supervisor read access in kernel mode
[ 18.350459] #PF: error_code(0x0000) - not-present page
[ 18.350904] PGD 20fe52067 P4D 219dc8067 PUD 219dc4067 PMD 102495067 PTE 0
[ 18.351484] Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
[ 18.351961] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 6.13.0-rc1+ #3
[ 18.352533] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 18.353494] RIP: 0010:kasan_check_range+0xba/0x1b0
[ 18.353931] Code: 8d 5a 07 4c 0f 49 da 49 c1 fb 03 45 85 db 0f 84 dd 00 00 00 45 89 db 4a 8d 14 d8 eb 0d 48 83 c0 08 48 39 c2 0f 84 c1 00 00 00 <48> 83 38 00 74 ed 48 8d 50 08 eb 0d 48 83 c0 01 48 39 d0 0f 84 90
[ 18.355484] RSP: 0018:ff11000101877958 EFLAGS: 00010206
[ 18.355937] RAX: fffffbfff8092000 RBX: fffffbfff809201e RCX: ffffffff82a7ceac
[ 18.356542] RDX: fffffbfff8092018 RSI: 00000000000000f0 RDI: ffffffffc0490000
[ 18.357153] RBP: fffffbfff8092000 R08: 0000000000000001 R09: fffffbfff809201d
[ 18.357756] R10: ffffffffc04900ef R11: 0000000000000003 R12: ffffffffc0490000
[ 18.358365] R13: ff11000101877b48 R14: ffffffffc0490000 R15: 000000000000002c
[ 18.358968] FS: 00007f9bd13c5940(0000) GS:ff110001eb480000(0000) knlGS:0000000000000000
[ 18.359648] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 18.360178] CR2: fffffbfff8092000 CR3: 0000000109214004 CR4: 0000000000771ef0
[ 18.360790] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 18.361404] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 18.362020] PKRU: 55555554
[ 18.362261] Call Trace:
[ 18.362481] <TASK>
[ 18.362671] ? __die+0x23/0x70
[ 18.362964] ? page_fault_oops+0xc2/0x160
[ 18.363318] ? exc_page_fault+0xad/0xc0
[ 18.363680] ? asm_exc_page_fault+0x26/0x30
[ 18.364056] ? move_module+0x3cc/0x8a0
[ 18.364398] ? kasan_check_range+0xba/0x1b0
[ 18.364755] __asan_memcpy+0x3c/0x60
[ 18.365074] move_module+0x3cc/0x8a0
[ 18.365386] layout_and_allocate.constprop.0+0x3d5/0x720
[ 18.365841] ? early_mod_check+0x3dc/0x510
[ 18.366195] load_module+0x72/0x1850
[ 18.366509] ? __pfx_kernel_read_file+0x10/0x10
[ 18.366918] ? vm_mmap_pgoff+0x21c/0x2d0
[ 18.367262] init_module_from_file+0xd1/0x130
[ 18.367638] ? __pfx_init_module_from_file+0x10/0x10
[ 18.368073] ? __pfx__raw_spin_lock+0x10/0x10
[ 18.368456] ? __pfx_cred_has_capability.isra.0+0x10/0x10
[ 18.368938] idempotent_init_module+0x22c/0x790
[ 18.369332] ? simple_getattr+0x6f/0x120
[ 18.369676] ? __pfx_idempotent_init_module+0x10/0x10
[ 18.370110] ? fdget+0x58/0x3a0
[ 18.370393] ? security_capable+0x64/0xf0
[ 18.370745] __x64_sys_finit_module+0xc2/0x140
[ 18.371136] do_syscall_64+0x7d/0x160
[ 18.371459] ? fdget_pos+0x1c8/0x4c0
[ 18.371784] ? ksys_read+0xfd/0x1d0
[ 18.372106] ? syscall_exit_to_user_mode+0x10/0x1f0
[ 18.372525] ? do_syscall_64+0x89/0x160
[ 18.372860] ? do_syscall_64+0x89/0x160
[ 18.373194] ? do_syscall_64+0x89/0x160
[ 18.373527] ? syscall_exit_to_user_mode+0x10/0x1f0
[ 18.373952] ? do_syscall_64+0x89/0x160
[ 18.374283] ? syscall_exit_to_user_mode+0x10/0x1f0
[ 18.374701] ? do_syscall_64+0x89/0x160
[ 18.375037] ? do_user_addr_fault+0x4a8/0xa40
[ 18.375416] ? clear_bhb_loop+0x25/0x80
[ 18.375748] ? clear_bhb_loop+0x25/0x80
[ 18.376119] ? clear_bhb_loop+0x25/0x80
[ 18.376450] entry_SYSCALL_64_after_hwframe+0x76/0x7e
Fixes: 233e89322cbe ("alloc_tag: fix module allocation tags populated area calculation")
Reported-by: Ben Greear <greearb@candelatech.com>
Closes: https://lore.kernel.org/all/1ba0cc57-e2ed-caa2-1241-aa5615bee01f@candelatech.com/
Signed-off-by: Hao Ge <gehao@kylinos.cn>
---
v3: Adjusted the title because the previous one was a bit unclear.
Suren pointed out that our condition for determining whether
to allocate shadow memory was unreasonable. We have adjusted our method
to use every 8 pages as an index (idx), and we make decisions based
on this idx when determining whether to allocate shadow memory.
v2: Add comments to facilitate understanding of the code.
Align nr << PAGE_SHIFT to MODULE_ALIGN; even though kasan_alloc_module_shadow
already handles this internally, it makes the code more readable and user-friendly.
commit 233e89322cbe ("alloc_tag: fix module allocation
tags populated area calculation") is currently in the
mm-hotfixes-unstable branch, so this patch is
developed based on the mm-hotfixes-unstable branch.
---
lib/alloc_tag.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index f942408b53ef..8bf04756887d 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -10,6 +10,7 @@
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
+#include <linux/math.h>
#define ALLOCINFO_FILE_NAME "allocinfo"
#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag))
@@ -404,6 +405,9 @@ static int vm_module_tags_populate(void)
unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) +
(vm_module_tags->nr_pages << PAGE_SHIFT);
unsigned long new_end = module_tags.start_addr + module_tags.size;
+ unsigned long phys_idx = (vm_module_tags->nr_pages +
+ (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT;
+ unsigned long new_idx = 0;
if (phys_end < new_end) {
struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
@@ -421,7 +425,26 @@ static int vm_module_tags_populate(void)
__free_page(next_page[i]);
return -ENOMEM;
}
+
vm_module_tags->nr_pages += nr;
+
+ new_idx = (vm_module_tags->nr_pages +
+ (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT;
+
+ /*
+ * Kasan allocates 1 byte of shadow for every 8 bytes of data.
+ * When kasan_alloc_module_shadow allocates shadow memory,
+ * its unit of allocation is a page.
+ * Therefore, here we need to align to MODULE_ALIGN.
+ *
+ * For every KASAN_SHADOW_SCALE_SHIFT, a shadow page is allocated.
+ * So, we determine whether to allocate based on whether the
+ * number of pages falls within the scope of the same KASAN_SHADOW_SCALE_SHIFT.
+ */
+ if (phys_idx != new_idx)
+ kasan_alloc_module_shadow((void *)round_up(phys_end, MODULE_ALIGN),
+ (new_idx - phys_idx) * MODULE_ALIGN,
+ GFP_KERNEL);
}
/*
--
2.25.1
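As an aside, the faulting address in the oops quoted above is consistent with the shadow of the module_tags area simply not being mapped. With generic KASAN on x86-64 the shadow of an address is (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; assuming the default offset of 0xdffffc0000000000, a small sketch shows the module_tags address seen in RDI/R14 maps exactly to the address the fault was taken on:

#include <stdio.h>

#define KASAN_SHADOW_SCALE_SHIFT 3
/* assumes the default x86-64 CONFIG_KASAN_SHADOW_OFFSET */
#define KASAN_SHADOW_OFFSET      0xdffffc0000000000UL

int main(void)
{
	unsigned long mod_tags = 0xffffffffc0490000UL;  /* RDI/R14 in the oops */
	unsigned long shadow = (mod_tags >> KASAN_SHADOW_SCALE_SHIFT) +
			       KASAN_SHADOW_OFFSET;

	/* prints fffffbfff8092000, the not-present address in the page fault */
	printf("shadow of %lx is %lx\n", mod_tags, shadow);
	return 0;
}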
Hi Hao, kernel test robot noticed the following build errors: [auto build test ERROR on akpm-mm/mm-everything] url: https://github.com/intel-lab-lkp/linux/commits/Hao-Ge/mm-alloc_tag-Fix-panic-when-CONFIG_KASAN-enabled-and-CONFIG_KASAN_VMALLOC-not-enabled/20241211-110206 base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything patch link: https://lore.kernel.org/r/20241211025755.56173-1-hao.ge%40linux.dev patch subject: [PATCH v3] mm/alloc_tag: Fix panic when CONFIG_KASAN enabled and CONFIG_KASAN_VMALLOC not enabled config: i386-buildonly-randconfig-005-20241211 (https://download.01.org/0day-ci/archive/20241212/202412120143.l3g6vx8b-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241212/202412120143.l3g6vx8b-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202412120143.l3g6vx8b-lkp@intel.com/ All errors (new ones prefixed by >>): lib/alloc_tag.c: In function 'vm_module_tags_populate': >> lib/alloc_tag.c:409:40: error: 'KASAN_SHADOW_SCALE_SHIFT' undeclared (first use in this function) 409 | (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; | ^~~~~~~~~~~~~~~~~~~~~~~~ lib/alloc_tag.c:409:40: note: each undeclared identifier is reported only once for each function it appears in vim +/KASAN_SHADOW_SCALE_SHIFT +409 lib/alloc_tag.c 402 403 static int vm_module_tags_populate(void) 404 { 405 unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) + 406 (vm_module_tags->nr_pages << PAGE_SHIFT); 407 unsigned long new_end = module_tags.start_addr + module_tags.size; 408 unsigned long phys_idx = (vm_module_tags->nr_pages + > 409 (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; 410 unsigned long new_idx = 0; 411 412 if (phys_end < new_end) { 413 struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; 414 unsigned long more_pages; 415 unsigned long nr; 416 417 more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; 418 nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN, 419 NUMA_NO_NODE, more_pages, next_page); 420 if (nr < more_pages || 421 vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, 422 next_page, PAGE_SHIFT) < 0) { 423 /* Clean up and error out */ 424 for (int i = 0; i < nr; i++) 425 __free_page(next_page[i]); 426 return -ENOMEM; 427 } 428 429 vm_module_tags->nr_pages += nr; 430 431 new_idx = (vm_module_tags->nr_pages + 432 (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; 433 434 /* 435 * Kasan allocates 1 byte of shadow for every 8 bytes of data. 436 * When kasan_alloc_module_shadow allocates shadow memory, 437 * its unit of allocation is a page. 438 * Therefore, here we need to align to MODULE_ALIGN. 439 * 440 * For every KASAN_SHADOW_SCALE_SHIFT, a shadow page is allocated. 441 * So, we determine whether to allocate based on whether the 442 * number of pages falls within the scope of the same KASAN_SHADOW_SCALE_SHIFT. 443 */ 444 if (phys_idx != new_idx) 445 kasan_alloc_module_shadow((void *)round_up(phys_end, MODULE_ALIGN), 446 (new_idx - phys_idx) * MODULE_ALIGN, 447 GFP_KERNEL); 448 } 449 450 /* 451 * Mark the pages as accessible, now that they are mapped. 
452 * With hardware tag-based KASAN, marking is skipped for 453 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). 454 */ 455 kasan_unpoison_vmalloc((void *)module_tags.start_addr, 456 new_end - module_tags.start_addr, 457 KASAN_VMALLOC_PROT_NORMAL); 458 459 return 0; 460 } 461 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi Thanks for you report. This version has been deprecated, and a new V4 version has been released. On 12/12/24 01:18, kernel test robot wrote: > Hi Hao, > > kernel test robot noticed the following build errors: > > [auto build test ERROR on akpm-mm/mm-everything] > > url: https://github.com/intel-lab-lkp/linux/commits/Hao-Ge/mm-alloc_tag-Fix-panic-when-CONFIG_KASAN-enabled-and-CONFIG_KASAN_VMALLOC-not-enabled/20241211-110206 > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > patch link: https://lore.kernel.org/r/20241211025755.56173-1-hao.ge%40linux.dev > patch subject: [PATCH v3] mm/alloc_tag: Fix panic when CONFIG_KASAN enabled and CONFIG_KASAN_VMALLOC not enabled > config: i386-buildonly-randconfig-005-20241211 (https://download.01.org/0day-ci/archive/20241212/202412120143.l3g6vx8b-lkp@intel.com/config) > compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241212/202412120143.l3g6vx8b-lkp@intel.com/reproduce) > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp@intel.com> > | Closes: https://lore.kernel.org/oe-kbuild-all/202412120143.l3g6vx8b-lkp@intel.com/ > > All errors (new ones prefixed by >>): > > lib/alloc_tag.c: In function 'vm_module_tags_populate': >>> lib/alloc_tag.c:409:40: error: 'KASAN_SHADOW_SCALE_SHIFT' undeclared (first use in this function) > 409 | (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; > | ^~~~~~~~~~~~~~~~~~~~~~~~ > lib/alloc_tag.c:409:40: note: each undeclared identifier is reported only once for each function it appears in > > > vim +/KASAN_SHADOW_SCALE_SHIFT +409 lib/alloc_tag.c > > 402 > 403 static int vm_module_tags_populate(void) > 404 { > 405 unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) + > 406 (vm_module_tags->nr_pages << PAGE_SHIFT); > 407 unsigned long new_end = module_tags.start_addr + module_tags.size; > 408 unsigned long phys_idx = (vm_module_tags->nr_pages + > > 409 (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; > 410 unsigned long new_idx = 0; > 411 > 412 if (phys_end < new_end) { > 413 struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; > 414 unsigned long more_pages; > 415 unsigned long nr; > 416 > 417 more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; > 418 nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN, > 419 NUMA_NO_NODE, more_pages, next_page); > 420 if (nr < more_pages || > 421 vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, > 422 next_page, PAGE_SHIFT) < 0) { > 423 /* Clean up and error out */ > 424 for (int i = 0; i < nr; i++) > 425 __free_page(next_page[i]); > 426 return -ENOMEM; > 427 } > 428 > 429 vm_module_tags->nr_pages += nr; > 430 > 431 new_idx = (vm_module_tags->nr_pages + > 432 (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; > 433 > 434 /* > 435 * Kasan allocates 1 byte of shadow for every 8 bytes of data. > 436 * When kasan_alloc_module_shadow allocates shadow memory, > 437 * its unit of allocation is a page. > 438 * Therefore, here we need to align to MODULE_ALIGN. > 439 * > 440 * For every KASAN_SHADOW_SCALE_SHIFT, a shadow page is allocated. > 441 * So, we determine whether to allocate based on whether the > 442 * number of pages falls within the scope of the same KASAN_SHADOW_SCALE_SHIFT. 
> 443 */ > 444 if (phys_idx != new_idx) > 445 kasan_alloc_module_shadow((void *)round_up(phys_end, MODULE_ALIGN), > 446 (new_idx - phys_idx) * MODULE_ALIGN, > 447 GFP_KERNEL); > 448 } > 449 > 450 /* > 451 * Mark the pages as accessible, now that they are mapped. > 452 * With hardware tag-based KASAN, marking is skipped for > 453 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). > 454 */ > 455 kasan_unpoison_vmalloc((void *)module_tags.start_addr, > 456 new_end - module_tags.start_addr, > 457 KASAN_VMALLOC_PROT_NORMAL); > 458 > 459 return 0; > 460 } > 461 >
On Tue, Dec 10, 2024 at 6:58 PM Hao Ge <hao.ge@linux.dev> wrote: > > From: Hao Ge <gehao@kylinos.cn> > > When CONFIG_KASAN is enabled but CONFIG_KASAN_VMALLOC > is not enabled, we may encounter a panic during system boot. > > Because we haven't allocated pages and created mappings > for the shadow memory corresponding to module_tags region, > similar to how it is done for execmem_vmalloc. > > The difference is that our module_tags are allocated on demand, > so similarly,we also need to allocate shadow memory regions on demand. > However, we still need to adhere to the MODULE_ALIGN principle. > > Here is the log for panic: > > [ 18.349421] BUG: unable to handle page fault for address: fffffbfff8092000 > [ 18.350016] #PF: supervisor read access in kernel mode > [ 18.350459] #PF: error_code(0x0000) - not-present page > [ 18.350904] PGD 20fe52067 P4D 219dc8067 PUD 219dc4067 PMD 102495067 PTE 0 > [ 18.351484] Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI > [ 18.351961] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 6.13.0-rc1+ #3 > [ 18.352533] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 > [ 18.353494] RIP: 0010:kasan_check_range+0xba/0x1b0 > [ 18.353931] Code: 8d 5a 07 4c 0f 49 da 49 c1 fb 03 45 85 db 0f 84 dd 00 00 00 45 89 db 4a 8d 14 d8 eb 0d 48 83 c0 08 48 39 c2 0f 84 c1 00 00 00 <48> 83 38 00 74 ed 48 8d 50 08 eb 0d 48 83 c0 01 48 39 d0 0f 84 90 > [ 18.355484] RSP: 0018:ff11000101877958 EFLAGS: 00010206 > [ 18.355937] RAX: fffffbfff8092000 RBX: fffffbfff809201e RCX: ffffffff82a7ceac > [ 18.356542] RDX: fffffbfff8092018 RSI: 00000000000000f0 RDI: ffffffffc0490000 > [ 18.357153] RBP: fffffbfff8092000 R08: 0000000000000001 R09: fffffbfff809201d > [ 18.357756] R10: ffffffffc04900ef R11: 0000000000000003 R12: ffffffffc0490000 > [ 18.358365] R13: ff11000101877b48 R14: ffffffffc0490000 R15: 000000000000002c > [ 18.358968] FS: 00007f9bd13c5940(0000) GS:ff110001eb480000(0000) knlGS:0000000000000000 > [ 18.359648] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 18.360178] CR2: fffffbfff8092000 CR3: 0000000109214004 CR4: 0000000000771ef0 > [ 18.360790] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [ 18.361404] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > [ 18.362020] PKRU: 55555554 > [ 18.362261] Call Trace: > [ 18.362481] <TASK> > [ 18.362671] ? __die+0x23/0x70 > [ 18.362964] ? page_fault_oops+0xc2/0x160 > [ 18.363318] ? exc_page_fault+0xad/0xc0 > [ 18.363680] ? asm_exc_page_fault+0x26/0x30 > [ 18.364056] ? move_module+0x3cc/0x8a0 > [ 18.364398] ? kasan_check_range+0xba/0x1b0 > [ 18.364755] __asan_memcpy+0x3c/0x60 > [ 18.365074] move_module+0x3cc/0x8a0 > [ 18.365386] layout_and_allocate.constprop.0+0x3d5/0x720 > [ 18.365841] ? early_mod_check+0x3dc/0x510 > [ 18.366195] load_module+0x72/0x1850 > [ 18.366509] ? __pfx_kernel_read_file+0x10/0x10 > [ 18.366918] ? vm_mmap_pgoff+0x21c/0x2d0 > [ 18.367262] init_module_from_file+0xd1/0x130 > [ 18.367638] ? __pfx_init_module_from_file+0x10/0x10 > [ 18.368073] ? __pfx__raw_spin_lock+0x10/0x10 > [ 18.368456] ? __pfx_cred_has_capability.isra.0+0x10/0x10 > [ 18.368938] idempotent_init_module+0x22c/0x790 > [ 18.369332] ? simple_getattr+0x6f/0x120 > [ 18.369676] ? __pfx_idempotent_init_module+0x10/0x10 > [ 18.370110] ? fdget+0x58/0x3a0 > [ 18.370393] ? security_capable+0x64/0xf0 > [ 18.370745] __x64_sys_finit_module+0xc2/0x140 > [ 18.371136] do_syscall_64+0x7d/0x160 > [ 18.371459] ? fdget_pos+0x1c8/0x4c0 > [ 18.371784] ? 
ksys_read+0xfd/0x1d0 > [ 18.372106] ? syscall_exit_to_user_mode+0x10/0x1f0 > [ 18.372525] ? do_syscall_64+0x89/0x160 > [ 18.372860] ? do_syscall_64+0x89/0x160 > [ 18.373194] ? do_syscall_64+0x89/0x160 > [ 18.373527] ? syscall_exit_to_user_mode+0x10/0x1f0 > [ 18.373952] ? do_syscall_64+0x89/0x160 > [ 18.374283] ? syscall_exit_to_user_mode+0x10/0x1f0 > [ 18.374701] ? do_syscall_64+0x89/0x160 > [ 18.375037] ? do_user_addr_fault+0x4a8/0xa40 > [ 18.375416] ? clear_bhb_loop+0x25/0x80 > [ 18.375748] ? clear_bhb_loop+0x25/0x80 > [ 18.376119] ? clear_bhb_loop+0x25/0x80 > [ 18.376450] entry_SYSCALL_64_after_hwframe+0x76/0x7e > > Fixes: 233e89322cbe ("alloc_tag: fix module allocation tags populated area calculation") > Reported-by: Ben Greear <greearb@candelatech.com> > Closes: https://lore.kernel.org/all/1ba0cc57-e2ed-caa2-1241-aa5615bee01f@candelatech.com/ > Signed-off-by: Hao Ge <gehao@kylinos.cn> > --- > v3: Adjusting the title because the previous one was a bit unclear. > Suren has pointed out that our condition for determining whether > to allocate shadow memory is unreasonable.We have adjusted our method > to use every 8 pages as an index (idx), and we will make decisions based > on this idx when determining whether to allocate shadow memory. > > v2: Add comments to facilitate understanding of the code. > Add align nr << PAGE_SHIFT to MODULE_ALIGN,even though kasan_alloc_module_shadow > already handles this internally,but to make the code more readable and user-friendly > > commit 233e89322cbe ("alloc_tag: fix module allocation > tags populated area calculation") is currently in the > mm-hotfixes-unstable branch, so this patch is > developed based on the mm-hotfixes-unstable branch. > --- > lib/alloc_tag.c | 23 +++++++++++++++++++++++ > 1 file changed, 23 insertions(+) > > diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c > index f942408b53ef..8bf04756887d 100644 > --- a/lib/alloc_tag.c > +++ b/lib/alloc_tag.c > @@ -10,6 +10,7 @@ > #include <linux/seq_buf.h> > #include <linux/seq_file.h> > #include <linux/vmalloc.h> > +#include <linux/math.h> > > #define ALLOCINFO_FILE_NAME "allocinfo" > #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) > @@ -404,6 +405,9 @@ static int vm_module_tags_populate(void) > unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) + > (vm_module_tags->nr_pages << PAGE_SHIFT); > unsigned long new_end = module_tags.start_addr + module_tags.size; > + unsigned long phys_idx = (vm_module_tags->nr_pages + > + (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; > + unsigned long new_idx = 0; > > if (phys_end < new_end) { > struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; > @@ -421,7 +425,26 @@ static int vm_module_tags_populate(void) > __free_page(next_page[i]); > return -ENOMEM; > } > + > vm_module_tags->nr_pages += nr; > + > + new_idx = (vm_module_tags->nr_pages + > + (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; > + > + /* > + * Kasan allocates 1 byte of shadow for every 8 bytes of data. > + * When kasan_alloc_module_shadow allocates shadow memory, > + * its unit of allocation is a page. > + * Therefore, here we need to align to MODULE_ALIGN. > + * > + * For every KASAN_SHADOW_SCALE_SHIFT, a shadow page is allocated. > + * So, we determine whether to allocate based on whether the > + * number of pages falls within the scope of the same KASAN_SHADOW_SCALE_SHIFT. 
> + */ > + if (phys_idx != new_idx) > + kasan_alloc_module_shadow((void *)round_up(phys_end, MODULE_ALIGN), > + (new_idx - phys_idx) * MODULE_ALIGN, > + GFP_KERNEL); > } This seems overly-complicated. I was thinking something like this would work: static int vm_module_tags_populate(void) { unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) + (vm_module_tags->nr_pages << PAGE_SHIFT); unsigned long new_end = module_tags.start_addr + module_tags.size; if (phys_end < new_end) { struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; + unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN); + unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN); unsigned long more_pages; unsigned long nr; more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN, NUMA_NO_NODE, more_pages, next_page); if (nr < more_pages || vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, next_page, PAGE_SHIFT) < 0) { /* Clean up and error out */ for (int i = 0; i < nr; i++) __free_page(next_page[i]); return -ENOMEM; } vm_module_tags->nr_pages += nr; + if (old_shadow_end < new_shadow_end) + kasan_alloc_module_shadow((void *)old_shadow_end, + new_shadow_end - old_shadow_end + GFP_KERNEL); } WDYT? > > /* > -- > 2.25.1 >
Hi Suren On 12/12/24 00:32, Suren Baghdasaryan wrote: > On Tue, Dec 10, 2024 at 6:58 PM Hao Ge <hao.ge@linux.dev> wrote: >> From: Hao Ge <gehao@kylinos.cn> >> >> When CONFIG_KASAN is enabled but CONFIG_KASAN_VMALLOC >> is not enabled, we may encounter a panic during system boot. >> >> Because we haven't allocated pages and created mappings >> for the shadow memory corresponding to module_tags region, >> similar to how it is done for execmem_vmalloc. >> >> The difference is that our module_tags are allocated on demand, >> so similarly,we also need to allocate shadow memory regions on demand. >> However, we still need to adhere to the MODULE_ALIGN principle. >> >> Here is the log for panic: >> >> [ 18.349421] BUG: unable to handle page fault for address: fffffbfff8092000 >> [ 18.350016] #PF: supervisor read access in kernel mode >> [ 18.350459] #PF: error_code(0x0000) - not-present page >> [ 18.350904] PGD 20fe52067 P4D 219dc8067 PUD 219dc4067 PMD 102495067 PTE 0 >> [ 18.351484] Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI >> [ 18.351961] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 6.13.0-rc1+ #3 >> [ 18.352533] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 >> [ 18.353494] RIP: 0010:kasan_check_range+0xba/0x1b0 >> [ 18.353931] Code: 8d 5a 07 4c 0f 49 da 49 c1 fb 03 45 85 db 0f 84 dd 00 00 00 45 89 db 4a 8d 14 d8 eb 0d 48 83 c0 08 48 39 c2 0f 84 c1 00 00 00 <48> 83 38 00 74 ed 48 8d 50 08 eb 0d 48 83 c0 01 48 39 d0 0f 84 90 >> [ 18.355484] RSP: 0018:ff11000101877958 EFLAGS: 00010206 >> [ 18.355937] RAX: fffffbfff8092000 RBX: fffffbfff809201e RCX: ffffffff82a7ceac >> [ 18.356542] RDX: fffffbfff8092018 RSI: 00000000000000f0 RDI: ffffffffc0490000 >> [ 18.357153] RBP: fffffbfff8092000 R08: 0000000000000001 R09: fffffbfff809201d >> [ 18.357756] R10: ffffffffc04900ef R11: 0000000000000003 R12: ffffffffc0490000 >> [ 18.358365] R13: ff11000101877b48 R14: ffffffffc0490000 R15: 000000000000002c >> [ 18.358968] FS: 00007f9bd13c5940(0000) GS:ff110001eb480000(0000) knlGS:0000000000000000 >> [ 18.359648] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >> [ 18.360178] CR2: fffffbfff8092000 CR3: 0000000109214004 CR4: 0000000000771ef0 >> [ 18.360790] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 >> [ 18.361404] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 >> [ 18.362020] PKRU: 55555554 >> [ 18.362261] Call Trace: >> [ 18.362481] <TASK> >> [ 18.362671] ? __die+0x23/0x70 >> [ 18.362964] ? page_fault_oops+0xc2/0x160 >> [ 18.363318] ? exc_page_fault+0xad/0xc0 >> [ 18.363680] ? asm_exc_page_fault+0x26/0x30 >> [ 18.364056] ? move_module+0x3cc/0x8a0 >> [ 18.364398] ? kasan_check_range+0xba/0x1b0 >> [ 18.364755] __asan_memcpy+0x3c/0x60 >> [ 18.365074] move_module+0x3cc/0x8a0 >> [ 18.365386] layout_and_allocate.constprop.0+0x3d5/0x720 >> [ 18.365841] ? early_mod_check+0x3dc/0x510 >> [ 18.366195] load_module+0x72/0x1850 >> [ 18.366509] ? __pfx_kernel_read_file+0x10/0x10 >> [ 18.366918] ? vm_mmap_pgoff+0x21c/0x2d0 >> [ 18.367262] init_module_from_file+0xd1/0x130 >> [ 18.367638] ? __pfx_init_module_from_file+0x10/0x10 >> [ 18.368073] ? __pfx__raw_spin_lock+0x10/0x10 >> [ 18.368456] ? __pfx_cred_has_capability.isra.0+0x10/0x10 >> [ 18.368938] idempotent_init_module+0x22c/0x790 >> [ 18.369332] ? simple_getattr+0x6f/0x120 >> [ 18.369676] ? __pfx_idempotent_init_module+0x10/0x10 >> [ 18.370110] ? fdget+0x58/0x3a0 >> [ 18.370393] ? 
security_capable+0x64/0xf0 >> [ 18.370745] __x64_sys_finit_module+0xc2/0x140 >> [ 18.371136] do_syscall_64+0x7d/0x160 >> [ 18.371459] ? fdget_pos+0x1c8/0x4c0 >> [ 18.371784] ? ksys_read+0xfd/0x1d0 >> [ 18.372106] ? syscall_exit_to_user_mode+0x10/0x1f0 >> [ 18.372525] ? do_syscall_64+0x89/0x160 >> [ 18.372860] ? do_syscall_64+0x89/0x160 >> [ 18.373194] ? do_syscall_64+0x89/0x160 >> [ 18.373527] ? syscall_exit_to_user_mode+0x10/0x1f0 >> [ 18.373952] ? do_syscall_64+0x89/0x160 >> [ 18.374283] ? syscall_exit_to_user_mode+0x10/0x1f0 >> [ 18.374701] ? do_syscall_64+0x89/0x160 >> [ 18.375037] ? do_user_addr_fault+0x4a8/0xa40 >> [ 18.375416] ? clear_bhb_loop+0x25/0x80 >> [ 18.375748] ? clear_bhb_loop+0x25/0x80 >> [ 18.376119] ? clear_bhb_loop+0x25/0x80 >> [ 18.376450] entry_SYSCALL_64_after_hwframe+0x76/0x7e >> >> Fixes: 233e89322cbe ("alloc_tag: fix module allocation tags populated area calculation") >> Reported-by: Ben Greear <greearb@candelatech.com> >> Closes: https://lore.kernel.org/all/1ba0cc57-e2ed-caa2-1241-aa5615bee01f@candelatech.com/ >> Signed-off-by: Hao Ge <gehao@kylinos.cn> >> --- >> v3: Adjusting the title because the previous one was a bit unclear. >> Suren has pointed out that our condition for determining whether >> to allocate shadow memory is unreasonable.We have adjusted our method >> to use every 8 pages as an index (idx), and we will make decisions based >> on this idx when determining whether to allocate shadow memory. >> >> v2: Add comments to facilitate understanding of the code. >> Add align nr << PAGE_SHIFT to MODULE_ALIGN,even though kasan_alloc_module_shadow >> already handles this internally,but to make the code more readable and user-friendly >> >> commit 233e89322cbe ("alloc_tag: fix module allocation >> tags populated area calculation") is currently in the >> mm-hotfixes-unstable branch, so this patch is >> developed based on the mm-hotfixes-unstable branch. >> --- >> lib/alloc_tag.c | 23 +++++++++++++++++++++++ >> 1 file changed, 23 insertions(+) >> >> diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c >> index f942408b53ef..8bf04756887d 100644 >> --- a/lib/alloc_tag.c >> +++ b/lib/alloc_tag.c >> @@ -10,6 +10,7 @@ >> #include <linux/seq_buf.h> >> #include <linux/seq_file.h> >> #include <linux/vmalloc.h> >> +#include <linux/math.h> >> >> #define ALLOCINFO_FILE_NAME "allocinfo" >> #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) >> @@ -404,6 +405,9 @@ static int vm_module_tags_populate(void) >> unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) + >> (vm_module_tags->nr_pages << PAGE_SHIFT); >> unsigned long new_end = module_tags.start_addr + module_tags.size; >> + unsigned long phys_idx = (vm_module_tags->nr_pages + >> + (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; >> + unsigned long new_idx = 0; >> >> if (phys_end < new_end) { >> struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; >> @@ -421,7 +425,26 @@ static int vm_module_tags_populate(void) >> __free_page(next_page[i]); >> return -ENOMEM; >> } >> + >> vm_module_tags->nr_pages += nr; >> + >> + new_idx = (vm_module_tags->nr_pages + >> + (2 << KASAN_SHADOW_SCALE_SHIFT) - 1) >> KASAN_SHADOW_SCALE_SHIFT; >> + >> + /* >> + * Kasan allocates 1 byte of shadow for every 8 bytes of data. >> + * When kasan_alloc_module_shadow allocates shadow memory, >> + * its unit of allocation is a page. >> + * Therefore, here we need to align to MODULE_ALIGN. >> + * >> + * For every KASAN_SHADOW_SCALE_SHIFT, a shadow page is allocated. 
>> + * So, we determine whether to allocate based on whether the >> + * number of pages falls within the scope of the same KASAN_SHADOW_SCALE_SHIFT. >> + */ >> + if (phys_idx != new_idx) >> + kasan_alloc_module_shadow((void *)round_up(phys_end, MODULE_ALIGN), >> + (new_idx - phys_idx) * MODULE_ALIGN, >> + GFP_KERNEL); >> } > This seems overly-complicated. I was thinking something like this would work: > > static int vm_module_tags_populate(void) > { > unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) + > (vm_module_tags->nr_pages << PAGE_SHIFT); > unsigned long new_end = module_tags.start_addr + module_tags.size; > > if (phys_end < new_end) { > struct page **next_page = vm_module_tags->pages + > vm_module_tags->nr_pages; > + unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN); > + unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN); > unsigned long more_pages; > unsigned long nr; > > more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; > nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN, > NUMA_NO_NODE, > more_pages, next_page); > if (nr < more_pages || > vmap_pages_range(phys_end, phys_end + (nr << > PAGE_SHIFT), PAGE_KERNEL, > next_page, PAGE_SHIFT) < 0) { > /* Clean up and error out */ > for (int i = 0; i < nr; i++) > __free_page(next_page[i]); > return -ENOMEM; > } > vm_module_tags->nr_pages += nr; > + if (old_shadow_end < new_shadow_end) > + kasan_alloc_module_shadow((void *)old_shadow_end, > + new_shadow_end - old_shadow_end > + GFP_KERNEL); > } > > WDYT? Yes, it's much simpler this way. I'll verify for accuracy,If there are no issues, I'll release the V4 version and add your "Suggested-by". Thanks Best Regards Hao > >> /* >> -- >> 2.25.1 >>
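Plugging the numbers from the earlier example into the suggested condition shows why the ALIGN-based comparison is sufficient: the shadow is grown exactly when the populated area crosses a MODULE_ALIGN boundary. A small sketch, again assuming 4 KiB pages and a 32 KiB MODULE_ALIGN:

#include <stdio.h>

#define PAGE_SIZE    4096UL
#define MODULE_ALIGN (PAGE_SIZE << 3)  /* assumes KASAN_SHADOW_SCALE_SHIFT == 3 */
#define ALIGN(x, a)  (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* the two populate calls from the earlier example */
	unsigned long phys_end[] = { 0x8000, 0x9000 };   /* already-populated end */
	unsigned long new_end[]  = { 0x9000, 0x11000 };  /* requested new end */

	for (int i = 0; i < 2; i++) {
		unsigned long old_shadow_end = ALIGN(phys_end[i], MODULE_ALIGN);
		unsigned long new_shadow_end = ALIGN(new_end[i], MODULE_ALIGN);

		if (old_shadow_end < new_shadow_end)
			printf("call %d: allocate shadow for tags [0x%lx-0x%lx)\n",
			       i, old_shadow_end, new_shadow_end);
		else
			printf("call %d: shadow already covers up to 0x%lx\n",
			       i, old_shadow_end);
	}
	return 0;
}

Unlike the v2 check, the second call now grows the shadow past 0x10000 and covers the newly populated pages.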
From: Hao Ge <gehao@kylinos.cn>
When CONFIG_KASAN is enabled but CONFIG_KASAN_VMALLOC
is not enabled, we may encounter a panic during system boot.
This is because we haven't allocated pages and created mappings
for the shadow memory corresponding to the module_tags region,
the way it is done for execmem_vmalloc.
The difference is that our module_tags are allocated on demand,
so, similarly, we also need to allocate the shadow memory regions on demand.
However, we still need to adhere to the MODULE_ALIGN principle.
Here is the log for panic:
[ 18.349421] BUG: unable to handle page fault for address: fffffbfff8092000
[ 18.350016] #PF: supervisor read access in kernel mode
[ 18.350459] #PF: error_code(0x0000) - not-present page
[ 18.350904] PGD 20fe52067 P4D 219dc8067 PUD 219dc4067 PMD 102495067 PTE 0
[ 18.351484] Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
[ 18.351961] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 6.13.0-rc1+ #3
[ 18.352533] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 18.353494] RIP: 0010:kasan_check_range+0xba/0x1b0
[ 18.353931] Code: 8d 5a 07 4c 0f 49 da 49 c1 fb 03 45 85 db 0f 84 dd 00 00 00 45 89 db 4a 8d 14 d8 eb 0d 48 83 c0 08 48 39 c2 0f 84 c1 00 00 00 <48> 83 38 00 74 ed 48 8d 50 08 eb 0d 48 83 c0 01 48 39 d0 0f 84 90
[ 18.355484] RSP: 0018:ff11000101877958 EFLAGS: 00010206
[ 18.355937] RAX: fffffbfff8092000 RBX: fffffbfff809201e RCX: ffffffff82a7ceac
[ 18.356542] RDX: fffffbfff8092018 RSI: 00000000000000f0 RDI: ffffffffc0490000
[ 18.357153] RBP: fffffbfff8092000 R08: 0000000000000001 R09: fffffbfff809201d
[ 18.357756] R10: ffffffffc04900ef R11: 0000000000000003 R12: ffffffffc0490000
[ 18.358365] R13: ff11000101877b48 R14: ffffffffc0490000 R15: 000000000000002c
[ 18.358968] FS: 00007f9bd13c5940(0000) GS:ff110001eb480000(0000) knlGS:0000000000000000
[ 18.359648] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 18.360178] CR2: fffffbfff8092000 CR3: 0000000109214004 CR4: 0000000000771ef0
[ 18.360790] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 18.361404] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 18.362020] PKRU: 55555554
[ 18.362261] Call Trace:
[ 18.362481] <TASK>
[ 18.362671] ? __die+0x23/0x70
[ 18.362964] ? page_fault_oops+0xc2/0x160
[ 18.363318] ? exc_page_fault+0xad/0xc0
[ 18.363680] ? asm_exc_page_fault+0x26/0x30
[ 18.364056] ? move_module+0x3cc/0x8a0
[ 18.364398] ? kasan_check_range+0xba/0x1b0
[ 18.364755] __asan_memcpy+0x3c/0x60
[ 18.365074] move_module+0x3cc/0x8a0
[ 18.365386] layout_and_allocate.constprop.0+0x3d5/0x720
[ 18.365841] ? early_mod_check+0x3dc/0x510
[ 18.366195] load_module+0x72/0x1850
[ 18.366509] ? __pfx_kernel_read_file+0x10/0x10
[ 18.366918] ? vm_mmap_pgoff+0x21c/0x2d0
[ 18.367262] init_module_from_file+0xd1/0x130
[ 18.367638] ? __pfx_init_module_from_file+0x10/0x10
[ 18.368073] ? __pfx__raw_spin_lock+0x10/0x10
[ 18.368456] ? __pfx_cred_has_capability.isra.0+0x10/0x10
[ 18.368938] idempotent_init_module+0x22c/0x790
[ 18.369332] ? simple_getattr+0x6f/0x120
[ 18.369676] ? __pfx_idempotent_init_module+0x10/0x10
[ 18.370110] ? fdget+0x58/0x3a0
[ 18.370393] ? security_capable+0x64/0xf0
[ 18.370745] __x64_sys_finit_module+0xc2/0x140
[ 18.371136] do_syscall_64+0x7d/0x160
[ 18.371459] ? fdget_pos+0x1c8/0x4c0
[ 18.371784] ? ksys_read+0xfd/0x1d0
[ 18.372106] ? syscall_exit_to_user_mode+0x10/0x1f0
[ 18.372525] ? do_syscall_64+0x89/0x160
[ 18.372860] ? do_syscall_64+0x89/0x160
[ 18.373194] ? do_syscall_64+0x89/0x160
[ 18.373527] ? syscall_exit_to_user_mode+0x10/0x1f0
[ 18.373952] ? do_syscall_64+0x89/0x160
[ 18.374283] ? syscall_exit_to_user_mode+0x10/0x1f0
[ 18.374701] ? do_syscall_64+0x89/0x160
[ 18.375037] ? do_user_addr_fault+0x4a8/0xa40
[ 18.375416] ? clear_bhb_loop+0x25/0x80
[ 18.375748] ? clear_bhb_loop+0x25/0x80
[ 18.376119] ? clear_bhb_loop+0x25/0x80
[ 18.376450] entry_SYSCALL_64_after_hwframe+0x76/0x7e
Fixes: 233e89322cbe ("alloc_tag: fix module allocation tags populated area calculation")
Reported-by: Ben Greear <greearb@candelatech.com>
Closes: https://lore.kernel.org/all/1ba0cc57-e2ed-caa2-1241-aa5615bee01f@candelatech.com/
Suggested-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Hao Ge <gehao@kylinos.cn>
---
v4: Reworked the code based on Suren's suggestion to make it simpler.
Updated the comments in the code to match the new logic.
Added Suggested-by: Suren Baghdasaryan <surenb@google.com>
v3: Adjusted the title because the previous one was a bit unclear.
Suren pointed out that our condition for determining whether
to allocate shadow memory was unreasonable. We have adjusted our method
to use every 8 pages as an index (idx), and we make decisions based
on this idx when determining whether to allocate shadow memory.
v2: Add comments to facilitate understanding of the code.
Align nr << PAGE_SHIFT to MODULE_ALIGN; even though kasan_alloc_module_shadow
already handles this internally, it makes the code more readable and user-friendly.
commit 233e89322cbe ("alloc_tag: fix module allocation
tags populated area calculation") is currently in the
mm-hotfixes-unstable branch, so this patch is
developed based on the mm-hotfixes-unstable branch.
---
lib/alloc_tag.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index f942408b53ef..c5bdfa297a35 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -407,6 +407,8 @@ static int vm_module_tags_populate(void)
if (phys_end < new_end) {
struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
+ unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN);
+ unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN);
unsigned long more_pages;
unsigned long nr;
@@ -421,7 +423,19 @@ static int vm_module_tags_populate(void)
__free_page(next_page[i]);
return -ENOMEM;
}
+
vm_module_tags->nr_pages += nr;
+
+ /*
+ * Kasan allocates 1 byte of shadow for every 8 bytes of data.
+ * When kasan_alloc_module_shadow allocates shadow memory,
+ * its unit of allocation is a page.
+ * Therefore, here we need to align to MODULE_ALIGN.
+ */
+ if (old_shadow_end < new_shadow_end)
+ kasan_alloc_module_shadow((void *)old_shadow_end,
+ new_shadow_end - old_shadow_end,
+ GFP_KERNEL);
}
/*
--
2.25.1
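As a final sanity check (same 4 KiB page and 32 KiB MODULE_ALIGN assumptions, and a hypothetical first request size of 0x2000), applying the new condition to the module_tags.start_addr seen in the oops suggests the very first populate call now allocates the shadow whose absence caused the fault:

#include <stdio.h>

#define PAGE_SIZE        4096UL
#define MODULE_ALIGN     (PAGE_SIZE << 3)  /* assumes KASAN_SHADOW_SCALE_SHIFT == 3 */
#define ALIGN(x, a)      (((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

int main(void)
{
	unsigned long start_addr = 0xffffffffc0490000UL; /* RDI/R14 in the oops */
	unsigned long size = 0x2000;                     /* hypothetical first request */

	/* first call: nothing populated yet, so nr_pages == 0 */
	unsigned long phys_end = ALIGN_DOWN(start_addr, PAGE_SIZE);
	unsigned long new_end = start_addr + size;
	unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN);
	unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN);

	if (old_shadow_end < new_shadow_end)
		printf("allocate shadow for tags [%lx-%lx)\n",
		       old_shadow_end, new_shadow_end);
	return 0;
}

The shadow region created here starts at the tags base address whose shadow is the fffffbfff8092000 page the original fault hit.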