From nobody Sat Feb 7 22:07:43 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5E3C81BDAA1; Sun, 24 Nov 2024 12:46:03 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732452363; cv=none; b=qqS9174Mwi3Kdy/TsuXADKyl6EPk2+oEG+V1DQfNHYR5cdiYb2e4vX5Uls3AwGyva008fqdaQQXCwiaisP7JDwY/LiOiHqJaBw0VW9Fp5JAu9IuN++K1sN73KXzDhgU4tdfn8twO3apxqGrMB0d46evNZi3ob5xxeESCow3krz0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732452363; c=relaxed/simple; bh=ER/TrgD1m4ShjWIkfP2C75UZ40CV5OXNahh1dFNzNmA=; h=From:To:Cc:Subject:Date:Message-ID:MIME-Version; b=DKQuVsf5KdetmkYU5nFpK4lH9pgxk0tI86ZZyrUOPLiU3v/Vuz4PnS1Av+58l2/8HK/pVUFSAjz/gTHW2dDd6Eg0LdhTvaehm+5yjSlDs6INham+QqyjrkmPsbC1RNWnJQlyli2+UgV7VCK3jBpTyiYN0o3GoA84HbsXGwFe76M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=pJtHwxMq; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="pJtHwxMq" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 094D4C4CECC; Sun, 24 Nov 2024 12:46:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1732452363; bh=ER/TrgD1m4ShjWIkfP2C75UZ40CV5OXNahh1dFNzNmA=; h=From:To:Cc:Subject:Date:From; b=pJtHwxMqCuhBz4eqywTRDCwIjdDRJput2ItZFmkz3Ge1u8yYcla/Fxs5fW9oLYclI MW3Xj/NpdMfZT1rPgPmUG/RKN6pupJ5bOnpodQnALZbma6wvq7n2CgA28rRG5P47WR NWBAaKP94fGBMkE8aMhYUzERcsrtHT0XiODMzvMDCE6tMZE7XeJzynNNcK3T4jhJKH MKxLcugev+0ESdiA4ptpn87/0jnSNnVLCI2P2KXrBeJmM712jRvAruIiFWxUIzIsAn 
lEsJEO1gAOUXnWPA/WyrEyrTdo0sy5r/5phoHFS11JqPWYlYnJ1ggI7lmYK5D06xYK uom8sb4d6SLVQ== From: Sasha Levin To: linux-kernel@vger.kernel.org, stable@vger.kernel.org Cc: Marco Elver , Ran Xiaokai , Sasha Levin , bigeasy@linutronix.de, clrkwllms@kernel.org, rostedt@goodmis.org, kasan-dev@googlegroups.com, linux-rt-devel@lists.linux.dev Subject: [PATCH AUTOSEL 5.10] kcsan: Turn report_filterlist_lock into a raw_spinlock Date: Sun, 24 Nov 2024 07:46:00 -0500 Message-ID: <20241124124600.3337916-1-sashal@kernel.org> X-Mailer: git-send-email 2.43.0 Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-stable: review X-Patchwork-Hint: Ignore X-stable-base: Linux 5.10.230 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Marco Elver [ Upstream commit 59458fa4ddb47e7891c61b4a928d13d5f5b00aa0 ] Ran Xiaokai reports that with a KCSAN-enabled PREEMPT_RT kernel, we can see splats like: | BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1 | preempt_count: 10002, expected: 0 | RCU nest depth: 0, expected: 0 | no locks held by swapper/1/0.
| irq event stamp: 156674 | hardirqs last enabled at (156673): [] do_idle+0x1f9/0x240 | hardirqs last disabled at (156674): [] sysvec_apic_timer_interrupt+0x14/0xc0 | softirqs last enabled at (0): [] copy_process+0xfc7/0x4b60 | softirqs last disabled at (0): [<0000000000000000>] 0x0 | Preemption disabled at: | [] paint_ptr+0x2a/0x90 | CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.11.0+ #3 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 | Call Trace: | | dump_stack_lvl+0x7e/0xc0 | dump_stack+0x1d/0x30 | __might_resched+0x1a2/0x270 | rt_spin_lock+0x68/0x170 | kcsan_skip_report_debugfs+0x43/0xe0 | print_report+0xb5/0x590 | kcsan_report_known_origin+0x1b1/0x1d0 | kcsan_setup_watchpoint+0x348/0x650 | __tsan_unaligned_write1+0x16d/0x1d0 | hrtimer_interrupt+0x3d6/0x430 | __sysvec_apic_timer_interrupt+0xe8/0x3a0 | sysvec_apic_timer_interrupt+0x97/0xc0 | On a detected data race, KCSAN's reporting logic checks if it should filter the report. That list is protected by the report_filterlist_lock *non-raw* spinlock which may sleep on RT kernels. Since KCSAN may report data races in any context, convert it to a raw_spinlock. This requires being careful about when to allocate memory for the filter list itself which can be done via KCSAN's debugfs interface. Concurrent modification of the filter list via debugfs should be rare: the chosen strategy is to optimistically pre-allocate memory before the critical section and discard if unused.
Link: https://lore.kernel.org/all/20240925143154.2322926-1-ranxiaokai627@163.com/ Reported-by: Ran Xiaokai Tested-by: Ran Xiaokai Signed-off-by: Marco Elver Signed-off-by: Sasha Levin --- kernel/kcsan/debugfs.c | 74 ++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 62a52be8f6ba9..6a4ecd1a6fa5b 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -41,14 +41,8 @@ static struct { int used; /* number of elements used */ bool sorted; /* if elements are sorted */ bool whitelist; /* if list is a blacklist or whitelist */ -} report_filterlist =3D { - .addrs =3D NULL, - .size =3D 8, /* small initial size */ - .used =3D 0, - .sorted =3D false, - .whitelist =3D false, /* default is blacklist */ -}; -static DEFINE_SPINLOCK(report_filterlist_lock); +} report_filterlist; +static DEFINE_RAW_SPINLOCK(report_filterlist_lock); =20 /* * The microbenchmark allows benchmarking KCSAN core runtime only.
To run @@ -105,7 +99,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr) return false; func_addr -=3D offset; /* Get function start */ =20 - spin_lock_irqsave(&report_filterlist_lock, flags); + raw_spin_lock_irqsave(&report_filterlist_lock, flags); if (report_filterlist.used =3D=3D 0) goto out; =20 @@ -122,7 +116,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr) ret =3D !ret; =20 out: - spin_unlock_irqrestore(&report_filterlist_lock, flags); + raw_spin_unlock_irqrestore(&report_filterlist_lock, flags); return ret; } =20 @@ -130,9 +124,9 @@ static void set_report_filterlist_whitelist(bool whitel= ist) { unsigned long flags; =20 - spin_lock_irqsave(&report_filterlist_lock, flags); + raw_spin_lock_irqsave(&report_filterlist_lock, flags); report_filterlist.whitelist =3D whitelist; - spin_unlock_irqrestore(&report_filterlist_lock, flags); + raw_spin_unlock_irqrestore(&report_filterlist_lock, flags); } =20 /* Returns 0 on success, error-code otherwise. */ @@ -140,6 +134,9 @@ static ssize_t insert_report_filterlist(const char *fun= c) { unsigned long flags; unsigned long addr =3D kallsyms_lookup_name(func); + unsigned long *delay_free =3D NULL; + unsigned long *new_addrs =3D NULL; + size_t new_size =3D 0; ssize_t ret =3D 0; =20 if (!addr) { @@ -147,32 +144,33 @@ static ssize_t insert_report_filterlist(const char *f= unc) return -ENOENT; } =20 - spin_lock_irqsave(&report_filterlist_lock, flags); +retry_alloc: + /* + * Check if we need an allocation, and re-validate under the lock. Since + * the report_filterlist_lock is a raw, cannot allocate under the lock. 
+ */ + if (data_race(report_filterlist.used =3D=3D report_filterlist.size)) { + new_size =3D (report_filterlist.size ?: 4) * 2; + delay_free =3D new_addrs =3D kmalloc_array(new_size, sizeof(unsigned lon= g), GFP_KERNEL); + if (!new_addrs) + return -ENOMEM; + } =20 - if (report_filterlist.addrs =3D=3D NULL) { - /* initial allocation */ - report_filterlist.addrs =3D - kmalloc_array(report_filterlist.size, - sizeof(unsigned long), GFP_ATOMIC); - if (report_filterlist.addrs =3D=3D NULL) { - ret =3D -ENOMEM; - goto out; - } - } else if (report_filterlist.used =3D=3D report_filterlist.size) { - /* resize filterlist */ - size_t new_size =3D report_filterlist.size * 2; - unsigned long *new_addrs =3D - krealloc(report_filterlist.addrs, - new_size * sizeof(unsigned long), GFP_ATOMIC); - - if (new_addrs =3D=3D NULL) { - /* leave filterlist itself untouched */ - ret =3D -ENOMEM; - goto out; + raw_spin_lock_irqsave(&report_filterlist_lock, flags); + if (report_filterlist.used =3D=3D report_filterlist.size) { + /* Check we pre-allocated enough, and retry if not. */ + if (report_filterlist.used >=3D new_size) { + raw_spin_unlock_irqrestore(&report_filterlist_lock, flags); + kfree(new_addrs); /* kfree(NULL) is safe */ + delay_free =3D new_addrs =3D NULL; + goto retry_alloc; } =20 + if (report_filterlist.used) + memcpy(new_addrs, report_filterlist.addrs, report_filterlist.used * siz= eof(unsigned long)); + delay_free =3D report_filterlist.addrs; /* free the old list */ + report_filterlist.addrs =3D new_addrs; /* switch to the new list */ report_filterlist.size =3D new_size; - report_filterlist.addrs =3D new_addrs; } =20 /* Note: deduplicating should be done in userspace. 
*/ @@ -180,9 +178,9 @@ static ssize_t insert_report_filterlist(const char *fun= c) kallsyms_lookup_name(func); report_filterlist.sorted =3D false; =20 -out: - spin_unlock_irqrestore(&report_filterlist_lock, flags); + raw_spin_unlock_irqrestore(&report_filterlist_lock, flags); =20 + kfree(delay_free); return ret; } =20 @@ -199,13 +197,13 @@ static int show_info(struct seq_file *file, void *v) } =20 /* show filter functions, and filter type */ - spin_lock_irqsave(&report_filterlist_lock, flags); + raw_spin_lock_irqsave(&report_filterlist_lock, flags); seq_printf(file, "\n%s functions: %s\n", report_filterlist.whitelist ? "whitelisted" : "blacklisted", report_filterlist.used =3D=3D 0 ? "none" : ""); for (i =3D 0; i < report_filterlist.used; ++i) seq_printf(file, " %ps\n", (void *)report_filterlist.addrs[i]); - spin_unlock_irqrestore(&report_filterlist_lock, flags); + raw_spin_unlock_irqrestore(&report_filterlist_lock, flags); =20 return 0; } --=20 2.43.0