[tip: locking/urgent] jump_label: Fix static_key_slow_dec() yet again

tip-bot2 for Peter Zijlstra posted 1 patch 1 year, 3 months ago
There is a newer version of this series
kernel/jump_label.c | 83 ++++++++++++++++++++++++++++----------------
1 file changed, 54 insertions(+), 29 deletions(-)
[tip: locking/urgent] jump_label: Fix static_key_slow_dec() yet again
Posted by tip-bot2 for Peter Zijlstra 1 year, 3 months ago
The following commit has been merged into the locking/urgent branch of tip:

Commit-ID:     de752774f38bb766941ed1bf910ba5a9f6cc6bf7
Gitweb:        https://git.kernel.org/tip/de752774f38bb766941ed1bf910ba5a9f6cc6bf7
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 07 Aug 2024 16:03:12 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 06 Sep 2024 16:29:22 +02:00

jump_label: Fix static_key_slow_dec() yet again

While commit 83ab38ef0a0b ("jump_label: Fix concurrency issues in
static_key_slow_dec()") fixed one problem, it created yet another,
notably the following is now possible:

  slow_dec
    if (try_dec) // dec_not_one-ish, false
    // enabled == 1
                                slow_inc
                                  if (inc_not_disabled) // inc_not_zero-ish
                                  // enabled == 2
                                    return

    guard((mutex)(&jump_label_mutex);
    if (atomic_cmpxchg(1,0)==1) // false, we're 2

                                slow_dec
                                  if (try-dec) // dec_not_one, true
                                  // enabled == 1
                                    return
    else
      try_dec() // dec_not_one, false
      WARN

Close this by creating two distinct operations, one dec_not_one()-like
for the fast path and one dec_and_test()-like for the slow path. Both
also taking the magic -1 value into account.

Thomas provided the more readable version with comments on.

Fixes: 83ab38ef0a0b ("jump_label: Fix concurrency issues in static_key_slow_dec()")
Reported-by: "Darrick J. Wong" <djwong@kernel.org>
Co-developed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/875xsc4ehr.ffs@tglx
---
 kernel/jump_label.c | 83 ++++++++++++++++++++++++++++----------------
 1 file changed, 54 insertions(+), 29 deletions(-)

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 6dc76b5..0881fd2 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -168,8 +168,8 @@ bool static_key_slow_inc_cpuslocked(struct static_key *key)
 		jump_label_update(key);
 		/*
 		 * Ensure that when static_key_fast_inc_not_disabled() or
-		 * static_key_slow_try_dec() observe the positive value,
-		 * they must also observe all the text changes.
+		 * static_key_dec() observe the positive value, they must also
+		 * observe all the text changes.
 		 */
 		atomic_set_release(&key->enabled, 1);
 	} else {
@@ -250,49 +250,74 @@ void static_key_disable(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_disable);
 
-static bool static_key_slow_try_dec(struct static_key *key)
+static bool static_key_dec(struct static_key *key, bool dec_not_one)
 {
-	int v;
+	int v = atomic_read(&key->enabled);
 
-	/*
-	 * Go into the slow path if key::enabled is less than or equal than
-	 * one. One is valid to shut down the key, anything less than one
-	 * is an imbalance, which is handled at the call site.
-	 *
-	 * That includes the special case of '-1' which is set in
-	 * static_key_slow_inc_cpuslocked(), but that's harmless as it is
-	 * fully serialized in the slow path below. By the time this task
-	 * acquires the jump label lock the value is back to one and the
-	 * retry under the lock must succeed.
-	 */
-	v = atomic_read(&key->enabled);
 	do {
 		/*
-		 * Warn about the '-1' case though; since that means a
-		 * decrement is concurrent with a first (0->1) increment. IOW
-		 * people are trying to disable something that wasn't yet fully
-		 * enabled. This suggests an ordering problem on the user side.
+		 * Warn about the '-1' case; since that means a decrement is
+		 * concurrent with a first (0->1) increment. IOW people are
+		 * trying to disable something that wasn't yet fully enabled.
+		 * This suggests an ordering problem on the user side.
+		 *
+		 * Warn about the '0' case; simple underflow.
 		 */
-		WARN_ON_ONCE(v < 0);
-		if (v <= 1)
-			return false;
+		if (WARN_ON_ONCE(v <= 0))
+			return v;
+
+		if (dec_not_one && v == 1)
+			return v;
+
 	} while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v - 1)));
 
-	return true;
+	return v;
+}
+
+/*
+ * Fastpath: Decrement if the reference count is greater than one
+ *
+ * Returns false, if the reference count is 1 or -1 to force the caller
+ * into the slowpath.
+ *
+ * The -1 case is to handle a decrement during a concurrent first enable,
+ * which sets the count to -1 in static_key_slow_inc_cpuslocked(). As the
+ * slow path is serialized the caller will observe 1 once it acquired the
+ * jump_label_mutex, so the slow path can succeed.
+ *
+ * Notably 0 (underflow) returns success such that it bails without doing
+ * anything.
+ */
+static bool static_key_dec_not_one(struct static_key *key)
+{
+	int v = static_key_dec(key, true);
+
+	return v != 1 && v != -1;
+}
+
+/*
+ * Slowpath: Decrement and test whether the refcount hit 0.
+ *
+ * Returns true if the refcount hit zero, i.e. the previous value was one.
+ */
+static bool static_key_dec_and_test(struct static_key *key)
+{
+	int v = static_key_dec(key, false);
+
+	lockdep_assert_held(&jump_label_mutex);
+	return v == 1;
 }
 
 static void __static_key_slow_dec_cpuslocked(struct static_key *key)
 {
 	lockdep_assert_cpus_held();
 
-	if (static_key_slow_try_dec(key))
+	if (static_key_dec_not_one(key))
 		return;
 
 	guard(mutex)(&jump_label_mutex);
-	if (atomic_cmpxchg(&key->enabled, 1, 0) == 1)
+	if (static_key_dec_and_test(key))
 		jump_label_update(key);
-	else
-		WARN_ON_ONCE(!static_key_slow_try_dec(key));
 }
 
 static void __static_key_slow_dec(struct static_key *key)
@@ -329,7 +354,7 @@ void __static_key_slow_dec_deferred(struct static_key *key,
 {
 	STATIC_KEY_CHECK_USE(key);
 
-	if (static_key_slow_try_dec(key))
+	if (static_key_dec_not_one(key))
 		return;
 
 	schedule_delayed_work(work, timeout);
Re: [tip: locking/urgent] jump_label: Fix static_key_slow_dec() yet again
Posted by Mark Brown 1 year, 3 months ago
On Fri, Sep 06, 2024 at 02:41:14PM -0000, tip-bot2 for Peter Zijlstra wrote:

> The following commit has been merged into the locking/urgent branch of tip:

> jump_label: Fix static_key_slow_dec() yet again
> 
> While commit 83ab38ef0a0b ("jump_label: Fix concurrency issues in
> static_key_slow_dec()") fixed one problem, it created yet another,
> notably the following is now possible:

This patch, which is now in -next appears to have caused the KVM unit
tests to start exploding badly on some arm64 systems (at least N1SDP and
Cavium TX2).  I've bisected the issue, but not analyzed it at all beyond
noting that the commit looks relevant to the failure.  None of the other
tests we run on these platforms seem to trigger the issue.

Before producing any output the tests trigger a warning:

<4>[   17.303495] ------------[ cut here ]------------
<4>[   17.308364] WARNING: CPU: 1 PID: 279 at kernel/jump_label.c:266 static_key_dec+0x68/0x74
<4>[   17.316706] Modules linked in: crct10dif_ce arm_spe_pmu coresight_replicator coresight_funnel coresight_tmc coresight_stm stm_core coresight_tpiu arm_cmn coresight cfg80211 rfkill fuse dm_mod ip_tables x_tables ipv6
<4>[   17.336080] CPU: 1 UID: 0 PID: 279 Comm: qemu-system-aar Not tainted 6.11.0-rc7-00006-g3a0c7230588b #10
<4>[   17.345719] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
<4>[   17.352927] pc : static_key_dec+0x68/0x74
<4>[   17.357183] lr : __static_key_slow_dec_cpuslocked+0x24/0x84
<4>[   17.363003] sp : ffff800083c8ba80

....

<4>[   17.440381] Call trace:
<4>[   17.443074]  static_key_dec+0x68/0x74
<4>[   17.446984]  static_key_slow_dec+0x2c/0x80
<4>[   17.451327]  preempt_notifier_dec+0x18/0x24
<4>[   17.455759]  kvm_destroy_vm+0x208/0x2b0
<4>[   17.459845]  kvm_vm_release+0x80/0xb0
<4>[   17.463754]  __fput+0xcc/0x2d4
<4>[   17.467057]  ____fput+0x10/0x1c
<4>[   17.470446]  task_work_run+0x78/0xd4
<4>[   17.474268]  do_exit+0x2c8/0x90c

then the test times out and all the remaining cores splat on:

4>[   18.067930] registering preempt_notifier while notifiers disabled
<4>[   18.067935] WARNING: CPU: 2 PID: 470 at kernel/sched/core.c:4729 preempt_notifier_register+0x24/0x58

The bisect seems to converge fairly smoothly:

git bisect start
# status: waiting for both good and bad commits
# bad: [6708132e80a2ced620bde9b9c36e426183544a23] Add linux-next specific files for 20240910
git bisect bad 6708132e80a2ced620bde9b9c36e426183544a23
# status: waiting for good commit(s), bad commit known
# good: [028ac237a57e1bcb07c7130b11527c0e025e4bef] Merge branch 'for-linux-next-fixes' of https://gitlab.freedesktop.org/drm/misc/kernel.git
git bisect good 028ac237a57e1bcb07c7130b11527c0e025e4bef
# good: [b66d58fce82c825b3dbb57a46b9a74f081ef7ec7] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
git bisect good b66d58fce82c825b3dbb57a46b9a74f081ef7ec7
# good: [a636a90415dbc59f005369e3053996f859f0af50] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux
git bisect good a636a90415dbc59f005369e3053996f859f0af50
# bad: [8e5ac35ddecbeddce79e915c226baaf577a2be6e] Merge branch 'driver-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
git bisect bad 8e5ac35ddecbeddce79e915c226baaf577a2be6e
# bad: [1bcadc80ec6a46fb7193999935aaa299b4916569] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
git bisect bad 1bcadc80ec6a46fb7193999935aaa299b4916569
# good: [c2d0e416bdd9c83db3c9bb1f19433d5ba34e18c2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git
git bisect good c2d0e416bdd9c83db3c9bb1f19433d5ba34e18c2
# bad: [723da3b00882e1d13038fc48ba2602af9de4dd2e] Merge branch into tip/master: 'locking/core'
git bisect bad 723da3b00882e1d13038fc48ba2602af9de4dd2e
# bad: [a70a5c33a65ee54048e4ae479e3479d765a1bbc2] Merge branch into tip/master: 'core/core'
git bisect bad a70a5c33a65ee54048e4ae479e3479d765a1bbc2
# good: [85e511df3cec46021024176672a748008ed135bf] sched/eevdf: Allow shorter slices to wakeup-preempt
git bisect good 85e511df3cec46021024176672a748008ed135bf
# good: [20f13385b5836d00d64698748565fc6d3ce9b419] posix-timers: Consolidate timer setup
git bisect good 20f13385b5836d00d64698748565fc6d3ce9b419
# good: [42db2c2cb5ac3572380a9489b8f8bbe0e534dfc7] timekeeping: Use time_after() in timekeeping_check_update()
git bisect good 42db2c2cb5ac3572380a9489b8f8bbe0e534dfc7
# bad: [3a0c7230588b40caf1d81270ceaa3aa5c0355bc0] Merge branch into tip/master: 'perf/urgent'
git bisect bad 3a0c7230588b40caf1d81270ceaa3aa5c0355bc0
# bad: [de752774f38bb766941ed1bf910ba5a9f6cc6bf7] jump_label: Fix static_key_slow_dec() yet again
git bisect bad de752774f38bb766941ed1bf910ba5a9f6cc6bf7
# good: [fe513c2ef0a172a58f158e2e70465c4317f0a9a2] static_call: Replace pointless WARN_ON() in static_call_module_notify()
git bisect good fe513c2ef0a172a58f158e2e70465c4317f0a9a2
# first bad commit: [de752774f38bb766941ed1bf910ba5a9f6cc6bf7] jump_label: Fix static_key_slow_dec() yet again
Re: [tip: locking/urgent] jump_label: Fix static_key_slow_dec() yet again
Posted by Mark Rutland 1 year, 2 months ago
On Tue, Sep 10, 2024 at 09:16:20PM +0100, Mark Brown wrote:
> On Fri, Sep 06, 2024 at 02:41:14PM -0000, tip-bot2 for Peter Zijlstra wrote:
> 
> > The following commit has been merged into the locking/urgent branch of tip:
> 
> > jump_label: Fix static_key_slow_dec() yet again
> > 
> > While commit 83ab38ef0a0b ("jump_label: Fix concurrency issues in
> > static_key_slow_dec()") fixed one problem, it created yet another,
> > notably the following is now possible:
> 
> This patch, which is now in -next appears to have caused the KVM unit
> tests to start exploding badly on some arm64 systems (at least N1SDP and
> Cavium TX2).  I've bisected the issue, but not analyzed it at all beyond
> noting that the commit looks relevant to the failure.  None of the other
> tests we run on these platforms seem to trigger the issue.
> 
> Before producing any output the tests trigger a warning:

FWIW, I believe this has been fixed. The old version of the patch was
broken:

  de752774f38bb766 ("jump_label: Fix static_key_slow_dec() yet again")

... and I could get that to explode consistently when running the
kvm:smccc_filter test.

The version that eventually made it into tip locking/urgent works
fine for me:

  1d7f856c2ca449f0 ("jump_label: Fix static_key_slow_dec() yet again")

... and I don't see any warnings even if I repeatedly run the entire KVM
selftest suite.

Mark.

> <4>[   17.303495] ------------[ cut here ]------------
> <4>[   17.308364] WARNING: CPU: 1 PID: 279 at kernel/jump_label.c:266 static_key_dec+0x68/0x74
> <4>[   17.316706] Modules linked in: crct10dif_ce arm_spe_pmu coresight_replicator coresight_funnel coresight_tmc coresight_stm stm_core coresight_tpiu arm_cmn coresight cfg80211 rfkill fuse dm_mod ip_tables x_tables ipv6
> <4>[   17.336080] CPU: 1 UID: 0 PID: 279 Comm: qemu-system-aar Not tainted 6.11.0-rc7-00006-g3a0c7230588b #10
> <4>[   17.345719] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> <4>[   17.352927] pc : static_key_dec+0x68/0x74
> <4>[   17.357183] lr : __static_key_slow_dec_cpuslocked+0x24/0x84
> <4>[   17.363003] sp : ffff800083c8ba80
> 
> ....
> 
> <4>[   17.440381] Call trace:
> <4>[   17.443074]  static_key_dec+0x68/0x74
> <4>[   17.446984]  static_key_slow_dec+0x2c/0x80
> <4>[   17.451327]  preempt_notifier_dec+0x18/0x24
> <4>[   17.455759]  kvm_destroy_vm+0x208/0x2b0
> <4>[   17.459845]  kvm_vm_release+0x80/0xb0
> <4>[   17.463754]  __fput+0xcc/0x2d4
> <4>[   17.467057]  ____fput+0x10/0x1c
> <4>[   17.470446]  task_work_run+0x78/0xd4
> <4>[   17.474268]  do_exit+0x2c8/0x90c
> 
> then the test times out and all the remaining cores splat on:
> 
> 4>[   18.067930] registering preempt_notifier while notifiers disabled
> <4>[   18.067935] WARNING: CPU: 2 PID: 470 at kernel/sched/core.c:4729 preempt_notifier_register+0x24/0x58
> 
> The bisect seems to converge fairly smoothly:
> 
> git bisect start
> # status: waiting for both good and bad commits
> # bad: [6708132e80a2ced620bde9b9c36e426183544a23] Add linux-next specific files for 20240910
> git bisect bad 6708132e80a2ced620bde9b9c36e426183544a23
> # status: waiting for good commit(s), bad commit known
> # good: [028ac237a57e1bcb07c7130b11527c0e025e4bef] Merge branch 'for-linux-next-fixes' of https://gitlab.freedesktop.org/drm/misc/kernel.git
> git bisect good 028ac237a57e1bcb07c7130b11527c0e025e4bef
> # good: [b66d58fce82c825b3dbb57a46b9a74f081ef7ec7] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
> git bisect good b66d58fce82c825b3dbb57a46b9a74f081ef7ec7
> # good: [a636a90415dbc59f005369e3053996f859f0af50] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux
> git bisect good a636a90415dbc59f005369e3053996f859f0af50
> # bad: [8e5ac35ddecbeddce79e915c226baaf577a2be6e] Merge branch 'driver-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
> git bisect bad 8e5ac35ddecbeddce79e915c226baaf577a2be6e
> # bad: [1bcadc80ec6a46fb7193999935aaa299b4916569] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
> git bisect bad 1bcadc80ec6a46fb7193999935aaa299b4916569
> # good: [c2d0e416bdd9c83db3c9bb1f19433d5ba34e18c2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git
> git bisect good c2d0e416bdd9c83db3c9bb1f19433d5ba34e18c2
> # bad: [723da3b00882e1d13038fc48ba2602af9de4dd2e] Merge branch into tip/master: 'locking/core'
> git bisect bad 723da3b00882e1d13038fc48ba2602af9de4dd2e
> # bad: [a70a5c33a65ee54048e4ae479e3479d765a1bbc2] Merge branch into tip/master: 'core/core'
> git bisect bad a70a5c33a65ee54048e4ae479e3479d765a1bbc2
> # good: [85e511df3cec46021024176672a748008ed135bf] sched/eevdf: Allow shorter slices to wakeup-preempt
> git bisect good 85e511df3cec46021024176672a748008ed135bf
> # good: [20f13385b5836d00d64698748565fc6d3ce9b419] posix-timers: Consolidate timer setup
> git bisect good 20f13385b5836d00d64698748565fc6d3ce9b419
> # good: [42db2c2cb5ac3572380a9489b8f8bbe0e534dfc7] timekeeping: Use time_after() in timekeeping_check_update()
> git bisect good 42db2c2cb5ac3572380a9489b8f8bbe0e534dfc7
> # bad: [3a0c7230588b40caf1d81270ceaa3aa5c0355bc0] Merge branch into tip/master: 'perf/urgent'
> git bisect bad 3a0c7230588b40caf1d81270ceaa3aa5c0355bc0
> # bad: [de752774f38bb766941ed1bf910ba5a9f6cc6bf7] jump_label: Fix static_key_slow_dec() yet again
> git bisect bad de752774f38bb766941ed1bf910ba5a9f6cc6bf7
> # good: [fe513c2ef0a172a58f158e2e70465c4317f0a9a2] static_call: Replace pointless WARN_ON() in static_call_module_notify()
> git bisect good fe513c2ef0a172a58f158e2e70465c4317f0a9a2
> # first bad commit: [de752774f38bb766941ed1bf910ba5a9f6cc6bf7] jump_label: Fix static_key_slow_dec() yet again