From nobody Tue Apr  7 04:21:17 2026
Received: from foss.arm.com (foss.arm.com [217.140.110.172])
	by smtp.subspace.kernel.org (Postfix) with ESMTP id 421BE38E5D1;
	Mon, 16 Mar 2026 10:03:02 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=217.140.110.172
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773655386; cv=none;
 b=mUgdWAN4pueMHPXMoNDp/o3pJiIqNLMEqzSnCBONy9otKW/MuMYa373pajalgr9WoNR9nHZ6FMVKlm256XZNYm6SFk048kFxwQTx8zwTTKsGN+R4tWKWMYSSF1oNh1ENauMblUSJtH1/CDet8ek//rRB2BzI5H0JU6A6jTwttDc=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773655386; c=relaxed/simple;
	bh=GllGj6xZhSDS16w8+jOsHbmWKwR3CFBW/xrXou7tlFU=;
	h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=Q7mPp1+sLZZiKRxSKJcAlSBC+ScXN920W+EVjmPAjYRqX1SC+f8ltJLBV/lrrwpykQ0HB9CL9PfeY9vTlMdEcLuPB6/mgGgl7MVeyOcKj286eQCRltVSIj1HLRXniQ9hXdRtfG2qIkZs/CNPrucCdfYelCpl63EL3ezmB4Tb4yM=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=pass (p=none dis=none) header.from=arm.com;
 spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=pass (p=none dis=none) header.from=arm.com
Authentication-Results: smtp.subspace.kernel.org;
 spf=pass smtp.mailfrom=arm.com
Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])
	by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 81AAD14BF;
	Mon, 16 Mar 2026 03:02:55 -0700 (PDT)
Received: from e127648.cambridge.arm.com (e127648.arm.com [10.1.28.15])
	by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 02BFE3F73B;
	Mon, 16 Mar 2026 03:02:58 -0700 (PDT)
From: Christian Loehle <christian.loehle@arm.com>
To: sched-ext@lists.linux.dev
Cc: linux-kernel@vger.kernel.org,
	linux-kselftest@vger.kernel.org,
	tj@kernel.org,
	void@manifault.com,
	arighi@nvidia.com,
	changwoo@igalia.com,
	mingo@redhat.com,
	peterz@infradead.org,
	shuah@kernel.org,
	dietmar.eggemann@arm.com,
	Christian Loehle <christian.loehle@arm.com>
Subject: [PATCH 1/2] sched_ext: Prevent SCX_KICK_WAIT deadlock by
 serialization
Date: Mon, 16 Mar 2026 10:02:48 +0000
Message-Id: <20260316100249.1651641-2-christian.loehle@arm.com>
X-Mailer: git-send-email 2.34.1
In-Reply-To: <20260316100249.1651641-1-christian.loehle@arm.com>
References: <20260316100249.1651641-1-christian.loehle@arm.com>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable

SCX_KICK_WAIT causes kick_cpus_irq_workfn() to busy-wait using
smp_cond_load_acquire() until the target CPU's current SCX task has been
context-switched out (its kick_sync counter advanced).

If multiple CPUs each issue SCX_KICK_WAIT targeting one another
concurrently =E2=80=94 e.g. CPU A waits for CPU B, B waits for CPU C, C wai=
ts for
CPU A =E2=80=94 all CPUs can end up wedged inside smp_cond_load_acquire()
simultaneously.  Because each victim CPU is spinning in hardirq/irq_work
context, it cannot reschedule, so no kick_sync counter ever advances and
the system deadlocks.

Fix this by serializing access to the wait loop behind a global raw
spinlock (scx_kick_wait_lock).  Only one CPU at a time may execute the
wait loop; any other CPU that has SCX_KICK_WAIT work to do and fails to
acquire the lock records itself in scx_kick_wait_pending and returns.
When the active waiter finishes and releases the lock, it replays the
pending set by re-queuing each pending CPU's kick_cpus_irq_work, ensuring
no wait request is silently dropped.

This is deliberately a coarse serialization: multiple simultaneous wait
operations now run sequentially, increasing latency.  In exchange,
deadlocks are impossible regardless of the cycle length (A->B->C->...->A).

Also clear scx_kick_wait_pending in free_kick_syncs() so that any stale
bits left by a CPU that deferred just as the scheduler exited are reset
before the next scheduler instance loads.

Fixes: 90e55164dad4 ("sched_ext: Implement SCX_KICK_WAIT")
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 kernel/sched/ext.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 26a6ac2f8826..b63ae13d0486 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -89,6 +89,19 @@ struct scx_kick_syncs {
=20
 static DEFINE_PER_CPU(struct scx_kick_syncs __rcu *, scx_kick_syncs);
=20
+/*
+ * Serialize %SCX_KICK_WAIT processing across CPUs to avoid wait cycles.
+ * Callers failing to acquire @scx_kick_wait_lock defer by recording
+ * themselves in @scx_kick_wait_pending and are retriggered when the active
+ * waiter completes.
+ *
+ * Lock ordering: when both are held, @scx_kick_wait_lock is taken first;
+ * @scx_kick_wait_pending_lock may also be taken on its own.
+ */
+static DEFINE_RAW_SPINLOCK(scx_kick_wait_lock);
+static DEFINE_RAW_SPINLOCK(scx_kick_wait_pending_lock);
+static cpumask_t scx_kick_wait_pending;
+
 /*
  * Direct dispatch marker.
  *
@@ -4279,6 +4292,13 @@ static void free_kick_syncs(void)
 		if (to_free)
 			kvfree_rcu(to_free, rcu);
 	}
+
+	/*
+	 * Clear any CPUs that were waiting for the lock when the scheduler
+	 * exited.  Their irq_work has already returned so no in-flight
+	 * waiter can observe the stale bits on the next enable.
+	 */
+	cpumask_clear(&scx_kick_wait_pending);
 }
=20
 static void scx_disable_workfn(struct kthread_work *work)
@@ -5647,8 +5667,9 @@ static void kick_cpus_irq_workfn(struct irq_work *irq=
_work)
 	struct rq *this_rq =3D this_rq();
 	struct scx_rq *this_scx =3D &this_rq->scx;
 	struct scx_kick_syncs __rcu *ksyncs_pcpu =3D __this_cpu_read(scx_kick_syn=
cs);
-	bool should_wait =3D false;
+	bool should_wait =3D !cpumask_empty(this_scx->cpus_to_wait);
 	unsigned long *ksyncs;
+	s32 this_cpu =3D cpu_of(this_rq);
 	s32 cpu;
=20
 	if (unlikely(!ksyncs_pcpu)) {
@@ -5672,6 +5693,17 @@ static void kick_cpus_irq_workfn(struct irq_work *ir=
q_work)
 	if (!should_wait)
 		return;
=20
+	if (!raw_spin_trylock(&scx_kick_wait_lock)) {
+		raw_spin_lock(&scx_kick_wait_pending_lock);
+		cpumask_set_cpu(this_cpu, &scx_kick_wait_pending);
+		raw_spin_unlock(&scx_kick_wait_pending_lock);
+		return;
+	}
+
+	raw_spin_lock(&scx_kick_wait_pending_lock);
+	cpumask_clear_cpu(this_cpu, &scx_kick_wait_pending);
+	raw_spin_unlock(&scx_kick_wait_pending_lock);
+
 	for_each_cpu(cpu, this_scx->cpus_to_wait) {
 		unsigned long *wait_kick_sync =3D &cpu_rq(cpu)->scx.kick_sync;
=20
@@ -5686,11 +5718,20 @@ static void kick_cpus_irq_workfn(struct irq_work *i=
rq_work)
 		 * task is picked subsequently. The latter is necessary to break
 		 * the wait when $cpu is taken by a higher sched class.
 		 */
-		if (cpu !=3D cpu_of(this_rq))
+		if (cpu !=3D this_cpu)
 			smp_cond_load_acquire(wait_kick_sync, VAL !=3D ksyncs[cpu]);
=20
 		cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
 	}
+
+	raw_spin_unlock(&scx_kick_wait_lock);
+
+	raw_spin_lock(&scx_kick_wait_pending_lock);
+	for_each_cpu(cpu, &scx_kick_wait_pending) {
+		cpumask_clear_cpu(cpu, &scx_kick_wait_pending);
+		irq_work_queue(&cpu_rq(cpu)->scx.kick_cpus_irq_work);
+	}
+	raw_spin_unlock(&scx_kick_wait_pending_lock);
 }
=20
 /**
--=20
2.34.1

From nobody Tue Apr  7 04:21:17 2026
Received: from foss.arm.com (foss.arm.com [217.140.110.172])
	by smtp.subspace.kernel.org (Postfix) with ESMTP id 2472438E5D6;
	Mon, 16 Mar 2026 10:03:04 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=217.140.110.172
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1773655388; cv=none;
 b=BDf2W3hj7X4PiGZhXyeBN4GrPZzadBdeRWDZAW3qIGhrOnJ5p7+HV8D25pDhxeeYWwlbOo5xFTfftq3T/pL5ejhfD7h1Ni3AqPZ+SWV6/rXTX7YaLKw7Iww9ez0B5fxgUZiKkSYGh/hwjqvCJOosOzO149SxoxgcEywyGCCIujE=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1773655388; c=relaxed/simple;
	bh=VIF9kYfcgVSL3ByLqv2ACH4zHelFk2PGr95HIuYjMVs=;
	h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References:
	 MIME-Version:Content-Type;
 b=gAKoEtBn4SnfBxTKhkx7W0j3SyUKqcuG7p6G4q8ZHnUFaXs/EaRjN1WaEgGXozs9fuevdPa0AmtY1GFiMT4liu4zgizDYtLADNTUJcJf1P1TJuq3dz/qpc1dsQ/OdKfN93H+0Nhjwo0rIt3MzCFvOBPzB3inNFrXdaeMPSu2FNo=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=pass (p=none dis=none) header.from=arm.com;
 spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=pass (p=none dis=none) header.from=arm.com
Authentication-Results: smtp.subspace.kernel.org;
 spf=pass smtp.mailfrom=arm.com
Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])
	by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 4B8B71477;
	Mon, 16 Mar 2026 03:02:58 -0700 (PDT)
Received: from e127648.cambridge.arm.com (e127648.arm.com [10.1.28.15])
	by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 13C753F73B;
	Mon, 16 Mar 2026 03:03:01 -0700 (PDT)
From: Christian Loehle <christian.loehle@arm.com>
To: sched-ext@lists.linux.dev
Cc: linux-kernel@vger.kernel.org,
	linux-kselftest@vger.kernel.org,
	tj@kernel.org,
	void@manifault.com,
	arighi@nvidia.com,
	changwoo@igalia.com,
	mingo@redhat.com,
	peterz@infradead.org,
	shuah@kernel.org,
	dietmar.eggemann@arm.com,
	Christian Loehle <christian.loehle@arm.com>
Subject: [PATCH 2/2] sched_ext/selftests: Add SCX_KICK_WAIT cycle tests
Date: Mon, 16 Mar 2026 10:02:49 +0000
Message-Id: <20260316100249.1651641-3-christian.loehle@arm.com>
X-Mailer: git-send-email 2.34.1
In-Reply-To: <20260316100249.1651641-1-christian.loehle@arm.com>
References: <20260316100249.1651641-1-christian.loehle@arm.com>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable

Add wait_kick_cycle, a selftest that stresses a three-CPU SCX_KICK_WAIT
cycle: each test CPU kicks the next CPU in the ring with SCX_KICK_WAIT
to verify that sched_ext does not deadlock.

Note: hangs on unfixed kernels

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 tools/testing/selftests/sched_ext/Makefile    |   1 +
 .../selftests/sched_ext/wait_kick_cycle.bpf.c |  70 ++++++
 .../selftests/sched_ext/wait_kick_cycle.c     | 223 ++++++++++++++++++
 3 files changed, 294 insertions(+)
 create mode 100644 tools/testing/selftests/sched_ext/wait_kick_cycle.bpf.c
 create mode 100644 tools/testing/selftests/sched_ext/wait_kick_cycle.c

diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/sel=
ftests/sched_ext/Makefile
index 006300ac6dff..0b5b527265f7 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -188,6 +188,7 @@ auto-test-targets :=3D			\
 	rt_stall			\
 	test_example			\
 	total_bw			\
+	wait_kick_cycle			\
=20
 testcase-targets :=3D $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-tes=
t-targets)))
=20
diff --git a/tools/testing/selftests/sched_ext/wait_kick_cycle.bpf.c b/tool=
s/testing/selftests/sched_ext/wait_kick_cycle.bpf.c
new file mode 100644
index 000000000000..c53cda86ec75
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/wait_kick_cycle.bpf.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Christian Loehle <christian.loehle@arm.com>
+ *
+ * Stress concurrent SCX_KICK_WAIT calls to validate forward progress.
+ *
+ * Three CPUs are designated from userspace. Each non-kthread enqueue on
+ * one of them kicks the next CPU in the ring with SCX_KICK_WAIT, creating
+ * persistent A -> B -> C -> A wait-cycle pressure.
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") =3D "GPL";
+
+const volatile s32 test_cpu_a;
+const volatile s32 test_cpu_b;
+const volatile s32 test_cpu_c;
+
+u64 nr_enqueues;
+u64 nr_wait_kicks;
+
+UEI_DEFINE(uei);
+
+static s32 target_cpu(s32 cpu)
+{
+	if (cpu =3D=3D test_cpu_a)
+		return test_cpu_b;
+	if (cpu =3D=3D test_cpu_b)
+		return test_cpu_c;
+	if (cpu =3D=3D test_cpu_c)
+		return test_cpu_a;
+	return -1;
+}
+
+void BPF_STRUCT_OPS(wait_kick_cycle_enqueue, struct task_struct *p, u64 en=
q_flags)
+{
+	s32 this_cpu =3D bpf_get_smp_processor_id();
+	s32 tgt;
+
+	__sync_fetch_and_add(&nr_enqueues, 1);
+
+	if (p->flags & PF_KTHREAD) {
+		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_INF,
+				   enq_flags | SCX_ENQ_PREEMPT);
+		return;
+	}
+
+	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+
+	tgt =3D target_cpu(this_cpu);
+	if (tgt < 0 || tgt =3D=3D this_cpu)
+		return;
+
+	__sync_fetch_and_add(&nr_wait_kicks, 1);
+	scx_bpf_kick_cpu(tgt, SCX_KICK_WAIT);
+}
+
+void BPF_STRUCT_OPS(wait_kick_cycle_exit, struct scx_exit_info *ei)
+{
+	UEI_RECORD(uei, ei);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops wait_kick_cycle_ops =3D {
+	.enqueue		=3D wait_kick_cycle_enqueue,
+	.exit			=3D wait_kick_cycle_exit,
+	.name			=3D "wait_kick_cycle",
+	.timeout_ms		=3D 1000U,
+};
diff --git a/tools/testing/selftests/sched_ext/wait_kick_cycle.c b/tools/te=
sting/selftests/sched_ext/wait_kick_cycle.c
new file mode 100644
index 000000000000..3889e7a9a0a7
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/wait_kick_cycle.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Christian Loehle <christian.loehle@arm.com>
+ */
+#define _GNU_SOURCE
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <scx/common.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "scx_test.h"
+#include "wait_kick_cycle.bpf.skel.h"
+
+/*
+ * Multiple workers per test CPU. Packing several runnable threads onto ea=
ch
+ * CPU causes frequent context switching and back-to-back enqueue() calls,=
 which
+ * maximizes the chance that all three test CPUs fire enqueue() concurrent=
ly
+ * and enter the SCX_KICK_WAIT cycle simultaneously.
+ */
+#define WORKERS_PER_CPU	4
+#define NR_TEST_CPUS	3
+#define NR_WORKERS	(NR_TEST_CPUS * WORKERS_PER_CPU)
+
+struct worker_ctx {
+	pthread_t tid;
+	int cpu;
+	volatile bool stop;
+	volatile __u64 iters;
+	bool started;
+};
+
+static int pick_test_cpus(int *cpu_a, int *cpu_b, int *cpu_c)
+{
+	cpu_set_t mask;
+	int cpus[4];
+	int nr =3D 0;
+	int cpu;
+
+	if (sched_getaffinity(0, sizeof(mask), &mask))
+		return -errno;
+
+	for (cpu =3D 0; cpu < CPU_SETSIZE && nr < ARRAY_SIZE(cpus); cpu++) {
+		if (!CPU_ISSET(cpu, &mask))
+			continue;
+		cpus[nr++] =3D cpu;
+	}
+
+	if (nr < 3)
+		return -EOPNOTSUPP;
+
+	/* Leave one CPU unused when possible so one CPU remains uncongested. */
+	if (nr >=3D 4) {
+		*cpu_a =3D cpus[1];
+		*cpu_b =3D cpus[2];
+		*cpu_c =3D cpus[3];
+	} else {
+		*cpu_a =3D cpus[0];
+		*cpu_b =3D cpus[1];
+		*cpu_c =3D cpus[2];
+	}
+	return 0;
+}
+
+static void *worker_fn(void *arg)
+{
+	struct worker_ctx *worker =3D arg;
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(worker->cpu, &mask);
+
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		return (void *)(uintptr_t)errno;
+
+	/*
+	 * Tight yield loop =E2=80=94 no sleep.  Keeping the CPU continuously busy
+	 * with rapid context switches ensures enqueue() fires at the highest
+	 * possible rate on each test CPU.
+	 */
+	while (!worker->stop) {
+		sched_yield();
+		worker->iters++;
+	}
+
+	return NULL;
+}
+
+static int join_worker(struct worker_ctx *worker)
+{
+	void *ret;
+	struct timespec ts;
+	int err;
+
+	if (!worker->started)
+		return 0;
+
+	if (clock_gettime(CLOCK_REALTIME, &ts))
+		return -errno;
+
+	ts.tv_sec +=3D 2;
+	err =3D pthread_timedjoin_np(worker->tid, &ret, &ts);
+	if (err =3D=3D ETIMEDOUT)
+		pthread_detach(worker->tid);
+	if (err)
+		return -err;
+
+	if ((uintptr_t)ret)
+		return -(int)(uintptr_t)ret;
+
+	return 0;
+}
+
+static enum scx_test_status setup(void **ctx)
+{
+	struct wait_kick_cycle *skel;
+
+	skel =3D wait_kick_cycle__open();
+	SCX_FAIL_IF(!skel, "Failed to open skel");
+	SCX_ENUM_INIT(skel);
+
+	*ctx =3D skel;
+	return SCX_TEST_PASS;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+	struct wait_kick_cycle *skel =3D ctx;
+	struct worker_ctx workers[NR_WORKERS] =3D {};
+	struct bpf_link *link =3D NULL;
+	enum scx_test_status status =3D SCX_TEST_PASS;
+	int test_cpus[NR_TEST_CPUS] =3D { -1, -1, -1 };
+	int ret;
+	int i;
+
+	ret =3D pick_test_cpus(&test_cpus[0], &test_cpus[1], &test_cpus[2]);
+	if (ret =3D=3D -EOPNOTSUPP)
+		return SCX_TEST_SKIP;
+	if (ret) {
+		SCX_ERR("Failed to pick test cpus (%d)", ret);
+		return SCX_TEST_FAIL;
+	}
+
+	skel->rodata->test_cpu_a =3D test_cpus[0];
+	skel->rodata->test_cpu_b =3D test_cpus[1];
+	skel->rodata->test_cpu_c =3D test_cpus[2];
+
+	if (wait_kick_cycle__load(skel)) {
+		SCX_ERR("Failed to load skel");
+		return SCX_TEST_FAIL;
+	}
+
+	link =3D bpf_map__attach_struct_ops(skel->maps.wait_kick_cycle_ops);
+	if (!link) {
+		SCX_ERR("Failed to attach scheduler");
+		return SCX_TEST_FAIL;
+	}
+
+	/* WORKERS_PER_CPU threads per test CPU, all in tight yield loops. */
+	for (i =3D 0; i < NR_WORKERS; i++)
+		workers[i].cpu =3D test_cpus[i / WORKERS_PER_CPU];
+
+	for (i =3D 0; i < NR_WORKERS; i++) {
+		ret =3D pthread_create(&workers[i].tid, NULL, worker_fn, &workers[i]);
+		if (ret) {
+			SCX_ERR("Failed to create worker thread %d (%d)", i, ret);
+			status =3D SCX_TEST_FAIL;
+			goto out;
+		}
+		workers[i].started =3D true;
+	}
+
+	sleep(3);
+
+	if (skel->data->uei.kind !=3D EXIT_KIND(SCX_EXIT_NONE)) {
+		SCX_ERR("Scheduler exited unexpectedly (kind=3D%llu code=3D%lld)",
+			(unsigned long long)skel->data->uei.kind,
+			(long long)skel->data->uei.exit_code);
+		status =3D SCX_TEST_FAIL;
+	}
+
+out:
+	for (i =3D 0; i < NR_WORKERS; i++)
+		workers[i].stop =3D true;
+
+	for (i =3D 0; i < NR_WORKERS; i++) {
+		ret =3D join_worker(&workers[i]);
+		if (ret && status =3D=3D SCX_TEST_PASS) {
+			SCX_ERR("Failed to join worker thread %d (%d)", i, ret);
+			status =3D SCX_TEST_FAIL;
+		}
+	}
+
+	if (link)
+		bpf_link__destroy(link);
+
+	return status;
+}
+
+static void cleanup(void *ctx)
+{
+	struct wait_kick_cycle *skel =3D ctx;
+
+	wait_kick_cycle__destroy(skel);
+}
+
+struct scx_test wait_kick_cycle =3D {
+	.name =3D "wait_kick_cycle",
+	.description =3D "Verify SCX_KICK_WAIT forward progress under a 3-CPU wai=
t cycle",
+	.setup =3D setup,
+	.run =3D run,
+	.cleanup =3D cleanup,
+};
+REGISTER_SCX_TEST(&wait_kick_cycle)
--=20
2.34.1