[v3] sched: Restructure task_mm_cid_work for predictability

[PATCH v3 3/3] rseq/selftests: Add test for mm_cid compaction

Posted by Gabriele Monaco 1 year ago

A task in the kernel (task_mm_cid_work) runs somewhat periodically to
compact the mm_cid for each process, this test tries to validate that
it runs correctly and timely.

The test spawns 1 thread pinned to each CPU, then each thread, including
the main one, runs in short bursts for some time. During this period, the
mm_cids should be spanning all numbers between 0 and nproc.

At the end of this phase, a thread with high enough mm_cid (>= nproc/2)
is selected to be the new leader, all other threads terminate.

After some time, the only running thread should see 0 as mm_cid, if that
doesn't happen, the compaction mechanism didn't work and the test fails.

The test never fails if only 1 core is available, in which case, we
cannot test anything as the only available mm_cid is 0.

To: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
---
 tools/testing/selftests/rseq/.gitignore       |   1 +
 tools/testing/selftests/rseq/Makefile         |   2 +-
 .../selftests/rseq/mm_cid_compaction_test.c   | 190 ++++++++++++++++++
 3 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/rseq/mm_cid_compaction_test.c

diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index 16496de5f6ce..2c89f97e4f73 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -3,6 +3,7 @@ basic_percpu_ops_test
 basic_percpu_ops_mm_cid_test
 basic_test
 basic_rseq_op_test
+mm_cid_compaction_test
 param_test
 param_test_benchmark
 param_test_compare_twice
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 5a3432fceb58..ce1b38f46a35 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -16,7 +16,7 @@ OVERRIDE_TARGETS = 1
 
 TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
 		param_test_benchmark param_test_compare_twice param_test_mm_cid \
-		param_test_mm_cid_benchmark param_test_mm_cid_compare_twice
+		param_test_mm_cid_benchmark param_test_mm_cid_compare_twice mm_cid_compaction_test
 
 TEST_GEN_PROGS_EXTENDED = librseq.so
 
diff --git a/tools/testing/selftests/rseq/mm_cid_compaction_test.c b/tools/testing/selftests/rseq/mm_cid_compaction_test.c
new file mode 100644
index 000000000000..e5557b38a4e9
--- /dev/null
+++ b/tools/testing/selftests/rseq/mm_cid_compaction_test.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "../kselftest.h"
+#include "rseq.h"
+
+#define VERBOSE 0
+#define printf_verbose(fmt, ...)                    \
+	do {                                        \
+		if (VERBOSE)                        \
+			printf(fmt, ##__VA_ARGS__); \
+	} while (0)
+
+/* 0.5 s */
+#define RUNNER_PERIOD 500000
+/* Number of runs before we terminate or get the token */
+#define THREAD_RUNS 5
+
+/*
+ * Number of times we check that the mm_cid were compacted.
+ * Checks are repeated every RUNNER_PERIOD
+ */
+#define MM_CID_COMPACT_TIMEOUT 10
+
+struct thread_args {
+	int cpu;
+	int num_cpus;
+	pthread_mutex_t *token;
+	pthread_t *tinfo;
+	struct thread_args *args_head;
+};
+
+static void *thread_runner(void *arg)
+{
+	struct thread_args *args = arg;
+	int i, ret, curr_mm_cid;
+	cpu_set_t affinity;
+
+	CPU_ZERO(&affinity);
+	CPU_SET(args->cpu, &affinity);
+	ret = pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity);
+	if (ret) {
+		fprintf(stderr,
+			"Error: failed to set affinity to thread %d (%d): %s\n",
+			args->cpu, ret, strerror(ret));
+		assert(ret == 0);
+	}
+	for (i = 0; i < THREAD_RUNS; i++)
+		usleep(RUNNER_PERIOD);
+	curr_mm_cid = rseq_current_mm_cid();
+	/*
+	 * We select one thread with high enough mm_cid to be the new leader
+	 * all other threads (including the main thread) will terminate
+	 * After some time, the mm_cid of the only remaining thread should
+	 * converge to 0, if not, the test fails
+	 */
+	if (curr_mm_cid >= args->num_cpus / 2 &&
+	    !pthread_mutex_trylock(args->token)) {
+		printf_verbose("cpu%d has %d and will be the new leader\n",
+			       sched_getcpu(), curr_mm_cid);
+		for (i = 0; i < args->num_cpus; i++) {
+			if (args->tinfo[i] == pthread_self())
+				continue;
+			ret = pthread_join(args->tinfo[i], NULL);
+			if (ret) {
+				fprintf(stderr,
+					"Error: failed to join thread %d (%d): %s\n",
+					i, ret, strerror(ret));
+				assert(ret == 0);
+			}
+		}
+		free(args->tinfo);
+		free(args->token);
+		free(args->args_head);
+
+		for (i = 0; i < MM_CID_COMPACT_TIMEOUT; i++) {
+			curr_mm_cid = rseq_current_mm_cid();
+			printf_verbose("run %d: mm_cid %d on cpu%d\n", i,
+				       curr_mm_cid, sched_getcpu());
+			if (curr_mm_cid == 0) {
+				printf_verbose(
+					"mm_cids successfully compacted, exiting\n");
+				pthread_exit(NULL);
+			}
+			usleep(RUNNER_PERIOD);
+		}
+		assert(false);
+	}
+	printf_verbose("cpu%d has %d and is going to terminate\n",
+		       sched_getcpu(), curr_mm_cid);
+	pthread_exit(NULL);
+}
+
+void test_mm_cid_compaction(void)
+{
+	cpu_set_t affinity;
+	int i, j, ret, num_threads;
+	pthread_t *tinfo;
+	pthread_mutex_t *token;
+	struct thread_args *args;
+
+	sched_getaffinity(0, sizeof(affinity), &affinity);
+	num_threads = CPU_COUNT(&affinity);
+	tinfo = calloc(num_threads, sizeof(*tinfo));
+	if (!tinfo) {
+		fprintf(stderr, "Error: failed to allocate tinfo(%d): %s\n",
+			errno, strerror(errno));
+		assert(ret == 0);
+	}
+	args = calloc(num_threads, sizeof(*args));
+	if (!args) {
+		fprintf(stderr, "Error: failed to allocate args(%d): %s\n",
+			errno, strerror(errno));
+		assert(ret == 0);
+	}
+	token = calloc(num_threads, sizeof(*token));
+	if (!token) {
+		fprintf(stderr, "Error: failed to allocate token(%d): %s\n",
+			errno, strerror(errno));
+		assert(ret == 0);
+	}
+	if (num_threads == 1) {
+		printf_verbose(
+			"Running on a single cpu, cannot test anything\n");
+		return;
+	}
+	pthread_mutex_init(token, NULL);
+	/* The main thread runs on CPU0 */
+	for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) {
+		if (CPU_ISSET(i, &affinity)) {
+			args[j].num_cpus = num_threads;
+			args[j].tinfo = tinfo;
+			args[j].token = token;
+			args[j].cpu = i;
+			args[j].args_head = args;
+			if (!j) {
+				/* The first thread is the main one */
+				tinfo[0] = pthread_self();
+				++j;
+				continue;
+			}
+			ret = pthread_create(&tinfo[j], NULL, thread_runner,
+					     &args[j]);
+			if (ret) {
+				fprintf(stderr,
+					"Error: failed to create thread(%d): %s\n",
+					ret, strerror(ret));
+				assert(ret == 0);
+			}
+			++j;
+		}
+	}
+	printf_verbose("Started %d threads\n", num_threads);
+
+	/* Also main thread will terminate if it is not selected as leader */
+	thread_runner(&args[0]);
+}
+
+int main(int argc, char **argv)
+{
+	if (rseq_register_current_thread()) {
+		fprintf(stderr,
+			"Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto error;
+	}
+	if (!rseq_mm_cid_available()) {
+		fprintf(stderr, "Error: rseq_mm_cid unavailable\n");
+		goto error;
+	}
+	test_mm_cid_compaction();
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr,
+			"Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto error;
+	}
+	return 0;
+
+error:
+	return -1;
+}
-- 
2.47.1

Re: [PATCH v3 3/3] rseq/selftests: Add test for mm_cid compaction

Posted by Mathieu Desnoyers 11 months, 4 weeks ago

On 2024-12-16 08:09, Gabriele Monaco wrote:
> A task in the kernel (task_mm_cid_work) runs somewhat periodically to
> compact the mm_cid for each process, this test tries to validate that
> it runs correctly and timely.
> 
> The test spawns 1 thread pinned to each CPU, then each thread, including
> the main one, runs in short bursts for some time. During this period, the
> mm_cids should be spanning all numbers between 0 and nproc.
> 
> At the end of this phase, a thread with high enough mm_cid (>= nproc/2)
> is selected to be the new leader, all other threads terminate.
> 
> After some time, the only running thread should see 0 as mm_cid, if that
> doesn't happen, the compaction mechanism didn't work and the test fails.
> 
> The test never fails if only 1 core is available, in which case, we
> cannot test anything as the only available mm_cid is 0.
> 
> To: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Shuah Khan <shuah@kernel.org>
> Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
> ---
>   tools/testing/selftests/rseq/.gitignore       |   1 +
>   tools/testing/selftests/rseq/Makefile         |   2 +-
>   .../selftests/rseq/mm_cid_compaction_test.c   | 190 ++++++++++++++++++
>   3 files changed, 192 insertions(+), 1 deletion(-)
>   create mode 100644 tools/testing/selftests/rseq/mm_cid_compaction_test.c
> 
> diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
> index 16496de5f6ce..2c89f97e4f73 100644
> --- a/tools/testing/selftests/rseq/.gitignore
> +++ b/tools/testing/selftests/rseq/.gitignore
> @@ -3,6 +3,7 @@ basic_percpu_ops_test
>   basic_percpu_ops_mm_cid_test
>   basic_test
>   basic_rseq_op_test
> +mm_cid_compaction_test
>   param_test
>   param_test_benchmark
>   param_test_compare_twice
> diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
> index 5a3432fceb58..ce1b38f46a35 100644
> --- a/tools/testing/selftests/rseq/Makefile
> +++ b/tools/testing/selftests/rseq/Makefile
> @@ -16,7 +16,7 @@ OVERRIDE_TARGETS = 1
>   
>   TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
>   		param_test_benchmark param_test_compare_twice param_test_mm_cid \
> -		param_test_mm_cid_benchmark param_test_mm_cid_compare_twice
> +		param_test_mm_cid_benchmark param_test_mm_cid_compare_twice mm_cid_compaction_test
>   
>   TEST_GEN_PROGS_EXTENDED = librseq.so
>   
> diff --git a/tools/testing/selftests/rseq/mm_cid_compaction_test.c b/tools/testing/selftests/rseq/mm_cid_compaction_test.c
> new file mode 100644
> index 000000000000..e5557b38a4e9
> --- /dev/null
> +++ b/tools/testing/selftests/rseq/mm_cid_compaction_test.c
> @@ -0,0 +1,190 @@
> +// SPDX-License-Identifier: LGPL-2.1
> +#define _GNU_SOURCE
> +#include <assert.h>
> +#include <pthread.h>
> +#include <sched.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stddef.h>
> +
> +#include "../kselftest.h"
> +#include "rseq.h"
> +
> +#define VERBOSE 0
> +#define printf_verbose(fmt, ...)                    \
> +	do {                                        \
> +		if (VERBOSE)                        \
> +			printf(fmt, ##__VA_ARGS__); \
> +	} while (0)
> +
> +/* 0.5 s */
> +#define RUNNER_PERIOD 500000
> +/* Number of runs before we terminate or get the token */
> +#define THREAD_RUNS 5
> +
> +/*
> + * Number of times we check that the mm_cid were compacted.
> + * Checks are repeated every RUNNER_PERIOD

Minor style issue: missing period.

> + */
> +#define MM_CID_COMPACT_TIMEOUT 10
> +
> +struct thread_args {
> +	int cpu;
> +	int num_cpus;
> +	pthread_mutex_t *token;
> +	pthread_t *tinfo;
> +	struct thread_args *args_head;
> +};
> +
> +static void *thread_runner(void *arg)
> +{
> +	struct thread_args *args = arg;
> +	int i, ret, curr_mm_cid;
> +	cpu_set_t affinity;
> +
> +	CPU_ZERO(&affinity);
> +	CPU_SET(args->cpu, &affinity);
> +	ret = pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity);
> +	if (ret) {
> +		fprintf(stderr,
> +			"Error: failed to set affinity to thread %d (%d): %s\n",
> +			args->cpu, ret, strerror(ret));
> +		assert(ret == 0);
> +	}
> +	for (i = 0; i < THREAD_RUNS; i++)
> +		usleep(RUNNER_PERIOD);
> +	curr_mm_cid = rseq_current_mm_cid();
> +	/*
> +	 * We select one thread with high enough mm_cid to be the new leader
> +	 * all other threads (including the main thread) will terminate

^ missing period.

> +	 * After some time, the mm_cid of the only remaining thread should
> +	 * converge to 0, if not, the test fails

^ missing period.

> +	 */
> +	if (curr_mm_cid >= args->num_cpus / 2 &&
> +	    !pthread_mutex_trylock(args->token)) {
> +		printf_verbose("cpu%d has %d and will be the new leader\n",

has mm_cid=%d  ?

> +			       sched_getcpu(), curr_mm_cid);
> +		for (i = 0; i < args->num_cpus; i++) {
> +			if (args->tinfo[i] == pthread_self())
> +				continue;
> +			ret = pthread_join(args->tinfo[i], NULL);
> +			if (ret) {
> +				fprintf(stderr,
> +					"Error: failed to join thread %d (%d): %s\n",
> +					i, ret, strerror(ret));
> +				assert(ret == 0);
> +			}
> +		}
> +		free(args->tinfo);
> +		free(args->token);
> +		free(args->args_head);
> +
> +		for (i = 0; i < MM_CID_COMPACT_TIMEOUT; i++) {
> +			curr_mm_cid = rseq_current_mm_cid();
> +			printf_verbose("run %d: mm_cid %d on cpu%d\n", i,

mm_cid=%d (if we want consistent output)

> +				       curr_mm_cid, sched_getcpu());
> +			if (curr_mm_cid == 0) {
> +				printf_verbose(
> +					"mm_cids successfully compacted, exiting\n");
> +				pthread_exit(NULL);
> +			}
> +			usleep(RUNNER_PERIOD);
> +		}
> +		assert(false);

I suspect we'd want an explicit error message here
with an abort() rather than an assertion which can be
compiled-out with -DNDEBUG.

> +	}
> +	printf_verbose("cpu%d has %d and is going to terminate\n",
> +		       sched_getcpu(), curr_mm_cid);
> +	pthread_exit(NULL);
> +}
> +
> +void test_mm_cid_compaction(void)

This function should return its error to the caller
rather than assert.

> +{
> +	cpu_set_t affinity;
> +	int i, j, ret, num_threads;
> +	pthread_t *tinfo;
> +	pthread_mutex_t *token;
> +	struct thread_args *args;
> +
> +	sched_getaffinity(0, sizeof(affinity), &affinity);
> +	num_threads = CPU_COUNT(&affinity);
> +	tinfo = calloc(num_threads, sizeof(*tinfo));
> +	if (!tinfo) {
> +		fprintf(stderr, "Error: failed to allocate tinfo(%d): %s\n",
> +			errno, strerror(errno));
> +		assert(ret == 0);
> +	}
> +	args = calloc(num_threads, sizeof(*args));
> +	if (!args) {
> +		fprintf(stderr, "Error: failed to allocate args(%d): %s\n",
> +			errno, strerror(errno));
> +		assert(ret == 0);
> +	}
> +	token = calloc(num_threads, sizeof(*token));
> +	if (!token) {
> +		fprintf(stderr, "Error: failed to allocate token(%d): %s\n",
> +			errno, strerror(errno));
> +		assert(ret == 0);
> +	}
> +	if (num_threads == 1) {
> +		printf_verbose(
> +			"Running on a single cpu, cannot test anything\n");
> +		return;

This should return a value telling the caller that
the test is skipped (not an error per se).

> +	}
> +	pthread_mutex_init(token, NULL);
> +	/* The main thread runs on CPU0 */
> +	for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) {
> +		if (CPU_ISSET(i, &affinity)) {

We can save an indent level here by moving this
in the for () condition:

	for (i = 0, j = 0; i < CPU_SETSIZE &&
	     CPU_ISSET(i, &affinity) && j < num_threads; i++) {

> +			args[j].num_cpus = num_threads;
> +			args[j].tinfo = tinfo;
> +			args[j].token = token;
> +			args[j].cpu = i;
> +			args[j].args_head = args;
> +			if (!j) {
> +				/* The first thread is the main one */
> +				tinfo[0] = pthread_self();
> +				++j;
> +				continue;
> +			}
> +			ret = pthread_create(&tinfo[j], NULL, thread_runner,
> +					     &args[j]);
> +			if (ret) {
> +				fprintf(stderr,
> +					"Error: failed to create thread(%d): %s\n",
> +					ret, strerror(ret));
> +				assert(ret == 0);
> +			}
> +			++j;
> +		}
> +	}
> +	printf_verbose("Started %d threads\n", num_threads);

I think there is a missing rendez-vous point here. Assuming a
sufficiently long unexpected delay (think of a guest VM VCPU
preempted for a long time), the new leader can start poking
into args and other thread's info while we are still creating
threads here.

Thanks,

Mathieu

> +
> +	/* Also main thread will terminate if it is not selected as leader */
> +	thread_runner(&args[0]);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	if (rseq_register_current_thread()) {
> +		fprintf(stderr,
> +			"Error: rseq_register_current_thread(...) failed(%d): %s\n",
> +			errno, strerror(errno));
> +		goto error;
> +	}
> +	if (!rseq_mm_cid_available()) {
> +		fprintf(stderr, "Error: rseq_mm_cid unavailable\n");
> +		goto error;
> +	}
> +	test_mm_cid_compaction();
> +	if (rseq_unregister_current_thread()) {
> +		fprintf(stderr,
> +			"Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
> +			errno, strerror(errno));
> +		goto error;
> +	}
> +	return 0;
> +
> +error:
> +	return -1;
> +}

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com

Re: [PATCH v3 3/3] rseq/selftests: Add test for mm_cid compaction

Posted by Gabriele Monaco 11 months, 3 weeks ago

On Tue, 2024-12-24 at 11:20 -0500, Mathieu Desnoyers wrote:
> On 2024-12-16 08:09, Gabriele Monaco wrote:
> > A task in the kernel (task_mm_cid_work) runs somewhat periodically
> > to
> > compact the mm_cid for each process, this test tries to validate
> > that
> > it runs correctly and timely.
> > 
> > + if (curr_mm_cid == 0) {
> > + printf_verbose(
> > + "mm_cids successfully compacted, exiting\n");
> > + pthread_exit(NULL);
> > + }
> > + usleep(RUNNER_PERIOD);
> > + }
> > + assert(false);
> 
> I suspect we'd want an explicit error message here
> with an abort() rather than an assertion which can be
> compiled-out with -DNDEBUG.
> 
> > + }
> > + printf_verbose("cpu%d has %d and is going to terminate\n",
> > +        sched_getcpu(), curr_mm_cid);
> > + pthread_exit(NULL);
> > +}
> > +
> > +void test_mm_cid_compaction(void)
> 
> This function should return its error to the caller
> rather than assert.
> 
> > +{
> > + cpu_set_t affinity;
> > + int i, j, ret, num_threads;
> > + pthread_t *tinfo;
> > + pthread_mutex_t *token;
> > + struct thread_args *args;
> > +
> > + sched_getaffinity(0, sizeof(affinity), &affinity);
> > + num_threads = CPU_COUNT(&affinity);
> > + tinfo = calloc(num_threads, sizeof(*tinfo));
> > + if (!tinfo) {
> > + fprintf(stderr, "Error: failed to allocate tinfo(%d): %s\n",
> > + errno, strerror(errno));
> > + assert(ret == 0);
> > + }
> > + args = calloc(num_threads, sizeof(*args));
> > + if (!args) {
> > + fprintf(stderr, "Error: failed to allocate args(%d): %s\n",
> > + errno, strerror(errno));
> > + assert(ret == 0);
> > + }
> > + token = calloc(num_threads, sizeof(*token));
> > + if (!token) {
> > + fprintf(stderr, "Error: failed to allocate token(%d): %s\n",
> > + errno, strerror(errno));
> > + assert(ret == 0);
> > + }
> > + if (num_threads == 1) {
> > + printf_verbose(
> > + "Running on a single cpu, cannot test anything\n");
> > + return;
> 
> This should return a value telling the caller that
> the test is skipped (not an error per se).
> 

Thanks for the review!
I'm not sure how to properly handle these, but it seems to me the
cleanest way is to use ksft_* functions to report failures and skipped
tests. Other tests in rseq don't use the library but it doesn't seem a
big deal if just one test is using it, for now.

It gets a bit complicated to return values since we are exiting from
the main thread (sure we could join the remaining /winning/ thread but
we would end up with 2 threads running). The ksft_* functions solve
this quite nicely using exit codes, though.

> > + }
> > + pthread_mutex_init(token, NULL);
> > + /* The main thread runs on CPU0 */
> > + for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) {
> > + if (CPU_ISSET(i, &affinity)) {
> 
> We can save an indent level here by moving this
> in the for () condition:
> 
>  for (i = 0, j = 0; i < CPU_SETSIZE &&
>       CPU_ISSET(i, &affinity) && j < num_threads; i++) {
> 

Well, if we assume the affinity mask is contiguous, which is likely but
not always true. A typical setup with isolated CPUs have one
housekeeping core per NUMA node, let's say 0,32,64,96 out of 127 cpus,
the test would run only on cpu 0 in that case.

> > + args[j].num_cpus = num_threads;
> > + args[j].tinfo = tinfo;
> > + args[j].token = token;
> > + args[j].cpu = i;
> > + args[j].args_head = args;
> > + if (!j) {
> > + /* The first thread is the main one */
> > + tinfo[0] = pthread_self();
> > + ++j;
> > + continue;
> > + }
> > + ret = pthread_create(&tinfo[j], NULL, thread_runner,
> > +      &args[j]);
> > + if (ret) {
> > + fprintf(stderr,
> > + "Error: failed to create thread(%d): %s\n",
> > + ret, strerror(ret));
> > + assert(ret == 0);
> > + }
> > + ++j;
> > + }
> > + }
> > + printf_verbose("Started %d threads\n", num_threads);
> 
> I think there is a missing rendez-vous point here. Assuming a
> sufficiently long unexpected delay (think of a guest VM VCPU
> preempted for a long time), the new leader can start poking
> into args and other thread's info while we are still creating
> threads here.
> 

Yeah, good point, I'm assuming all threads are ready by the time we are
done waiting but that's not bulletproof. I'll add a barrier.

Thanks again for the comments, I'll prepare a V4.
Gabriele

Re: [PATCH v3 3/3] rseq/selftests: Add test for mm_cid compaction

Posted by Mathieu Desnoyers 11 months, 3 weeks ago

On 2024-12-26 04:04, Gabriele Monaco wrote:
> 
> On Tue, 2024-12-24 at 11:20 -0500, Mathieu Desnoyers wrote:
>> On 2024-12-16 08:09, Gabriele Monaco wrote:
>>> A task in the kernel (task_mm_cid_work) runs somewhat periodically
>>> to
>>> compact the mm_cid for each process, this test tries to validate
>>> that
>>> it runs correctly and timely.
>>>
>>> + if (curr_mm_cid == 0) {
>>> + printf_verbose(
>>> + "mm_cids successfully compacted, exiting\n");
>>> + pthread_exit(NULL);
>>> + }
>>> + usleep(RUNNER_PERIOD);
>>> + }
>>> + assert(false);
>>
>> I suspect we'd want an explicit error message here
>> with an abort() rather than an assertion which can be
>> compiled-out with -DNDEBUG.
>>
>>> + }
>>> + printf_verbose("cpu%d has %d and is going to terminate\n",
>>> +        sched_getcpu(), curr_mm_cid);
>>> + pthread_exit(NULL);
>>> +}
>>> +
>>> +void test_mm_cid_compaction(void)
>>
>> This function should return its error to the caller
>> rather than assert.
>>
>>> +{
>>> + cpu_set_t affinity;
>>> + int i, j, ret, num_threads;
>>> + pthread_t *tinfo;
>>> + pthread_mutex_t *token;
>>> + struct thread_args *args;
>>> +
>>> + sched_getaffinity(0, sizeof(affinity), &affinity);
>>> + num_threads = CPU_COUNT(&affinity);
>>> + tinfo = calloc(num_threads, sizeof(*tinfo));
>>> + if (!tinfo) {
>>> + fprintf(stderr, "Error: failed to allocate tinfo(%d): %s\n",
>>> + errno, strerror(errno));
>>> + assert(ret == 0);
>>> + }
>>> + args = calloc(num_threads, sizeof(*args));
>>> + if (!args) {
>>> + fprintf(stderr, "Error: failed to allocate args(%d): %s\n",
>>> + errno, strerror(errno));
>>> + assert(ret == 0);
>>> + }
>>> + token = calloc(num_threads, sizeof(*token));
>>> + if (!token) {
>>> + fprintf(stderr, "Error: failed to allocate token(%d): %s\n",
>>> + errno, strerror(errno));
>>> + assert(ret == 0);
>>> + }
>>> + if (num_threads == 1) {
>>> + printf_verbose(
>>> + "Running on a single cpu, cannot test anything\n");
>>> + return;
>>
>> This should return a value telling the caller that
>> the test is skipped (not an error per se).
>>
> 
> Thanks for the review!
> I'm not sure how to properly handle these, but it seems to me the
> cleanest way is to use ksft_* functions to report failures and skipped
> tests. Other tests in rseq don't use the library but it doesn't seem a
> big deal if just one test is using it, for now.

For the moment, we could do like the following test which
does a skip:

void test_membarrier(void)
{
         fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
                         "Skipping membarrier test.\n");
}

We can revamp the rest of the tests to use ksft in the future.

Currently everything is driven from run_param_test.sh, and it would
require significant rework to move to ksft.

> 
> It gets a bit complicated to return values since we are exiting from
> the main thread (sure we could join the remaining /winning/ thread but
> we would end up with 2 threads running). The ksft_* functions solve
> this quite nicely using exit codes, though.

Then thread_running should be marked with the noreturn attribute.

test_mm_cid_compaction can indeed return if it fails in the preparation
stages, just not when calling thread_running.

So we want test_mm_cid_compaction to return errors so main can handle
them, and we may want to move the call to thread_running directly
into main after success of test_mm_cid_compaction preparation step.

It's not like we can append any further test after this noreturn call.

> 
>>> + }
>>> + pthread_mutex_init(token, NULL);
>>> + /* The main thread runs on CPU0 */
>>> + for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) {
>>> + if (CPU_ISSET(i, &affinity)) {
>>
>> We can save an indent level here by moving this
>> in the for () condition:
>>
>>   for (i = 0, j = 0; i < CPU_SETSIZE &&
>>        CPU_ISSET(i, &affinity) && j < num_threads; i++) {
>>
> 
> Well, if we assume the affinity mask is contiguous, which is likely but
> not always true. A typical setup with isolated CPUs have one
> housekeeping core per NUMA node, let's say 0,32,64,96 out of 127 cpus,
> the test would run only on cpu 0 in that case.

Whooops, yes, you are right. Scratch that. It would become
an end loop condition, which is not what we want here.

then to remove indent level we'd want:

   if (!CPU_ISSET(i, &affinity))
       continue;

in the for loop.

> 
>>> + args[j].num_cpus = num_threads;
>>> + args[j].tinfo = tinfo;
>>> + args[j].token = token;
>>> + args[j].cpu = i;
>>> + args[j].args_head = args;
>>> + if (!j) {
>>> + /* The first thread is the main one */
>>> + tinfo[0] = pthread_self();
>>> + ++j;
>>> + continue;
>>> + }
>>> + ret = pthread_create(&tinfo[j], NULL, thread_runner,
>>> +      &args[j]);
>>> + if (ret) {
>>> + fprintf(stderr,
>>> + "Error: failed to create thread(%d): %s\n",
>>> + ret, strerror(ret));
>>> + assert(ret == 0);
>>> + }
>>> + ++j;
>>> + }
>>> + }
>>> + printf_verbose("Started %d threads\n", num_threads);
>>
>> I think there is a missing rendez-vous point here. Assuming a
>> sufficiently long unexpected delay (think of a guest VM VCPU
>> preempted for a long time), the new leader can start poking
>> into args and other thread's info while we are still creating
>> threads here.
>>
> 
> Yeah, good point, I'm assuming all threads are ready by the time we are
> done waiting but that's not bulletproof. I'll add a barrier.
> 
> Thanks again for the comments, I'll prepare a V4.

Thanks!

Mathieu

> Gabriele
> 

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com

Re: [PATCH v3 3/3] rseq/selftests: Add test for mm_cid compaction

Posted by Gabriele Monaco 11 months, 3 weeks ago

On Thu, 2024-12-26 at 09:17 -0500, Mathieu Desnoyers wrote:
> On 2024-12-26 04:04, Gabriele Monaco wrote:
> > 
> > On Tue, 2024-12-24 at 11:20 -0500, Mathieu Desnoyers wrote:
> > > On 2024-12-16 08:09, Gabriele Monaco wrote:
> > > > + if (curr_mm_cid == 0) {
> > > > + printf_verbose(
> > > > + "mm_cids successfully compacted, exiting\n");
> > > > + pthread_exit(NULL);
> > > > + }
> > > > + usleep(RUNNER_PERIOD);
> > > > + }
> > > > + assert(false);
> > > 
> > > I suspect we'd want an explicit error message here
> > > with an abort() rather than an assertion which can be
> > > compiled-out with -DNDEBUG.
> > > 
> > > > + }
> > > > + printf_verbose("cpu%d has %d and is going to terminate\n",
> > > > +        sched_getcpu(), curr_mm_cid);
> > > > + pthread_exit(NULL);
> > > > +}
> > > > +
> > > > +void test_mm_cid_compaction(void)
> > > 
> > > This function should return its error to the caller
> > > rather than assert.
> > > 
> > > > + if (num_threads == 1) {
> > > > + printf_verbose(
> > > > + "Running on a single cpu, cannot test anything\n");
> > > > + return;
> > > 
> > > This should return a value telling the caller that
> > > the test is skipped (not an error per se).
> > > 
> > 
> > Thanks for the review!
> > I'm not sure how to properly handle these, but it seems to me the
> > cleanest way is to use ksft_* functions to report failures and
> > skipped
> > tests. Other tests in rseq don't use the library but it doesn't
> > seem a
> > big deal if just one test is using it, for now.
> 
> For the moment, we could do like the following test which
> does a skip:
> 
> void test_membarrier(void)
> {
>          fprintf(stderr, "rseq_offset_deref_addv is not implemented
> on this architecture. "
>                          "Skipping membarrier test.\n");
> }
> 
> We can revamp the rest of the tests to use ksft in the future.
> 
> Currently everything is driven from run_param_test.sh, and it would
> require significant rework to move to ksft.
> 
> > 
> > It gets a bit complicated to return values since we are exiting
> > from
> > the main thread (sure we could join the remaining /winning/ thread
> > but
> > we would end up with 2 threads running). The ksft_* functions solve
> > this quite nicely using exit codes, though.
> 
> Then thread_running should be marked with the noreturn attribute.
> 
> test_mm_cid_compaction can indeed return if it fails in the
> preparation
> stages, just not when calling thread_running.
> 
> So we want test_mm_cid_compaction to return errors so main can handle
> them, and we may want to move the call to thread_running directly
> into main after success of test_mm_cid_compaction preparation step.
> 
> It's not like we can append any further test after this noreturn
> call.
> 

Alright, I'm a bit confused now. I see how in the rseq folder there are
tests in param_test (run by a shell script) and tests on their own c
file that are run just as binary.
For simplicity I added this new test in a separate file and I tried to
mirror what the other tests are doing: all of them are calling one or
more void functions from main (test_*) and some minimal initialisation
(register rseq, which I believe I may not even need, since I'm already
not doing it for all threads).

Now, I can have my test_* function return a value and handle it from
main e.g. aborting if the function returns some value, but that would
require me to define some return values (e.g. abort, fail, perhaps
skip) in use only for this test.
It felt it more consistent to just stick to the void function and
abort/exit directly from there (or return in case of skip).
All other tests do use abort for errors and assert for the pass/fail
condition, but since in  my case nothing else can execute after, I'd
say I can simply use exit(0)/exit(1) from the winning thread.

What do you think?

Thanks,
Gabriele

[PATCH v3 1/3] sched: Compact RSEQ concurrency IDs with reduced threads and affinity
[PATCH v3 2/3] sched: Move task_mm_cid_work to mm delayed work
[PATCH v3 3/3] rseq/selftests: Add test for mm_cid compaction