[RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission

Marco Pagani posted 1 patch 2 weeks, 3 days ago
drivers/gpu/drm/scheduler/tests/tests_basic.c | 175 ++++++++++++++++++
1 file changed, 175 insertions(+)
[RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Marco Pagani 2 weeks, 3 days ago
Add a new test suite to simulate concurrent job submissions from userspace.
The suite includes a basic test case where each worker submits a single
job, and a more advanced case involving the submission of multiple jobs.

Signed-off-by: Marco Pagani <marpagan@redhat.com>
---
 drivers/gpu/drm/scheduler/tests/tests_basic.c | 175 ++++++++++++++++++
 1 file changed, 175 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
index 82a41a456b0a..7c25bcbbe7c9 100644
--- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
+++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2025 Valve Corporation */
 
 #include <linux/delay.h>
+#include <linux/completion.h>
 
 #include "sched_tests.h"
 
@@ -235,6 +236,179 @@ static void drm_sched_basic_cancel(struct kunit *test)
 	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
 }
 
+struct sched_concurrent_test_context {
+	struct drm_mock_scheduler *sched;
+	struct workqueue_struct *sub_wq;
+	struct completion wait_go;
+};
+
+KUNIT_DEFINE_ACTION_WRAPPER(destroy_workqueue_wrap, destroy_workqueue,
+			    struct workqueue_struct *);
+
+KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
+			    struct drm_mock_scheduler *);
+
+KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
+			    struct drm_mock_sched_entity *);
+
+static int drm_sched_concurrent_init(struct kunit *test)
+{
+	struct sched_concurrent_test_context *ctx;
+	int ret;
+
+	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+
+	init_completion(&ctx->wait_go);
+
+	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
+
+	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	/* Use an unbounded workqueue to maximize job submission concurrency */
+	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
+				      WQ_UNBOUND_MAX_ACTIVE);
+	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
+
+	ret = kunit_add_action_or_reset(test, destroy_workqueue_wrap, ctx->sub_wq);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	test->priv = ctx;
+
+	return 0;
+}
+
+struct drm_sched_concurrent_params {
+	const char *description;
+	unsigned int job_base_us;
+	unsigned int num_jobs;
+	unsigned int num_subs;
+};
+
+static const struct drm_sched_concurrent_params drm_sched_concurrent_cases[] = {
+	{
+		.description = "Concurrently submit a single job in a single entity",
+		.job_base_us = 1000,
+		.num_jobs = 1,
+		.num_subs = 32,
+	},
+	{
+		.description = "Concurrently submit multiple jobs in a single entity",
+		.job_base_us = 1000,
+		.num_jobs = 10,
+		.num_subs = 64,
+	},
+};
+
+static void
+drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
+{
+	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
+
+struct submitter_data {
+	struct work_struct work;
+	struct sched_concurrent_test_context *ctx;
+	struct drm_mock_sched_entity *entity;
+	struct drm_mock_sched_job **jobs;
+	struct kunit *test;
+	unsigned int id;
+	bool timedout;
+};
+
+static void drm_sched_submitter_worker(struct work_struct *work)
+{
+	const struct drm_sched_concurrent_params *params;
+	struct sched_concurrent_test_context *ctx;
+	struct submitter_data *sub_data;
+	unsigned int i, duration_us;
+	unsigned long timeout_jiffies;
+	bool done;
+
+	sub_data = container_of(work, struct submitter_data, work);
+	ctx = sub_data->ctx;
+	params = sub_data->test->param_value;
+
+	wait_for_completion(&ctx->wait_go);
+
+	for (i = 0; i < params->num_jobs; i++) {
+		duration_us = params->job_base_us + (sub_data->id * 10);
+		drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
+		drm_mock_sched_job_submit(sub_data->jobs[i]);
+	}
+
+	timeout_jiffies = usecs_to_jiffies(params->job_base_us * params->num_subs *
+					   params->num_jobs * 10);
+	for (i = 0; i < params->num_jobs; i++) {
+		done = drm_mock_sched_job_wait_finished(sub_data->jobs[i],
+							timeout_jiffies);
+		if (!done)
+			sub_data->timedout = true;
+	}
+}
+
+static void drm_sched_concurrent_submit_test(struct kunit *test)
+{
+	struct sched_concurrent_test_context *ctx = test->priv;
+	const struct drm_sched_concurrent_params *params = test->param_value;
+	struct submitter_data *subs_data;
+	unsigned int i, j;
+	int ret;
+
+	subs_data = kunit_kcalloc(test, params->num_subs, sizeof(*subs_data),
+				  GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, subs_data);
+
+	/*
+	 * Pre-allocate entities and jobs in the main thread to avoid KUnit
+	 * assertions in submitters threads
+	 */
+	for (i = 0; i < params->num_subs; i++) {
+		subs_data[i].id = i;
+		subs_data[i].ctx = ctx;
+		subs_data[i].test = test;
+		subs_data[i].timedout = false;
+		subs_data[i].entity = drm_mock_sched_entity_new(test,
+								DRM_SCHED_PRIORITY_NORMAL,
+								ctx->sched);
+
+		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
+						subs_data[i].entity);
+		KUNIT_ASSERT_EQ(test, ret, 0);
+
+		subs_data[i].jobs = kunit_kcalloc(test, params->num_jobs,
+						  sizeof(*subs_data[i].jobs), GFP_KERNEL);
+		KUNIT_ASSERT_NOT_NULL(test, subs_data[i].jobs);
+
+		for (j = 0; j < params->num_jobs; j++)
+			subs_data[i].jobs[j] = drm_mock_sched_job_new(test,
+								      subs_data[i].entity);
+
+		INIT_WORK(&subs_data[i].work, drm_sched_submitter_worker);
+		queue_work(ctx->sub_wq, &subs_data[i].work);
+	}
+
+	complete_all(&ctx->wait_go);
+	flush_workqueue(ctx->sub_wq);
+
+	for (i = 0; i < params->num_subs; i++)
+		KUNIT_ASSERT_FALSE_MSG(test, subs_data[i].timedout,
+				       "Job submitter worker %u timedout", i);
+}
+
+static struct kunit_case drm_sched_concurrent_tests[] = {
+	KUNIT_CASE_PARAM(drm_sched_concurrent_submit_test, drm_sched_concurrent_gen_params),
+	{}
+};
+
+static struct kunit_suite drm_sched_concurrent = {
+	.name = "drm_sched_concurrent_tests",
+	.init = drm_sched_concurrent_init,
+	.test_cases = drm_sched_concurrent_tests,
+};
+
 static struct kunit_case drm_sched_cancel_tests[] = {
 	KUNIT_CASE(drm_sched_basic_cancel),
 	{}
@@ -556,6 +730,7 @@ static struct kunit_suite drm_sched_credits = {
 };
 
 kunit_test_suites(&drm_sched_basic,
+		  &drm_sched_concurrent,
 		  &drm_sched_timeout,
 		  &drm_sched_cancel,
 		  &drm_sched_priority,
-- 
2.52.0
Re: [RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Tvrtko Ursulin 2 weeks, 2 days ago
On 20/01/2026 20:52, Marco Pagani wrote:
> Add a new test suite to simulate concurrent job submissions from userspace.
> The suite includes a basic test case where each worker submits a single
> job, and a more advanced case involving the submission of multiple jobs.

New test coverage is welcome!

But as Philipp has said some more context would be beneficial. Like are 
you trying to hit a bug, or extend later with something which will hit a 
bug and then you will propose improvements? Or simply improving the 
coverage?

If it is about some race I would maybe consider putting this into a new 
tests_races.c. I actually have this file locally and some unfinished 
test cases already, although it is unclear when I will be happy with 
them to post. But if the test is simply about adding coverage it is fine 
to live in tests_basic.c.

> Signed-off-by: Marco Pagani <marpagan@redhat.com>
> ---
>   drivers/gpu/drm/scheduler/tests/tests_basic.c | 175 ++++++++++++++++++
>   1 file changed, 175 insertions(+)
> 
> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> index 82a41a456b0a..7c25bcbbe7c9 100644
> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> @@ -2,6 +2,7 @@
>   /* Copyright (c) 2025 Valve Corporation */
>   
>   #include <linux/delay.h>
> +#include <linux/completion.h>
>   
>   #include "sched_tests.h"
>   
> @@ -235,6 +236,179 @@ static void drm_sched_basic_cancel(struct kunit *test)
>   	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>   }
>   
> +struct sched_concurrent_test_context {
> +	struct drm_mock_scheduler *sched;
> +	struct workqueue_struct *sub_wq;
> +	struct completion wait_go;
> +};
> +
> +KUNIT_DEFINE_ACTION_WRAPPER(destroy_workqueue_wrap, destroy_workqueue,
> +			    struct workqueue_struct *);
> +
> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
> +			    struct drm_mock_scheduler *);
> +
> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
> +			    struct drm_mock_sched_entity *);
> +
> +static int drm_sched_concurrent_init(struct kunit *test)
> +{
> +	struct sched_concurrent_test_context *ctx;
> +	int ret;
> +
> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
> +
> +	init_completion(&ctx->wait_go);
> +
> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
> +
> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
> +	KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +	/* Use an unbounded workqueue to maximize job submission concurrency */
> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
> +				      WQ_UNBOUND_MAX_ACTIVE);
> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
> +
> +	ret = kunit_add_action_or_reset(test, destroy_workqueue_wrap, ctx->sub_wq);
> +	KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +	test->priv = ctx;
> +
> +	return 0;
> +}
> +
> +struct drm_sched_concurrent_params {
> +	const char *description;
> +	unsigned int job_base_us;
> +	unsigned int num_jobs;
> +	unsigned int num_subs;
> +};
> +
> +static const struct drm_sched_concurrent_params drm_sched_concurrent_cases[] = {
> +	{
> +		.description = "Concurrently submit a single job in a single entity",
> +		.job_base_us = 1000,
> +		.num_jobs = 1,
> +		.num_subs = 32,
> +	},

Why is submission from a single thread interesting if it is already covered?

> +	{
> +		.description = "Concurrently submit multiple jobs in a single entity",
> +		.job_base_us = 1000,
> +		.num_jobs = 10,
> +		.num_subs = 64,
> +	},
> +};
> +
> +static void
> +drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
> +{
> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> +}
> +
> +KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
> +
> +struct submitter_data {
> +	struct work_struct work;
> +	struct sched_concurrent_test_context *ctx;
> +	struct drm_mock_sched_entity *entity;
> +	struct drm_mock_sched_job **jobs;
> +	struct kunit *test;
> +	unsigned int id;
> +	bool timedout;
> +};
> +
> +static void drm_sched_submitter_worker(struct work_struct *work)
> +{
> +	const struct drm_sched_concurrent_params *params;
> +	struct sched_concurrent_test_context *ctx;
> +	struct submitter_data *sub_data;
> +	unsigned int i, duration_us;
> +	unsigned long timeout_jiffies;
> +	bool done;
> +
> +	sub_data = container_of(work, struct submitter_data, work);
> +	ctx = sub_data->ctx;
> +	params = sub_data->test->param_value;
> +
> +	wait_for_completion(&ctx->wait_go);
> +
> +	for (i = 0; i < params->num_jobs; i++) {
> +		duration_us = params->job_base_us + (sub_data->id * 10);

Why is the job duration dependent on the submitter id?

Would it be feasible to not use auto-completing jobs and instead 
advance the timeline manually? Given how the premise of the test seems 
to be about concurrent submission it sounds plausible that what happens 
after submission maybe isn't very relevant.

> +		drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
> +		drm_mock_sched_job_submit(sub_data->jobs[i]);

On a related note, one interesting thing to add coverage for later is 
multi-threaded submit of multiple jobs against a single entity. But it 
is not an immediate need. Just mentioning it as something interesting.

It would mean open coding drm_mock_sched_job_submit() as 
drm_sched_job_arm() and drm_sched_entity_push_job() and sticking some 
delay in between so two threads have the chance to interleave. Mock 
scheduler does not handle it today, neither does the scheduler itself 
who punts responsibility to callers. So adding a test and making the 
mock scheduler handle that properly would serve as an example on how 
scheduler must be used. Or what can go bad if it isn't.

> +	}
> +
> +	timeout_jiffies = usecs_to_jiffies(params->job_base_us * params->num_subs *
> +					   params->num_jobs * 10);

The timeout calculation could use a comment. You are using num_subs * 10 
to match the duration_us above being id * 10? With logic of calculating 
a pessimistic timeout?

Have you tried it with qemu to check if it is pessimistic enough?

> +	for (i = 0; i < params->num_jobs; i++) {
> +		done = drm_mock_sched_job_wait_finished(sub_data->jobs[i],
> +							timeout_jiffies);
> +		if (!done)
> +			sub_data->timedout = true;
> +	}

Technically you only need to wait on the last job but it is passable 
like this too.

Also, is it important for the worker to wait for completion or the main 
thread could simply wait for everything? Maybe that would simplify things.

Manual timeline advance and this combined would mean the workers only 
submit jobs, while the main thread simply does 
drm_mock_sched_advance(sched, num_subs * num_jobs) and waits for last 
job from each submitter to finish.

Again, auto-completion and timeout reporting is something I do not 
immediately see as relevant for multi-threaded submission testing.

Maybe if you want to test the mock scheduler itself it could be, but 
then I would add it as separate entry in drm_sched_concurrent_cases[]. 
Like maybe have a flag/boolean "auto-complete jobs". So one without and 
one with that set.

Other than that it looks tidy and was easy to follow. Only thing which 
slightly got me was the term "subs" since I don't intuitively associate 
it with a submitter but, well, a sub entity of some kind. Might be worth 
renaming that to submitter(s), or even dropping the prefix in some cases 
might be feasible (like sub(s)_data).

Regards,

Tvrtko

> +}
> +
> +static void drm_sched_concurrent_submit_test(struct kunit *test)
> +{
> +	struct sched_concurrent_test_context *ctx = test->priv;
> +	const struct drm_sched_concurrent_params *params = test->param_value;
> +	struct submitter_data *subs_data;
> +	unsigned int i, j;
> +	int ret;
> +
> +	subs_data = kunit_kcalloc(test, params->num_subs, sizeof(*subs_data),
> +				  GFP_KERNEL);
> +	KUNIT_ASSERT_NOT_NULL(test, subs_data);
> +
> +	/*
> +	 * Pre-allocate entities and jobs in the main thread to avoid KUnit
> +	 * assertions in submitters threads
> +	 */
> +	for (i = 0; i < params->num_subs; i++) {
> +		subs_data[i].id = i;
> +		subs_data[i].ctx = ctx;
> +		subs_data[i].test = test;
> +		subs_data[i].timedout = false;
> +		subs_data[i].entity = drm_mock_sched_entity_new(test,
> +								DRM_SCHED_PRIORITY_NORMAL,
> +								ctx->sched);
> +
> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
> +						subs_data[i].entity);
> +		KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +		subs_data[i].jobs = kunit_kcalloc(test, params->num_jobs,
> +						  sizeof(*subs_data[i].jobs), GFP_KERNEL);
> +		KUNIT_ASSERT_NOT_NULL(test, subs_data[i].jobs);
> +
> +		for (j = 0; j < params->num_jobs; j++)
> +			subs_data[i].jobs[j] = drm_mock_sched_job_new(test,
> +								      subs_data[i].entity);
> +
> +		INIT_WORK(&subs_data[i].work, drm_sched_submitter_worker);
> +		queue_work(ctx->sub_wq, &subs_data[i].work);
> +	}
> +
> +	complete_all(&ctx->wait_go);
> +	flush_workqueue(ctx->sub_wq);
> +
> +	for (i = 0; i < params->num_subs; i++)
> +		KUNIT_ASSERT_FALSE_MSG(test, subs_data[i].timedout,
> +				       "Job submitter worker %u timedout", i);
> +}
> +
> +static struct kunit_case drm_sched_concurrent_tests[] = {
> +	KUNIT_CASE_PARAM(drm_sched_concurrent_submit_test, drm_sched_concurrent_gen_params),
> +	{}
> +};
> +
> +static struct kunit_suite drm_sched_concurrent = {
> +	.name = "drm_sched_concurrent_tests",
> +	.init = drm_sched_concurrent_init,
> +	.test_cases = drm_sched_concurrent_tests,
> +};
> +
>   static struct kunit_case drm_sched_cancel_tests[] = {
>   	KUNIT_CASE(drm_sched_basic_cancel),
>   	{}
> @@ -556,6 +730,7 @@ static struct kunit_suite drm_sched_credits = {
>   };
>   
>   kunit_test_suites(&drm_sched_basic,
> +		  &drm_sched_concurrent,
>   		  &drm_sched_timeout,
>   		  &drm_sched_cancel,
>   		  &drm_sched_priority,
Re: [RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Marco Pagani 1 week, 2 days ago
On 22/01/2026 10:51, Tvrtko Ursulin wrote:
> 
> On 20/01/2026 20:52, Marco Pagani wrote:
>> Add a new test suite to simulate concurrent job submissions from userspace.
>> The suite includes a basic test case where each worker submits a single
>> job, and a more advanced case involving the submission of multiple jobs.
> 
> New test coverage is welcome!

Hi Tvrtko, Philipp, and thank you.

> But as Philipp has said some more context would be beneficial. Like are 
> you trying to hit a bug, or extend later with something which will hit a 
> bug and then you will propose improvements? Or simply improving the 
> coverage?

Sure, I'll extend the commit message to be more descriptive in the next
version.
 
> If it is about some race I would maybe consider putting this into a new 
> tests_races.c. I actually have this file locally and some unfinished 
> test cases already, although it is unclear when I will be happy with 
> them to post. But if the test is simply about adding coverage it is fine 
> to live in tests_basic.c.

The general idea is to extend the suite with some initial tests that stress
concurrency to spot race conditions. Having these initial tests grouped together
with future ones in a new tests_races.c file makes perfect sense to me.

>> Signed-off-by: Marco Pagani <marpagan@redhat.com>
>> ---
>>   drivers/gpu/drm/scheduler/tests/tests_basic.c | 175 ++++++++++++++++++
>>   1 file changed, 175 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>> index 82a41a456b0a..7c25bcbbe7c9 100644
>> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
>> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>> @@ -2,6 +2,7 @@
>>   /* Copyright (c) 2025 Valve Corporation */
>>   
>>   #include <linux/delay.h>
>> +#include <linux/completion.h>
>>   
>>   #include "sched_tests.h"
>>   
>> @@ -235,6 +236,179 @@ static void drm_sched_basic_cancel(struct kunit *test)
>>   	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>>   }
>>   
>> +struct sched_concurrent_test_context {
>> +	struct drm_mock_scheduler *sched;
>> +	struct workqueue_struct *sub_wq;
>> +	struct completion wait_go;
>> +};
>> +
>> +KUNIT_DEFINE_ACTION_WRAPPER(destroy_workqueue_wrap, destroy_workqueue,
>> +			    struct workqueue_struct *);
>> +
>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
>> +			    struct drm_mock_scheduler *);
>> +
>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
>> +			    struct drm_mock_sched_entity *);
>> +
>> +static int drm_sched_concurrent_init(struct kunit *test)
>> +{
>> +	struct sched_concurrent_test_context *ctx;
>> +	int ret;
>> +
>> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
>> +
>> +	init_completion(&ctx->wait_go);
>> +
>> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
>> +
>> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>> +
>> +	/* Use an unbounded workqueue to maximize job submission concurrency */
>> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
>> +				      WQ_UNBOUND_MAX_ACTIVE);
>> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
>> +
>> +	ret = kunit_add_action_or_reset(test, destroy_workqueue_wrap, ctx->sub_wq);
>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>> +
>> +	test->priv = ctx;
>> +
>> +	return 0;
>> +}
>> +
>> +struct drm_sched_concurrent_params {
>> +	const char *description;
>> +	unsigned int job_base_us;
>> +	unsigned int num_jobs;
>> +	unsigned int num_subs;
>> +};
>> +
>> +static const struct drm_sched_concurrent_params drm_sched_concurrent_cases[] = {
>> +	{
>> +		.description = "Concurrently submit a single job in a single entity",
>> +		.job_base_us = 1000,
>> +		.num_jobs = 1,
>> +		.num_subs = 32,
>> +	},
> 
> Why is submission from a single thread interesting if it is already covered?

These two initial parameter sets cover only concurrent submission:
multiple submitters, single job / multiple submitters, multiple jobs.

>> +	{
>> +		.description = "Concurrently submit multiple jobs in a single entity",
>> +		.job_base_us = 1000,
>> +		.num_jobs = 10,
>> +		.num_subs = 64,
>> +	},
>> +};
>> +
>> +static void
>> +drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
>> +{
>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>> +}
>> +
>> +KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
>> +
>> +struct submitter_data {
>> +	struct work_struct work;
>> +	struct sched_concurrent_test_context *ctx;
>> +	struct drm_mock_sched_entity *entity;
>> +	struct drm_mock_sched_job **jobs;
>> +	struct kunit *test;
>> +	unsigned int id;
>> +	bool timedout;
>> +};
>> +
>> +static void drm_sched_submitter_worker(struct work_struct *work)
>> +{
>> +	const struct drm_sched_concurrent_params *params;
>> +	struct sched_concurrent_test_context *ctx;
>> +	struct submitter_data *sub_data;
>> +	unsigned int i, duration_us;
>> +	unsigned long timeout_jiffies;
>> +	bool done;
>> +
>> +	sub_data = container_of(work, struct submitter_data, work);
>> +	ctx = sub_data->ctx;
>> +	params = sub_data->test->param_value;
>> +
>> +	wait_for_completion(&ctx->wait_go);
>> +
>> +	for (i = 0; i < params->num_jobs; i++) {
>> +		duration_us = params->job_base_us + (sub_data->id * 10);
> 
> Why is job duration dependent by the submitter id?

Just a simple way to have a deterministic distribution of durations.
I can change it if it doesn't fit.

> Would it be feasiable to not use auto-completing jobs and instead 
> advance the timeline manually? Given how the premise of the test seems 
> to be about concurrent submission it sounds plausible that what happens 
> after submission maybe isn't very relevant.

Good idea! I'll run some experiments and see if it works.

>> +		drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
>> +		drm_mock_sched_job_submit(sub_data->jobs[i]);
> 
> On a related note, one interesting thing to add coverage for later is 
> multi-threaded submit of multiple jobs against a single entity. But it 
> is not an immediate need. Just mentioning it as something interesting.

Currently, the test configures each submitter to submit multiple jobs
against its own dedicated entity. I considered adding a test case for
submitting multiple jobs against multiple entities, but I decided to
leave it for the future.

> It would mean open coding drm_mock_sched_job_submit() as 
> drm_sched_job_arm() and drm_sched_entity_push_job() and sticking some 
> delay in between so two threads have the chance to interleave. Mock 
> scheduler does not handle it today, neither does the scheduler itself 
> who punts responsibility to callers. So adding a test and making the 
> mock scheduler handle that properly would serve as an example on how 
> scheduler must be used. Or what can go bad if it isn't.

Do you mean having multiple (k)threads submitting against the same entity?
Would that be used to model a multithreaded application that uses multiple queues?

>> +	}
>> +
>> +	timeout_jiffies = usecs_to_jiffies(params->job_base_us * params->num_subs *
>> +					   params->num_jobs * 10);
> 
> The timeout calculation could use a comment. You are using num_subs * 10 
> to match the duratiot_us above being id * 10? With logic of calculating 
> a pessimistic timeout?
> 
> Have you tried it with qemu to check if it is pessimistic enough?

I'll double check on that.
 
>> +	for (i = 0; i < params->num_jobs; i++) {
>> +		done = drm_mock_sched_job_wait_finished(sub_data->jobs[i],
>> +							timeout_jiffies);
>> +		if (!done)
>> +			sub_data->timedout = true;
>> +	}
> 
> Technically you only need to wait on the last job but it is passable 
> like this too.
> 
> Also, is it important for the worker to wait for completion or the main 
> thread could simply wait for everything? Maybe that would simplify things.

I would say they serve different purposes. The completion is used to pause
all worker threads until they are all created to ensure they start submitting
jobs together to maximize concurrency.
 
> Manual timeline advance and this combined would mean the workers only 
> submit jobs, while the main thread simply does 
> drm_mock_sched_advance(sched, num_subs * num_jobs) and waits for last 
> job from each submitter to finish.
> 
> Again, auto-completion and timeout reporting is something I do not 
> immediately see as relevant for multi-threaded submission testing.
>
> Maybe if you want to test the mock scheduler itself it could be, but 
> then I would add it as separate entry in drm_sched_concurrent_cases[]. 
> Like maybe have a flag/boolean "auto-complete jobs". So one without and 
> one with that set.

I think it's a good idea and I'll experiment to see if it works.
 
> Other than that it looks tidy and was easy to follow. Only thing which 
> slightly got me was the term "subs" since I don't intuitively associate 
> it with a submitter but, well, a sub entity of some kind. Might be worth 
> renaming that to submitter(s), or even dropping the prefix in some cases 
> might be feasible (like sub(s)_data).

Agreed. I'll rename "subs" for better clarity.

> Regards,
> 
> Tvrtko
> 

Cheers,
Marco
Re: [RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Tvrtko Ursulin 1 week, 1 day ago
On 28/01/2026 11:39, Marco Pagani wrote:
> 
> On 22/01/2026 10:51, Tvrtko Ursulin wrote:
>>
>> On 20/01/2026 20:52, Marco Pagani wrote:
>>> Add a new test suite to simulate concurrent job submissions from userspace.
>>> The suite includes a basic test case where each worker submits a single
>>> job, and a more advanced case involving the submission of multiple jobs.
>>
>> New test coverage is welcome!
> 
> Hi Tvrtko, Philip, and thank you.
> 
>> But as Philipp has said some more context would be beneficial. Like are
>> you trying to hit a bug, or extend later with something which will hit a
>> bug and then you will propose improvements? Or simply improving the
>> coverage?
> 
> Sure, I'll extend the commit message to be more descriptive in the next
> version.
>   
>> If it is about some race I would maybe consider putting this into a new
>> tests_races.c. I actually have this file locally and some unfinished
>> test cases already, although it is unclear when I will be happy with
>> them to post. But if the test is simply about adding coverage it is fine
>> to live in tests_basic.c.
> 
> The general idea is to extend the suite with some initial tests that stress
> concurrency to spot race conditions. Having these initial tests grouped together
> with future ones in a new tests_races.c file makes perfect sense to me.

It can also be moved later, if/when tests_basic.c grows too long for 
comfort. My suggestion was conditional on knowing how many test cases 
and for what kind of races you plan to add soon. If there are no clear 
plans to hit already known bugs, then again, it is fine to keep it in a 
single file for now.

>>> Signed-off-by: Marco Pagani <marpagan@redhat.com>
>>> ---
>>>    drivers/gpu/drm/scheduler/tests/tests_basic.c | 175 ++++++++++++++++++
>>>    1 file changed, 175 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>> index 82a41a456b0a..7c25bcbbe7c9 100644
>>> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>> @@ -2,6 +2,7 @@
>>>    /* Copyright (c) 2025 Valve Corporation */
>>>    
>>>    #include <linux/delay.h>
>>> +#include <linux/completion.h>
>>>    
>>>    #include "sched_tests.h"
>>>    
>>> @@ -235,6 +236,179 @@ static void drm_sched_basic_cancel(struct kunit *test)
>>>    	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>>>    }
>>>    
>>> +struct sched_concurrent_test_context {
>>> +	struct drm_mock_scheduler *sched;
>>> +	struct workqueue_struct *sub_wq;
>>> +	struct completion wait_go;
>>> +};
>>> +
>>> +KUNIT_DEFINE_ACTION_WRAPPER(destroy_workqueue_wrap, destroy_workqueue,
>>> +			    struct workqueue_struct *);
>>> +
>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
>>> +			    struct drm_mock_scheduler *);
>>> +
>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
>>> +			    struct drm_mock_sched_entity *);
>>> +
>>> +static int drm_sched_concurrent_init(struct kunit *test)
>>> +{
>>> +	struct sched_concurrent_test_context *ctx;
>>> +	int ret;
>>> +
>>> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
>>> +
>>> +	init_completion(&ctx->wait_go);
>>> +
>>> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
>>> +
>>> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +	/* Use an unbounded workqueue to maximize job submission concurrency */
>>> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
>>> +				      WQ_UNBOUND_MAX_ACTIVE);
>>> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
>>> +
>>> +	ret = kunit_add_action_or_reset(test, destroy_workqueue_wrap, ctx->sub_wq);
>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +	test->priv = ctx;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +struct drm_sched_concurrent_params {
>>> +	const char *description;
>>> +	unsigned int job_base_us;
>>> +	unsigned int num_jobs;
>>> +	unsigned int num_subs;
>>> +};
>>> +
>>> +static const struct drm_sched_concurrent_params drm_sched_concurrent_cases[] = {
>>> +	{
>>> +		.description = "Concurrently submit a single job in a single entity",
>>> +		.job_base_us = 1000,
>>> +		.num_jobs = 1,
>>> +		.num_subs = 32,
>>> +	},
>>
>> Why is submission from a single thread interesting if it is already covered?
> 
> These two initial parameter sets cover only concurrent submission:
> multiple submitters, single job / multiple submitters, multiple jobs.

Yes sorry I got confused jumping back and forth.

>>> +	{
>>> +		.description = "Concurrently submit multiple jobs in a single entity",
>>> +		.job_base_us = 1000,
>>> +		.num_jobs = 10,
>>> +		.num_subs = 64,
>>> +	},
>>> +};
>>> +
>>> +static void
>>> +drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
>>> +{
>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>> +}
>>> +
>>> +KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
>>> +
>>> +struct submitter_data {
>>> +	struct work_struct work;
>>> +	struct sched_concurrent_test_context *ctx;
>>> +	struct drm_mock_sched_entity *entity;
>>> +	struct drm_mock_sched_job **jobs;
>>> +	struct kunit *test;
>>> +	unsigned int id;
>>> +	bool timedout;
>>> +};
>>> +
>>> +static void drm_sched_submitter_worker(struct work_struct *work)
>>> +{
>>> +	const struct drm_sched_concurrent_params *params;
>>> +	struct sched_concurrent_test_context *ctx;
>>> +	struct submitter_data *sub_data;
>>> +	unsigned int i, duration_us;
>>> +	unsigned long timeout_jiffies;
>>> +	bool done;
>>> +
>>> +	sub_data = container_of(work, struct submitter_data, work);
>>> +	ctx = sub_data->ctx;
>>> +	params = sub_data->test->param_value;
>>> +
>>> +	wait_for_completion(&ctx->wait_go);
>>> +
>>> +	for (i = 0; i < params->num_jobs; i++) {
>>> +		duration_us = params->job_base_us + (sub_data->id * 10);
>>
>> Why is job duration dependent by the submitter id?
> 
> Just a simple way to have a deterministic distribution of durations.
> I can change it if it doesn't fit.
> 
>> Would it be feasiable to not use auto-completing jobs and instead
>> advance the timeline manually? Given how the premise of the test seems
>> to be about concurrent submission it sounds plausible that what happens
>> after submission maybe isn't very relevant.
> 
> Good idea! I'll run some experiments and see if it works.

Cool, I will await your findings in v2. :)

>>> +		drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
>>> +		drm_mock_sched_job_submit(sub_data->jobs[i]);
>>
>> On a related note, one interesting thing to add coverage for later is
>> multi-threaded submit of multiple jobs against a single entity. But it
>> is not an immediate need. Just mentioning it as something interesting.
> 
> Currently, the test configures each submitter to submit multiple jobs
> against its own dedicated entity. I considered adding a test case for
> submitting multiple jobs against multiple entities, but I decided to
> leave it for the future.
> 
>> It would mean open coding drm_mock_sched_job_submit() as
>> drm_sched_job_arm() and drm_sched_entity_push_job() and sticking some
>> delay in between so two threads have the chance to interleave. Mock
>> scheduler does not handle it today, neither does the scheduler itself
>> who punts responsibility to callers. So adding a test and making the
>> mock scheduler handle that properly would serve as an example on how
>> scheduler must be used. Or what can go bad if it isn't.
> 
> Do you mean having multiple (k)threads submitting against the same entity?
> Would that be used to model a multithread application that uses multiple queues?

Yes, since there is nothing preventing userspace from doing that today, with 
any driver. So IMO would be interesting to see what explodes, and to 
then model the mock scheduler as an example on what is the proper way to 
handle it. It isn't something I was suggesting to try straight away, and 
Philipp is also suggesting not to try.

>>> +	}
>>> +
>>> +	timeout_jiffies = usecs_to_jiffies(params->job_base_us * params->num_subs *
>>> +					   params->num_jobs * 10);
>>
>> The timeout calculation could use a comment. You are using num_subs * 10
>> to match the duratiot_us above being id * 10? With logic of calculating
>> a pessimistic timeout?
>>
>> Have you tried it with qemu to check if it is pessimistic enough?
> 
> I'll double check on that.

For context I tend to run the unit tests with the kunit.py runner and 
the qemu system backend and it is regularly quite slow when there is a 
lot of timer and workqueue activity. Anyway, this would become 
irrelevant if you move to manual job signaling.

>>> +	for (i = 0; i < params->num_jobs; i++) {
>>> +		done = drm_mock_sched_job_wait_finished(sub_data->jobs[i],
>>> +							timeout_jiffies);
>>> +		if (!done)
>>> +			sub_data->timedout = true;
>>> +	}
>>
>> Technically you only need to wait on the last job but it is passable
>> like this too.
>>
>> Also, is it important for the worker to wait for completion or the main
>> thread could simply wait for everything? Maybe that would simplify things.
> 
> I would say they serve different purposes. The completion is used to pause
> all worker threads until they are all created to ensure they start submitting
> jobs together to maximize concurrency.

Makes sense.

>> Manual timeline advance and this combined would mean the workers only
>> submit jobs, while the main thread simply does
>> drm_mock_sched_advance(sched, num_subs * num_jobs) and waits for last
>> job from each submitter to finish.
>>
>> Again, auto-completion and timeout reporting is something I do not
>> immediate see is relevant for multi-threaded submission testing.
>>
>> Maybe if you want to test the mock scheduler itself it could be, but
>> then I would add it as separate entry in drm_sched_concurrent_cases[].
>> Like maybe have a flag/boolean "auto-complete jobs". So one without and
>> one with that set.
> 
> I think it's a good idea and I'll experiment to see if it works.

Ack.

>   
>> Other than that it looks tidy and was easy to follow. Only thing which
>> slightly got me was the term "subs" since I don't intuitively associate
>> it with a submitter but, well, a sub entity of some kind. Might be worth
>> renaming that to submitter(s), or even dropping the prefix in some cases
>> might be feasible (like sub(s)_data).
> 
> Agreed. I'll rename "subs" for better clarity.

Thank you!

Regards,

Tvrtko
Re: [RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Marco Pagani 2 days, 17 hours ago

On 29/01/2026 16:44, Tvrtko Ursulin wrote:
> 
> On 28/01/2026 11:39, Marco Pagani wrote:
>>
>> On 22/01/2026 10:51, Tvrtko Ursulin wrote:
>>>
>>> On 20/01/2026 20:52, Marco Pagani wrote:
>>>> Add a new test suite to simulate concurrent job submissions from userspace.
>>>> The suite includes a basic test case where each worker submits a single
>>>> job, and a more advanced case involving the submission of multiple jobs.
>>>
>>> New test coverage is welcome!
>>
>> Hi Tvrtko, Philip, and thank you.
>>
>>> But as Philipp has said some more context would be beneficial. Like are
>>> you trying to hit a bug, or extend later with something which will hit a
>>> bug and then you will propose improvements? Or simply improving the
>>> coverage?
>>
>> Sure, I'll extend the commit message to be more descriptive in the next
>> version.
>>   
>>> If it is about some race I would maybe consider putting this into a new
>>> tests_races.c. I actually have this file locally and some unfinished
>>> test cases already, although it is unclear when I will be happy with
>>> them to post. But if the test is simply about adding coverage it is fine
>>> to live in tests_basic.c.
>>
>> The general idea is to extend the suite with some initial tests that stress
>> concurrency to spot race conditions. Having these initial tests grouped together
>> with future ones in a new tests_races.c file makes perfect sense to me.
> 
> It can also be moved later, if/when tests_basic.c grows too long for 
> comfort. My suggestion was conditional on knowing how many tests cases 
> and for what kind of races you plan to add soon. If there are no clear 
> plans to hit already known bugs, then again, it is fine to keep it in a 
> single file for now.
> 
>>>> Signed-off-by: Marco Pagani <marpagan@redhat.com>
>>>> ---
>>>>    drivers/gpu/drm/scheduler/tests/tests_basic.c | 175 ++++++++++++++++++
>>>>    1 file changed, 175 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>>> index 82a41a456b0a..7c25bcbbe7c9 100644
>>>> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>>> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>>> @@ -2,6 +2,7 @@
>>>>    /* Copyright (c) 2025 Valve Corporation */
>>>>    
>>>>    #include <linux/delay.h>
>>>> +#include <linux/completion.h>
>>>>    
>>>>    #include "sched_tests.h"
>>>>    
>>>> @@ -235,6 +236,179 @@ static void drm_sched_basic_cancel(struct kunit *test)
>>>>    	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>>>>    }
>>>>    
>>>> +struct sched_concurrent_test_context {
>>>> +	struct drm_mock_scheduler *sched;
>>>> +	struct workqueue_struct *sub_wq;
>>>> +	struct completion wait_go;
>>>> +};
>>>> +
>>>> +KUNIT_DEFINE_ACTION_WRAPPER(destroy_workqueue_wrap, destroy_workqueue,
>>>> +			    struct workqueue_struct *);
>>>> +
>>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
>>>> +			    struct drm_mock_scheduler *);
>>>> +
>>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
>>>> +			    struct drm_mock_sched_entity *);
>>>> +
>>>> +static int drm_sched_concurrent_init(struct kunit *test)
>>>> +{
>>>> +	struct sched_concurrent_test_context *ctx;
>>>> +	int ret;
>>>> +
>>>> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
>>>> +
>>>> +	init_completion(&ctx->wait_go);
>>>> +
>>>> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
>>>> +
>>>> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
>>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>>> +
>>>> +	/* Use an unbounded workqueue to maximize job submission concurrency */
>>>> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
>>>> +				      WQ_UNBOUND_MAX_ACTIVE);
>>>> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
>>>> +
>>>> +	ret = kunit_add_action_or_reset(test, destroy_workqueue_wrap, ctx->sub_wq);
>>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>>> +
>>>> +	test->priv = ctx;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +struct drm_sched_concurrent_params {
>>>> +	const char *description;
>>>> +	unsigned int job_base_us;
>>>> +	unsigned int num_jobs;
>>>> +	unsigned int num_subs;
>>>> +};
>>>> +
>>>> +static const struct drm_sched_concurrent_params drm_sched_concurrent_cases[] = {
>>>> +	{
>>>> +		.description = "Concurrently submit a single job in a single entity",
>>>> +		.job_base_us = 1000,
>>>> +		.num_jobs = 1,
>>>> +		.num_subs = 32,
>>>> +	},
>>>
>>> Why is submission from a single thread interesting if it is already covered?
>>
>> These two initial parameter sets cover only concurrent submission:
>> multiple submitters, single job / multiple submitters, multiple jobs.
> 
> Yes sorry I got confused jumping back and forth.
> 
>>>> +	{
>>>> +		.description = "Concurrently submit multiple jobs in a single entity",
>>>> +		.job_base_us = 1000,
>>>> +		.num_jobs = 10,
>>>> +		.num_subs = 64,
>>>> +	},
>>>> +};
>>>> +
>>>> +static void
>>>> +drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
>>>> +{
>>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>>> +}
>>>> +
>>>> +KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
>>>> +
>>>> +struct submitter_data {
>>>> +	struct work_struct work;
>>>> +	struct sched_concurrent_test_context *ctx;
>>>> +	struct drm_mock_sched_entity *entity;
>>>> +	struct drm_mock_sched_job **jobs;
>>>> +	struct kunit *test;
>>>> +	unsigned int id;
>>>> +	bool timedout;
>>>> +};
>>>> +
>>>> +static void drm_sched_submitter_worker(struct work_struct *work)
>>>> +{
>>>> +	const struct drm_sched_concurrent_params *params;
>>>> +	struct sched_concurrent_test_context *ctx;
>>>> +	struct submitter_data *sub_data;
>>>> +	unsigned int i, duration_us;
>>>> +	unsigned long timeout_jiffies;
>>>> +	bool done;
>>>> +
>>>> +	sub_data = container_of(work, struct submitter_data, work);
>>>> +	ctx = sub_data->ctx;
>>>> +	params = sub_data->test->param_value;
>>>> +
>>>> +	wait_for_completion(&ctx->wait_go);
>>>> +
>>>> +	for (i = 0; i < params->num_jobs; i++) {
>>>> +		duration_us = params->job_base_us + (sub_data->id * 10);
>>>
>>> Why is job duration dependent by the submitter id?
>>
>> Just a simple way to have a deterministic distribution of durations.
>> I can change it if it doesn't fit.
>>
>>> Would it be feasiable to not use auto-completing jobs and instead
>>> advance the timeline manually? Given how the premise of the test seems
>>> to be about concurrent submission it sounds plausible that what happens
>>> after submission maybe isn't very relevant.
>>
>> Good idea! I'll run some experiments and see if it works.
> 
> Cool, I will await your findings in v2. :)


After fiddling a bit with the code, I came to the conclusion that
changing the design to use manual timeline advancement is doable, but
not beneficial, since it would require serializing job submission into
"batches" using a two-step process, i.e., (i) workers submit jobs, and
(ii) the main thread waits for all submissions, advances the timeline,
and then releases the workers for the next iteration.

This approach would partially defeat the purpose of a concurrency test
as it would not allow job submission (KUnit process context) to overlap
with job completion (hrtimer callback interrupt context) that models
asynchronous hardware in the mock scheduler, limiting contention on the
DRM scheduler internal locking. Moreover, while manual advancement might
appear to make the test deterministic, it does not since the order in
which the workers are scheduled will still be non-deterministic.


> 
>>>> +		drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
>>>> +		drm_mock_sched_job_submit(sub_data->jobs[i]);
>>>
>>> On a related note, one interesting thing to add coverage for later is
>>> multi-threaded submit of multiple jobs against a single entity. But it
>>> is not an immediate need. Just mentioning it as something interesting.
>>
>> Currently, the test configures each submitter to submit multiple jobs
>> against its own dedicated entity. I considered adding a test case for
>> submitting multiple jobs against multiple entities, but I decided to
>> leave it for the future.
>>
>>> It would mean open coding drm_mock_sched_job_submit() as
>>> drm_sched_job_arm() and drm_sched_entity_push_job() and sticking some
>>> delay in between so two threads have the chance to interleave. Mock
>>> scheduler does not handle it today, neither does the scheduler itself
>>> who punts responsibility to callers. So adding a test and making the
>>> mock scheduler handle that properly would serve as an example on how
>>> scheduler must be used. Or what can go bad if it isn't.
>>
>> Do you mean having multiple (k)threads submitting against the same entity?
>> Would that be used to model a multithread application that uses multiple queues?
> 
> Yes, since there is nothing preventing userspace to do that today, with 
> any driver. So IMO would be interesting to see what explodes, and to 
> then model the mock scheduler as an example on what is the proper way to 
> handle it. It isn't something I was suggesting to try straight away, and 
> Phillip is also suggesting to not try.
> 
>>>> +	}
>>>> +
>>>> +	timeout_jiffies = usecs_to_jiffies(params->job_base_us * params->num_subs *
>>>> +					   params->num_jobs * 10);
>>>
>>> The timeout calculation could use a comment. You are using num_subs * 10
>>> to match the duratiot_us above being id * 10? With logic of calculating
>>> a pessimistic timeout?
>>>
>>> Have you tried it with qemu to check if it is pessimistic enough?
>>
>> I'll double check on that.
> 
> For context I tend to runt the unit tests with the kunit.py runner and 
> the qemu system backend and it is regularly quite slow when there is a 
> lot of timer and workqueue activity. Anyway, this would become 
> irrelevant if you move to manual job signaling.
> 
>>>> +	for (i = 0; i < params->num_jobs; i++) {
>>>> +		done = drm_mock_sched_job_wait_finished(sub_data->jobs[i],
>>>> +							timeout_jiffies);
>>>> +		if (!done)
>>>> +			sub_data->timedout = true;
>>>> +	}
>>>
>>> Technically you only need to wait on the last job but it is passable
>>> like this too.
>>>
>>> Also, is it important for the worker to wait for completion or the main
>>> thread could simply wait for everything? Maybe that would simplify things.
>>
>> I would say they serve different purposes. The completion is used to pause
>> all worker threads until they are all created to ensure they start submitting
>> jobs together to maximize concurrency.
> 
> Makes sense.
> 
>>> Manual timeline advance and this combined would mean the workers only
>>> submit jobs, while the main thread simply does
>>> drm_mock_sched_advance(sched, num_subs * num_jobs) and waits for last
>>> job from each submitter to finish.
>>>
>>> Again, auto-completion and timeout reporting is something I do not
>>> immediate see is relevant for multi-threaded submission testing.
>>>
>>> Maybe if you want to test the mock scheduler itself it could be, but
>>> then I would add it as separate entry in drm_sched_concurrent_cases[].
>>> Like maybe have a flag/boolean "auto-complete jobs". So one without and
>>> one with that set.
>>
>> I think it's a good idea and I'll experiment to see if it works.
> 
> Ack.
> 
>>   
>>> Other than that it looks tidy and was easy to follow. Only thing which
>>> slightly got me was the term "subs" since I don't intuitively associate
>>> it with a submitter but, well, a sub entity of some kind. Might be worth
>>> renaming that to submitter(s), or even dropping the prefix in some cases
>>> might be feasible (like sub(s)_data).
>>
>> Agreed. I'll rename "subs" for better clarity.
> 
> Thank you!
> 
> Regards,
> 
> Tvrtko

Thanks,
Marco
Re: [RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Tvrtko Ursulin 2 days ago
On 04/02/2026 16:33, Marco Pagani wrote:

8><

>>>>> +	{
>>>>> +		.description = "Concurrently submit multiple jobs in a single entity",
>>>>> +		.job_base_us = 1000,
>>>>> +		.num_jobs = 10,
>>>>> +		.num_subs = 64,
>>>>> +	},
>>>>> +};
>>>>> +
>>>>> +static void
>>>>> +drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
>>>>> +{
>>>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>>>> +}
>>>>> +
>>>>> +KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
>>>>> +
>>>>> +struct submitter_data {
>>>>> +	struct work_struct work;
>>>>> +	struct sched_concurrent_test_context *ctx;
>>>>> +	struct drm_mock_sched_entity *entity;
>>>>> +	struct drm_mock_sched_job **jobs;
>>>>> +	struct kunit *test;
>>>>> +	unsigned int id;
>>>>> +	bool timedout;
>>>>> +};
>>>>> +
>>>>> +static void drm_sched_submitter_worker(struct work_struct *work)
>>>>> +{
>>>>> +	const struct drm_sched_concurrent_params *params;
>>>>> +	struct sched_concurrent_test_context *ctx;
>>>>> +	struct submitter_data *sub_data;
>>>>> +	unsigned int i, duration_us;
>>>>> +	unsigned long timeout_jiffies;
>>>>> +	bool done;
>>>>> +
>>>>> +	sub_data = container_of(work, struct submitter_data, work);
>>>>> +	ctx = sub_data->ctx;
>>>>> +	params = sub_data->test->param_value;
>>>>> +
>>>>> +	wait_for_completion(&ctx->wait_go);
>>>>> +
>>>>> +	for (i = 0; i < params->num_jobs; i++) {
>>>>> +		duration_us = params->job_base_us + (sub_data->id * 10);
>>>>
>>>> Why is job duration dependent by the submitter id?
>>>
>>> Just a simple way to have a deterministic distribution of durations.
>>> I can change it if it doesn't fit.
>>>
>>>> Would it be feasiable to not use auto-completing jobs and instead
>>>> advance the timeline manually? Given how the premise of the test seems
>>>> to be about concurrent submission it sounds plausible that what happens
>>>> after submission maybe isn't very relevant.
>>>
>>> Good idea! I'll run some experiments and see if it works.
>>
>> Cool, I will await your findings in v2. :)
> 
> 
> After fiddling a bit with the code, I came to the conclusion that
> changing the design to use manual timeline advancement is doable, but
> not beneficial, since it would require serializing job submission into
> "batches" using a two-step process, i.e., (i) workers submit jobs, and
> (ii) the main thread waits for all submissions, advances the timeline,
> and then releases the workers for the next iteration.

What do you mean by next iteration?

In the patch you have each worker submit all jobs in one go.

> This approach would partially defeat the purpose of a concurrency test
> as it would not allow job submission (KUnit process context) to overlap
> with job completion (hrtimer callback interrupt context) that models
> asynchronous hardware in the mock scheduler, limiting contention on the
> DRM scheduler internal locking. Moreover, while manual advancement might
> appear to make the test deterministic, it does not since the order in
> which the workers are scheduled will still be non-deterministic.

Ah, so it depends what is the test actually wanting to test. In my view 
exercising parallel submit is one thing, while interleaving submission 
with completion is something else.

In the test as written I think there is very little of the latter. Each 
worker submits all their jobs in one tight loop. You set the jobs to be 
1ms, so the first job completion is 1ms away from when the workers are 
released. A lot of jobs can be submitted in 1ms and it would be 
interesting to see exactly how many, and from how many workers.

If desire is to interleave completion and submission I think that should 
be made more explicit (less depending on how fast is the underlying 
machine). For example adding delays into the submit loop to actually 
guarantee that.

But I would also recommend parallel submit and parallel submit vs 
completions are tested in separate test cases. It should be easy to do 
with some flags and almost no new code. I was suggesting flags for some 
other thing in the initial review as well. Right, for auto-complete. So 
flag could be something like:

+static const struct drm_sched_concurrent_params 
drm_sched_concurrent_cases[] = {
+	{
+		.description = "Concurrently submit a single job in a single entity",
+		.job_base_us = 1000,
+		.num_jobs = 1,
+		.num_subs = 32,
		.flags = INTERLEAVE_SUBMIT_AND_COMPLETE,
+	},

In the submit loop:

+	for (i = 0; i < params->num_jobs; i++) {
+		duration_us = params->job_base_us + (sub_data->id * 10);
		if (flags & INTERLEAVE_SUBMIT_AND_COMPLETE) {
			drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
 			// Add a delay based on elapsed time and job duration to guarantee job 
completions start arriving
		}
+		drm_mock_sched_job_submit(sub_data->jobs[i]);
+	}


And of course handle the job waiting stage appropriately depending on 
auto-complete or not.

Anyway, testing as little as possible at a time is a best practice I 
recommend, but if you insist, there is also nothing fundamentally wrong 
with the test as you have it so I won't block it.

Regards,

Tvrtko
Re: [RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Marco Pagani 23 hours ago

On 05/02/2026 10:53, Tvrtko Ursulin wrote:
> 
> On 04/02/2026 16:33, Marco Pagani wrote:
> 
> 8><
> 
>>>>>> +	{
>>>>>> +		.description = "Concurrently submit multiple jobs in a single entity",
>>>>>> +		.job_base_us = 1000,
>>>>>> +		.num_jobs = 10,
>>>>>> +		.num_subs = 64,
>>>>>> +	},
>>>>>> +};
>>>>>> +
>>>>>> +static void
>>>>>> +drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
>>>>>> +{
>>>>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>>>>> +}
>>>>>> +
>>>>>> +KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
>>>>>> +
>>>>>> +struct submitter_data {
>>>>>> +	struct work_struct work;
>>>>>> +	struct sched_concurrent_test_context *ctx;
>>>>>> +	struct drm_mock_sched_entity *entity;
>>>>>> +	struct drm_mock_sched_job **jobs;
>>>>>> +	struct kunit *test;
>>>>>> +	unsigned int id;
>>>>>> +	bool timedout;
>>>>>> +};
>>>>>> +
>>>>>> +static void drm_sched_submitter_worker(struct work_struct *work)
>>>>>> +{
>>>>>> +	const struct drm_sched_concurrent_params *params;
>>>>>> +	struct sched_concurrent_test_context *ctx;
>>>>>> +	struct submitter_data *sub_data;
>>>>>> +	unsigned int i, duration_us;
>>>>>> +	unsigned long timeout_jiffies;
>>>>>> +	bool done;
>>>>>> +
>>>>>> +	sub_data = container_of(work, struct submitter_data, work);
>>>>>> +	ctx = sub_data->ctx;
>>>>>> +	params = sub_data->test->param_value;
>>>>>> +
>>>>>> +	wait_for_completion(&ctx->wait_go);
>>>>>> +
>>>>>> +	for (i = 0; i < params->num_jobs; i++) {
>>>>>> +		duration_us = params->job_base_us + (sub_data->id * 10);
>>>>>
>>>>> Why is job duration dependent by the submitter id?
>>>>
>>>> Just a simple way to have a deterministic distribution of durations.
>>>> I can change it if it doesn't fit.
>>>>
>>>>> Would it be feasiable to not use auto-completing jobs and instead
>>>>> advance the timeline manually? Given how the premise of the test seems
>>>>> to be about concurrent submission it sounds plausible that what happens
>>>>> after submission maybe isn't very relevant.
>>>>
>>>> Good idea! I'll run some experiments and see if it works.
>>>
>>> Cool, I will await your findings in v2. :)
>>
>>
>> After fiddling a bit with the code, I came to the conclusion that
>> changing the design to use manual timeline advancement is doable, but
>> not beneficial, since it would require serializing job submission into
>> "batches" using a two-step process, i.e., (i) workers submit jobs, and
>> (ii) the main thread waits for all submissions, advances the timeline,
>> and then releases the workers for the next iteration.
> 
> What do you mean by next iteration?
> 
> In the patch you have each worker submit all jobs in one go.

I mean, if I change the code to use manual timeline advancement, then I
must introduce some synchronization logic that makes the main thread
advance the timeline only after the workers have submitted their jobs.
Since workers submit multiple jobs, I was thinking it would be better to
have the workers submit jobs in batches instead of all in one go.

>> This approach would partially defeat the purpose of a concurrency test
>> as it would not allow job submission (KUnit process context) to overlap
>> with job completion (hrtimer callback interrupt context) that models
>> asynchronous hardware in the mock scheduler, limiting contention on the
>> DRM scheduler internal locking. Moreover, while manual advancement might
>> appear to make the test deterministic, it does not since the order in
>> which the workers are scheduled will still be non-deterministic.
> 
> Ah, so it depends what is the test actually wanting to test. In my view 
> exercising parallel submit is one thing, while interleaving submission 
> with completion is something else.
> 
> In the test as written I think there is very little of the latter. Each 
> worker submits all their jobs in one tight loop. Jobs you set to be 1ms 
> so first job completion is 1ms away from when workers are released. A 
> lot of the jobs can be submitted in 1ms and it would be interesting to 
> see exactly how much, from how many workers.
> 
> If desire is to interleave completion and submission I think that should 
> be made more explicit (less depending on how fast is the underlying 
> machine). For example adding delays into the submit loop to actually 
> guarantee that.

Fair point.

> But I would also recommend parallel submit and parallel submit vs 
> completions are tested in separate test cases. It should be easy to do 
> with some flags and almost no new code. I was suggesting flags for some 
> other thing in the initial review as well. Right, for auto-complete. So 
> flag could be something like:
> 
> +static const struct drm_sched_concurrent_params 
> drm_sched_concurrent_cases[] = {
> +	{
> +		.description = "Concurrently submit a single job in a single entity",
> +		.job_base_us = 1000,
> +		.num_jobs = 1,
> +		.num_subs = 32,
> 		.flags = INTERLEAVE_SUBMIT_AND_COMPLETE,
> +	},
> 
> In the submit loop:
> 
> +	for (i = 0; i < params->num_jobs; i++) {
> +		duration_us = params->job_base_us + (sub_data->id * 10);
> 		if (flags & INTERLEAVE_SUBMIT_AND_COMPLETE) {
> 			drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
> 			// Add a delay based on time elapse and job duration to guarantee job 
> completions start arriving
> 		}
> +		drm_mock_sched_job_submit(sub_data->jobs[i]);
> +	}
> 
> 
> And of course handle the job waiting stage appropriately depending on 
> auto-complete or not.
> 
> Anyway, testing as little as possible at a time is a best practice I 
> recommend, but if you insist, there is also nothing fundamentally wrong 
> with the test as you have it so I won't block it.

Agreed. Unit tests should test one functionality at a time and be clear
about which one. I'll follow your suggestions and have two separate test
cases: a basic one for concurrent submissions with manual timeline
advancement (no batches, workers submit all jobs at once) and then a
second one with automatic timeline advancement for testing interleaving
submissions with completions.

At this point though, I think it would be better to move the tests to a
separate source file, as the partial similarity of the first concurrent
test case with drm_sched_basic_submit could create some confusion.

Thanks,
Marco
Re: [RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Tvrtko Ursulin 21 hours ago
On 06/02/2026 10:36, Marco Pagani wrote:
> 
> 
> On 05/02/2026 10:53, Tvrtko Ursulin wrote:
>>
>> On 04/02/2026 16:33, Marco Pagani wrote:
>>
>> 8><
>>
>>>>>>> +	{
>>>>>>> +		.description = "Concurrently submit multiple jobs in a single entity",
>>>>>>> +		.job_base_us = 1000,
>>>>>>> +		.num_jobs = 10,
>>>>>>> +		.num_subs = 64,
>>>>>>> +	},
>>>>>>> +};
>>>>>>> +
>>>>>>> +static void
>>>>>>> +drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
>>>>>>> +{
>>>>>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>>>>>> +}
>>>>>>> +
>>>>>>> +KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
>>>>>>> +
>>>>>>> +struct submitter_data {
>>>>>>> +	struct work_struct work;
>>>>>>> +	struct sched_concurrent_test_context *ctx;
>>>>>>> +	struct drm_mock_sched_entity *entity;
>>>>>>> +	struct drm_mock_sched_job **jobs;
>>>>>>> +	struct kunit *test;
>>>>>>> +	unsigned int id;
>>>>>>> +	bool timedout;
>>>>>>> +};
>>>>>>> +
>>>>>>> +static void drm_sched_submitter_worker(struct work_struct *work)
>>>>>>> +{
>>>>>>> +	const struct drm_sched_concurrent_params *params;
>>>>>>> +	struct sched_concurrent_test_context *ctx;
>>>>>>> +	struct submitter_data *sub_data;
>>>>>>> +	unsigned int i, duration_us;
>>>>>>> +	unsigned long timeout_jiffies;
>>>>>>> +	bool done;
>>>>>>> +
>>>>>>> +	sub_data = container_of(work, struct submitter_data, work);
>>>>>>> +	ctx = sub_data->ctx;
>>>>>>> +	params = sub_data->test->param_value;
>>>>>>> +
>>>>>>> +	wait_for_completion(&ctx->wait_go);
>>>>>>> +
>>>>>>> +	for (i = 0; i < params->num_jobs; i++) {
>>>>>>> +		duration_us = params->job_base_us + (sub_data->id * 10);
>>>>>>
>>>>>> Why is job duration dependent by the submitter id?
>>>>>
>>>>> Just a simple way to have a deterministic distribution of durations.
>>>>> I can change it if it doesn't fit.
>>>>>
>>>>>> Would it be feasiable to not use auto-completing jobs and instead
>>>>>> advance the timeline manually? Given how the premise of the test seems
>>>>>> to be about concurrent submission it sounds plausible that what happens
>>>>>> after submission maybe isn't very relevant.
>>>>>
>>>>> Good idea! I'll run some experiments and see if it works.
>>>>
>>>> Cool, I will await your findings in v2. :)
>>>
>>>
>>> After fiddling a bit with the code, I came to the conclusion that
>>> changing the design to use manual timeline advancement is doable, but
>>> not beneficial, since it would require serializing job submission into
>>> "batches" using a two-step process, i.e., (i) workers submit jobs, and
>>> (ii) the main thread waits for all submissions, advances the timeline,
>>> and then releases the workers for the next iteration.
>>
>> What do you mean by next iteration?
>>
>> In the patch you have each worker submit all jobs in one go.
> 
> I mean, if I change the code to use manual timeline advancement, then I
> must introduce some synchronization logic that makes the main thread
> advance the timeline only after the workers have submitted their jobs.

Oh that, I was thinking advancing after flushing the workqueue would be 
enough for this use case. Since that one does not care about when 
completions happen, they can just be cleaned up at exit.

> Since workers submit multiple jobs, I was thinking it would be better to
> have the workers submit jobs in batches instead of all in one go.

No strong opinion from me, as long as it is clear what is being tested.

>>> This approach would partially defeat the purpose of a concurrency test
>>> as it would not allow job submission (KUnit process context) to overlap
>>> with job completion (hrtimer callback interrupt context) that models
>>> asynchronous hardware in the mock scheduler, limiting contention on the
>>> DRM scheduler internal locking. Moreover, while manual advancement might
>>> appear to make the test deterministic, it does not since the order in
>>> which the workers are scheduled will still be non-deterministic.
>>
>> Ah, so it depends what is the test actually wanting to test. In my view
>> exercising parallel submit is one thing, while interleaving submission
>> with completion is something else.
>>
>> In the test as written I think there is very little of the latter. Each
>> worker submits all their jobs in one tight loop. Jobs you set to be 1ms
>> so first job completion is 1ms away from when workers are released. A
>> lot of the jobs can be submitted in 1ms and it would be interesting to
>> see exactly how much, from how many workers.
>>
>> If desire is to interleave completion and submission I think that should
>> be made more explicit (less depending on how fast is the underlying
>> machine). For example adding delays into the submit loop to actually
>> guarantee that.
> 
> Fair point.
> 
>> But I would also recommend parallel submit and parallel submit vs
>> completions are tested in separate test cases. It should be easy to do
>> with some flags and almost no new code. I was suggesting flags for some
>> other thing in the initial review as well. Right, for auto-complete. So
>> flag could be something like:
>>
>> +static const struct drm_sched_concurrent_params
>> drm_sched_concurrent_cases[] = {
>> +	{
>> +		.description = "Concurrently submit a single job in a single entity",
>> +		.job_base_us = 1000,
>> +		.num_jobs = 1,
>> +		.num_subs = 32,
>> 		.flags = INTERLEAVE_SUBMIT_AND_COMPLETE,
>> +	},
>>
>> In the submit loop:
>>
>> +	for (i = 0; i < params->num_jobs; i++) {
>> +		duration_us = params->job_base_us + (sub_data->id * 10);
>> 		if (flags & INTERLEAVE_SUBMIT_AND_COMPLETE) {
>> 			drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
>> 			// Add a delay based on time elapse and job duration to guarantee job
>> completions start arriving
>> 		}
>> +		drm_mock_sched_job_submit(sub_data->jobs[i]);
>> +	}
>>
>>
>> And of course handle the job waiting stage appropriately depending on
>> auto-complete or not.
>>
>> Anyway, testing as little as possible at a time is a best practice I
>> recommend, but if you insist, there is also nothing fundamentally wrong
>> with the test as you have it so I won't block it.
> 
> Agreed. Unit tests should test one functionality at a time and be clear
> about which one. I'll follow your suggestions and have two separate test
> cases: a basic one for concurrent submissions with manual timeline
> advancement (no batches, workers submit all jobs at once) and then a
> second one with automatic timeline advancement for testing interleaving
> submissions with completions.
> 
> At this point though, I think it would be better to move the tests to a
> separate source file, as the partial similarity of the first concurrent
> test case with drm_sched_basic_submit could create some confusion.

Works for me, thank you!

Regards,

Tvrtko
Re: [RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Philipp Stanner 1 week, 2 days ago
On Wed, 2026-01-28 at 12:39 +0100, Marco Pagani wrote:
> 
> On 22/01/2026 10:51, Tvrtko Ursulin wrote:
> > 
> > On 20/01/2026 20:52, Marco Pagani wrote:
> > > Add a new test suite to simulate concurrent job submissions from userspace.
> > > The suite includes a basic test case where each worker submits a single
> > > job, and a more advanced case involving the submission of multiple jobs.
> > 
> > New test coverage is welcome!
> 
> Hi Tvrtko, Philip, and thank you.
> 
> > But as Philipp has said some more context would be beneficial. Like are 
> > you trying to hit a bug, or extend later with something which will hit a 
> > bug and then you will propose improvements? Or simply improving the
> > coverage?
> 
> Sure, I'll extend the commit message to be more descriptive in the next
> version.
>  
> > If it is about some race I would maybe consider putting this into a new 
> > tests_races.c. I actually have this file locally and some unfinished 
> > test cases already, although it is unclear when I will be happy with 
> > them to post. But if the test is simply about adding coverage it is fine 
> > to live in tests_basic.c.
> 
> The general idea is to extend the suite with some initial tests that stress
> concurrency to spot race conditions. Having these initial tests grouped together
> with future ones in a new tests_races.c file makes perfect sense to me.


I am not so convinced of a separate file.

All the drm_sched tests are there to ensure the soundness and
robustness of the scheduler. How is a race special? What would
tests_basic.c be for – checking for bugs that are not races? And
races.c would be full of tests that are threaded?
(questions more directed at Tvrtko)

From my POV having this little test in tests_basic.c is perfectly fine.
Having multiple entities per scheduler, associated with multiple tasks,
*is* basic functionality of the scheduler.


> 
> > > Signed-off-by: Marco Pagani <marpagan@redhat.com>
> > > ---
> > >   drivers/gpu/drm/scheduler/tests/tests_basic.c | 175 ++++++++++++++++++
> > >   1 file changed, 175 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > index 82a41a456b0a..7c25bcbbe7c9 100644
> > > --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > @@ -2,6 +2,7 @@
> > >   /* Copyright (c) 2025 Valve Corporation */
> > >   
> > >   #include <linux/delay.h>
> > > +#include <linux/completion.h>
> > >   
> > >   #include "sched_tests.h"
> > >   
> > > @@ -235,6 +236,179 @@ static void drm_sched_basic_cancel(struct kunit *test)
> > >   	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
> > >   }
> > >   
> > > +struct sched_concurrent_test_context {
> > > +	struct drm_mock_scheduler *sched;
> > > +	struct workqueue_struct *sub_wq;
> > > +	struct completion wait_go;
> > > +};
> > > +
> > > +KUNIT_DEFINE_ACTION_WRAPPER(destroy_workqueue_wrap, destroy_workqueue,
> > > +			    struct workqueue_struct *);
> > > +
> > > +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
> > > +			    struct drm_mock_scheduler *);
> > > +
> > > +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
> > > +			    struct drm_mock_sched_entity *);
> > > +
> > > +static int drm_sched_concurrent_init(struct kunit *test)
> > > +{
> > > +	struct sched_concurrent_test_context *ctx;
> > > +	int ret;
> > > +
> > > +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
> > > +
> > > +	init_completion(&ctx->wait_go);
> > > +
> > > +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
> > > +
> > > +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
> > > +	KUNIT_ASSERT_EQ(test, ret, 0);
> > > +
> > > +	/* Use an unbounded workqueue to maximize job submission concurrency */
> > > +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
> > > +				      WQ_UNBOUND_MAX_ACTIVE);
> > > +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
> > > +
> > > +	ret = kunit_add_action_or_reset(test, destroy_workqueue_wrap, ctx->sub_wq);
> > > +	KUNIT_ASSERT_EQ(test, ret, 0);
> > > +
> > > +	test->priv = ctx;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +struct drm_sched_concurrent_params {
> > > +	const char *description;
> > > +	unsigned int job_base_us;
> > > +	unsigned int num_jobs;
> > > +	unsigned int num_subs;
> > > +};
> > > +
> > > +static const struct drm_sched_concurrent_params drm_sched_concurrent_cases[] = {
> > > +	{
> > > +		.description = "Concurrently submit a single job in a single entity",
> > > +		.job_base_us = 1000,
> > > +		.num_jobs = 1,
> > > +		.num_subs = 32,
> > > +	},
> > 
> > Why is submission from a single thread interesting if it is already covered?
> 
> These two initial parameter sets cover only concurrent submission:
> multiple submitters, single job / multiple submitters, multiple jobs.
> 
> > > +	{
> > > +		.description = "Concurrently submit multiple jobs in a single entity",
> > > +		.job_base_us = 1000,
> > > +		.num_jobs = 10,
> > > +		.num_subs = 64,
> > > +	},
> > > +};
> > > +
> > > +static void
> > > +drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
> > > +{
> > > +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> > > +}
> > > +
> > > +KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
> > > +
> > > +struct submitter_data {
> > > +	struct work_struct work;
> > > +	struct sched_concurrent_test_context *ctx;
> > > +	struct drm_mock_sched_entity *entity;
> > > +	struct drm_mock_sched_job **jobs;
> > > +	struct kunit *test;
> > > +	unsigned int id;
> > > +	bool timedout;
> > > +};
> > > +
> > > +static void drm_sched_submitter_worker(struct work_struct *work)
> > > +{
> > > +	const struct drm_sched_concurrent_params *params;
> > > +	struct sched_concurrent_test_context *ctx;
> > > +	struct submitter_data *sub_data;
> > > +	unsigned int i, duration_us;
> > > +	unsigned long timeout_jiffies;
> > > +	bool done;
> > > +
> > > +	sub_data = container_of(work, struct submitter_data, work);
> > > +	ctx = sub_data->ctx;
> > > +	params = sub_data->test->param_value;
> > > +
> > > +	wait_for_completion(&ctx->wait_go);
> > > +
> > > +	for (i = 0; i < params->num_jobs; i++) {
> > > +		duration_us = params->job_base_us + (sub_data->id * 10);
> > 
> > Why is job duration dependent on the submitter id?
> 
> Just a simple way to have a deterministic distribution of durations.
> I can change it if it doesn't fit.
> 
> > Would it be feasible to not use auto-completing jobs and instead 
> > advance the timeline manually? Given how the premise of the test seems 
> > to be about concurrent submission it sounds plausible that what happens 
> > after submission maybe isn't very relevant.
> 
> Good idea! I'll run some experiments and see if it works.
> 
> > > +		drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
> > > +		drm_mock_sched_job_submit(sub_data->jobs[i]);
> > 
> > On a related note, one interesting thing to add coverage for later is 
> > multi-threaded submit of multiple jobs against a single entity. But it 
> > is not an immediate need. Just mentioning it as something interesting.
> 
> Currently, the test configures each submitter to submit multiple jobs
> against its own dedicated entity. I considered adding a test case for
> submitting multiple jobs against multiple entities, but I decided to
> leave it for the future.
> 
> > It would mean open coding drm_mock_sched_job_submit() as 
> > drm_sched_job_arm() and drm_sched_entity_push_job() and sticking some 
> > delay in between so two threads have the chance to interleave. Mock
> > scheduler does not handle it today, neither does the scheduler itself 
> > who punts responsibility to callers. So adding a test and making the 
> > mock scheduler handle that properly would serve as an example on how 
> > scheduler must be used. Or what can go bad if it isn't.
> 
> Do you mean having multiple (k)threads submitting against the same entity?
> Would that be used to model a multithread application that uses multiple queues?

Submitters to entities must always ensure their own synchronization
before pushing to it. And afterwards it's the scheduler's parallelism
that counts.
I don't see how multiple threads on an entity are worthwhile testing.
It's beyond our scope.


Thx.
P.


> 
> > > +	}
> > > +
> > > +	timeout_jiffies = usecs_to_jiffies(params->job_base_us * params->num_subs *
> > > +					   params->num_jobs * 10);
> > 
> > The timeout calculation could use a comment. You are using num_subs * 10 
> > to match the duration_us above being id * 10? With logic of calculating 
> > a pessimistic timeout?
> > 
> > Have you tried it with qemu to check if it is pessimistic enough?
> 
> I'll double check on that.
>  
> > > +	for (i = 0; i < params->num_jobs; i++) {
> > > +		done = drm_mock_sched_job_wait_finished(sub_data->jobs[i],
> > > +							timeout_jiffies);
> > > +		if (!done)
> > > +			sub_data->timedout = true;
> > > +	}
> > 
> > Technically you only need to wait on the last job but it is passable 
> > like this too.
> > 
> > Also, is it important for the worker to wait for completion or the main 
> > thread could simply wait for everything? Maybe that would simplify things.
> 
> I would say they serve different purposes. The completion is used to pause
> all worker threads until they are all created to ensure they start submitting
> jobs together to maximize concurrency.
>  
> > Manual timeline advance and this combined would mean the workers only 
> > submit jobs, while the main thread simply does 
> > drm_mock_sched_advance(sched, num_subs * num_jobs) and waits for last 
> > job from each submitter to finish.
> > 
> > Again, auto-completion and timeout reporting is something I do not 
> > immediate see is relevant for multi-threaded submission testing.
> > 
> > Maybe if you want to test the mock scheduler itself it could be, but 
> > then I would add it as separate entry in drm_sched_concurrent_cases[]. 
> > Like maybe have a flag/boolean "auto-complete jobs". So one without and 
> > one with that set.
> 
> I think it's a good idea and I'll experiment to see if it works.
>  
> > Other than that it looks tidy and was easy to follow. Only thing which 
> > slightly got me was the term "subs" since I don't intuitively associate 
> > it with a submitter but, well, a sub entity of some kind. Might be worth 
> > renaming that to submitter(s), or even dropping the prefix in some cases 
> > might be feasible (like sub(s)_data).
> 
> Agreed. I'll rename "subs" for better clarity.
> 
> > Regards,
> > 
> > Tvrtko
> > 
> 
> Cheers,
> Marco
Re: [RFC PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
Posted by Philipp Stanner 2 weeks, 2 days ago
On Tue, 2026-01-20 at 21:52 +0100, Marco Pagani wrote:
> Add a new test suite to simulate concurrent job submissions from userspace.
> The suite includes a basic test case where each worker submits a single
> job, and a more advanced case involving the submission of multiple jobs.

Hi & thx for the patch!

I think the commit message could detail a bit the motivation behind
this test, which seems to be that we currently don't really test real
threaded submission.

The general idea seems desirable to me. Since this was sent as an RFC,
I won't do a detailed review yet.

I would, however, like Tvrtko (+Cc) to take a brief look if he's got
the time to see whether he also agrees with the general idea behind the
patch.


Thx
Philipp

> 
> Signed-off-by: Marco Pagani <marpagan@redhat.com>
> ---
>  drivers/gpu/drm/scheduler/tests/tests_basic.c | 175 ++++++++++++++++++
>  1 file changed, 175 insertions(+)
> 
> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> index 82a41a456b0a..7c25bcbbe7c9 100644
> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> @@ -2,6 +2,7 @@
>  /* Copyright (c) 2025 Valve Corporation */
>  
>  #include <linux/delay.h>
> +#include <linux/completion.h>
>  
>  #include "sched_tests.h"
>  
> @@ -235,6 +236,179 @@ static void drm_sched_basic_cancel(struct kunit *test)
>  	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>  }
>  
> +struct sched_concurrent_test_context {
> +	struct drm_mock_scheduler *sched;
> +	struct workqueue_struct *sub_wq;
> +	struct completion wait_go;
> +};
> +
> +KUNIT_DEFINE_ACTION_WRAPPER(destroy_workqueue_wrap, destroy_workqueue,
> +			    struct workqueue_struct *);
> +
> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
> +			    struct drm_mock_scheduler *);
> +
> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
> +			    struct drm_mock_sched_entity *);
> +
> +static int drm_sched_concurrent_init(struct kunit *test)
> +{
> +	struct sched_concurrent_test_context *ctx;
> +	int ret;
> +
> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
> +
> +	init_completion(&ctx->wait_go);
> +
> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
> +
> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
> +	KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +	/* Use an unbounded workqueue to maximize job submission concurrency */
> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
> +				      WQ_UNBOUND_MAX_ACTIVE);
> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
> +
> +	ret = kunit_add_action_or_reset(test, destroy_workqueue_wrap, ctx->sub_wq);
> +	KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +	test->priv = ctx;
> +
> +	return 0;
> +}
> +
> +struct drm_sched_concurrent_params {
> +	const char *description;
> +	unsigned int job_base_us;
> +	unsigned int num_jobs;
> +	unsigned int num_subs;
> +};
> +
> +static const struct drm_sched_concurrent_params drm_sched_concurrent_cases[] = {
> +	{
> +		.description = "Concurrently submit a single job in a single entity",
> +		.job_base_us = 1000,
> +		.num_jobs = 1,
> +		.num_subs = 32,
> +	},
> +	{
> +		.description = "Concurrently submit multiple jobs in a single entity",
> +		.job_base_us = 1000,
> +		.num_jobs = 10,
> +		.num_subs = 64,
> +	},
> +};
> +
> +static void
> +drm_sched_concurrent_desc(const struct drm_sched_concurrent_params *params, char *desc)
> +{
> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> +}
> +
> +KUNIT_ARRAY_PARAM(drm_sched_concurrent, drm_sched_concurrent_cases, drm_sched_concurrent_desc);
> +
> +struct submitter_data {
> +	struct work_struct work;
> +	struct sched_concurrent_test_context *ctx;
> +	struct drm_mock_sched_entity *entity;
> +	struct drm_mock_sched_job **jobs;
> +	struct kunit *test;
> +	unsigned int id;
> +	bool timedout;
> +};
> +
> +static void drm_sched_submitter_worker(struct work_struct *work)
> +{
> +	const struct drm_sched_concurrent_params *params;
> +	struct sched_concurrent_test_context *ctx;
> +	struct submitter_data *sub_data;
> +	unsigned int i, duration_us;
> +	unsigned long timeout_jiffies;
> +	bool done;
> +
> +	sub_data = container_of(work, struct submitter_data, work);
> +	ctx = sub_data->ctx;
> +	params = sub_data->test->param_value;
> +
> +	wait_for_completion(&ctx->wait_go);
> +
> +	for (i = 0; i < params->num_jobs; i++) {
> +		duration_us = params->job_base_us + (sub_data->id * 10);
> +		drm_mock_sched_job_set_duration_us(sub_data->jobs[i], duration_us);
> +		drm_mock_sched_job_submit(sub_data->jobs[i]);
> +	}
> +
> +	timeout_jiffies = usecs_to_jiffies(params->job_base_us * params->num_subs *
> +					   params->num_jobs * 10);
> +	for (i = 0; i < params->num_jobs; i++) {
> +		done = drm_mock_sched_job_wait_finished(sub_data->jobs[i],
> +							timeout_jiffies);
> +		if (!done)
> +			sub_data->timedout = true;
> +	}
> +}
> +
> +static void drm_sched_concurrent_submit_test(struct kunit *test)
> +{
> +	struct sched_concurrent_test_context *ctx = test->priv;
> +	const struct drm_sched_concurrent_params *params = test->param_value;
> +	struct submitter_data *subs_data;
> +	unsigned int i, j;
> +	int ret;
> +
> +	subs_data = kunit_kcalloc(test, params->num_subs, sizeof(*subs_data),
> +				  GFP_KERNEL);
> +	KUNIT_ASSERT_NOT_NULL(test, subs_data);
> +
> +	/*
> +	 * Pre-allocate entities and jobs in the main thread to avoid KUnit
> +	 * assertions in submitters threads
> +	 */
> +	for (i = 0; i < params->num_subs; i++) {
> +		subs_data[i].id = i;
> +		subs_data[i].ctx = ctx;
> +		subs_data[i].test = test;
> +		subs_data[i].timedout = false;
> +		subs_data[i].entity = drm_mock_sched_entity_new(test,
> +								DRM_SCHED_PRIORITY_NORMAL,
> +								ctx->sched);
> +
> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
> +						subs_data[i].entity);
> +		KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +		subs_data[i].jobs = kunit_kcalloc(test, params->num_jobs,
> +						  sizeof(*subs_data[i].jobs), GFP_KERNEL);
> +		KUNIT_ASSERT_NOT_NULL(test, subs_data[i].jobs);
> +
> +		for (j = 0; j < params->num_jobs; j++)
> +			subs_data[i].jobs[j] = drm_mock_sched_job_new(test,
> +								      subs_data[i].entity);
> +
> +		INIT_WORK(&subs_data[i].work, drm_sched_submitter_worker);
> +		queue_work(ctx->sub_wq, &subs_data[i].work);
> +	}
> +
> +	complete_all(&ctx->wait_go);
> +	flush_workqueue(ctx->sub_wq);
> +
> +	for (i = 0; i < params->num_subs; i++)
> +		KUNIT_ASSERT_FALSE_MSG(test, subs_data[i].timedout,
> +				       "Job submitter worker %u timedout", i);
> +}
> +
> +static struct kunit_case drm_sched_concurrent_tests[] = {
> +	KUNIT_CASE_PARAM(drm_sched_concurrent_submit_test, drm_sched_concurrent_gen_params),
> +	{}
> +};
> +
> +static struct kunit_suite drm_sched_concurrent = {
> +	.name = "drm_sched_concurrent_tests",
> +	.init = drm_sched_concurrent_init,
> +	.test_cases = drm_sched_concurrent_tests,
> +};
> +
>  static struct kunit_case drm_sched_cancel_tests[] = {
>  	KUNIT_CASE(drm_sched_basic_cancel),
>  	{}
> @@ -556,6 +730,7 @@ static struct kunit_suite drm_sched_credits = {
>  };
>  
>  kunit_test_suites(&drm_sched_basic,
> +		  &drm_sched_concurrent,
>  		  &drm_sched_timeout,
>  		  &drm_sched_cancel,
>  		  &drm_sched_priority,