From nobody Tue Apr  8 13:58:58 2025
Received: from mx10.gouders.net (mx10.gouders.net [202.61.206.94])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 831261CD214;
	Sat,  5 Apr 2025 12:02:06 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=202.61.206.94
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1743854528; cv=none;
 b=fvlrB82q9j6NWwuZrTKNGKZ/ABWJz4tSlj7Yv/OtkZNEbw1gZa1vaJ38z5+qkouyh0hWY5650COLLIkMOusddwZKtTHsepktdz8o1SBFo3FKDqikOze3tTaicA019YmpWYey/HnKa4OO8Pw1QgV0aDQmRbbB0OhBIpJ8rIvFVSE=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1743854528; c=relaxed/simple;
	bh=tC5zZLuRjOquPQ7CBdWM39ShrTUkUI+qm6YsiAZs2ig=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=sqmEYdBTXtP9bdT8kR9FiOKBqYNd9ZdnPKtRKg2mGgu3x2Er0CArp68xYubMk6sknKzKoDz6gX8+tMVDDPOItsSBd/uohrVBTLilkX9Ddkms81wS9EJSEIxliIGRcSE2qMFFkMTNrLJcUcGFZ9lkmkEvnLByhUFGEWcV/RBrWhQ=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=none (p=none dis=none) header.from=gouders.net;
 spf=pass smtp.mailfrom=gouders.net;
 dkim=pass (1024-bit key) header.d=gouders.net header.i=@gouders.net
 header.b=kjQVC3F7; arc=none smtp.client-ip=202.61.206.94
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=none (p=none dis=none) header.from=gouders.net
Authentication-Results: smtp.subspace.kernel.org;
 spf=pass smtp.mailfrom=gouders.net
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (1024-bit key) header.d=gouders.net header.i=@gouders.net
 header.b="kjQVC3F7"
Received: from localhost (ip-109-42-179-132.web.vodafone.de [109.42.179.132])
	(authenticated bits=0)
	by mx10.gouders.net (8.17.1.9/8.17.1.9) with ESMTPSA id 535C1Rp8022442
	(version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NO);
	Sat, 5 Apr 2025 14:01:28 +0200
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=gouders.net; s=gnet;
	t=1743854488; bh=tC5zZLuRjOquPQ7CBdWM39ShrTUkUI+qm6YsiAZs2ig=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References;
	b=kjQVC3F72BRrGPOk5dXsBlR6xxb6rR/zxfLjCP1mG7kcDKTtVBRduBsLG4IzKguvn
	 Mc/A2ePTesrx4Tc7T/2YGfH89iTVI0EZrAnUfyejm93vTQLOvxWQl3Lx//hpTt2WAp
	 +KtyzYLXNVyBskGbBz5M0NATehOGJY8a8dDSBKNw=
From: Dirk Gouders <dirk@gouders.net>
To: Namhyung Kim <namhyung@kernel.org>,
        Arnaldo Carvalho de Melo <acme@kernel.org>,
        Ingo Molnar <mingo@redhat.com>, Peter Zijlstra <peterz@infradead.org>
Cc: Dirk Gouders <dirk@gouders.net>, Ian Rogers <irogers@google.com>,
        Adrian Hunter <adrian.hunter@intel.com>,
        LKML <linux-kernel@vger.kernel.org>, linux-perf-users@vger.kernel.org
Subject: [PATCH v2 1/3] perf bench sched pipe: add -p/--nprocs to run more
 than 2 workers
Date: Sat,  5 Apr 2025 14:00:06 +0200
Message-ID: <20250405120039.15953-2-dirk@gouders.net>
X-Mailer: git-send-email 2.45.3
In-Reply-To: <20250405120039.15953-1-dirk@gouders.net>
References: <20250402212402.15658-2-dirk@gouders.net>
 <20250405120039.15953-1-dirk@gouders.net>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

Parts of sched-pipe.c look as if they were designed to run more than two
workers, but that capability is still missing.

Introduce a new option -p/--nprocs to specify the number of
processes/threads to run the worker function.
The worker function continues to simulate a ring structure, analogous
to lmbench.

Working with cgroups (-G) still limits the number of cgroups to 2.
The processes with an even index go into the first cgroup specified,
the ones with an odd index go into the second one.

Signed-off-by: Dirk Gouders <dirk@gouders.net>
---
 tools/perf/Documentation/perf-bench.txt |  27 +++-
 tools/perf/bench/sched-pipe.c           | 156 +++++++++++++++---------
 2 files changed, 121 insertions(+), 62 deletions(-)

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documenta=
tion/perf-bench.txt
index 8331bd28b10e..8a651f2fe3aa 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -118,6 +118,16 @@ options (20 sender and receiver processes per group)
 Suite for pipe() system call.
 Based on pipe-test-1m.c by Ingo Molnar.
=20
+Create a number of processes (default is 2) and a pipe for each of
+them.  Then, send tokens using the pipe ends for a specified number of
+loops (default 1,000,000).
+
+By default, these processes send tokens of length 4 (an int) by
+simulating a ring structure which means each process has two peers.
+It sends the tokens to one of its peers and receives them from its other
+peer (in case of 2 processes those peers are identical and just the other
+process).
+
 Options of *pipe*
 ^^^^^^^^^^^^^^^^^
 -l::
@@ -126,11 +136,24 @@ Specify number of loops.
=20
 -G::
 --cgroups=3D::
-Names of cgroups for sender and receiver, separated by a comma.
+Names of two cgroups separated by a comma.
++
+The worker processes will be added to the cgroups in an alternating
+fashion: processes with an even index go into the first one, those
+with an odd index into the second one.
++
 This is useful to check cgroup context switching overhead.
 Note that perf doesn't create nor delete the cgroups, so users should
-make sure that the cgroups exist and are accessible before use.
+make sure that the cgroups exist and are accessible before use, e.g.
+by
++
+------
+% mkdir /sys/fs/cgroup/{AAA,BBB}
+------
=20
+-p::
+--nprocs=3D::
+Number of processes to use for sending tokens along the pipes.
=20
 Example of *pipe*
 ^^^^^^^^^^^^^^^^^
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 70139036d68f..28dd7f3a11b2 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -32,7 +32,8 @@
 #include <pthread.h>
=20
 struct thread_data {
-	int			nr;
+	unsigned int		nr;		/* index of this worker */
+	pid_t			pid;
 	int			pipe_read;
 	int			pipe_write;
 	struct epoll_event      epoll_ev;
@@ -42,10 +43,11 @@ struct thread_data {
 };
=20
 #define LOOPS_DEFAULT 1000000
-static	int			loops =3D LOOPS_DEFAULT;
+static	unsigned int		loops =3D LOOPS_DEFAULT;
=20
 /* Use processes by default: */
 static bool			threaded;
+static unsigned int		nr_threads =3D 2;
=20
 static bool			nonblocking;
 static char			*cgrp_names[2];
@@ -86,7 +88,8 @@ static int parse_two_cgroups(const struct option *opt __m=
aybe_unused,
=20
 static const struct option options[] =3D {
 	OPT_BOOLEAN('n', "nonblocking",	&nonblocking,	"Use non-blocking operation=
s"),
-	OPT_INTEGER('l', "loop",	&loops,		"Specify number of loops"),
+	OPT_UINTEGER('p', "nprocs",	&nr_threads,    "Number of processes"),
+	OPT_UINTEGER('l', "loop",	&loops,		"Specify number of loops"),
 	OPT_BOOLEAN('T', "threaded",	&threaded,	"Specify threads/process based ta=
sk setup"),
 	OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV",
 		     "Put sender and receivers in given cgroups",
@@ -107,15 +110,15 @@ static int enter_cgroup(int nr)
 	struct cgroup *cgrp;
 	pid_t pid;
=20
-	if (cgrp_names[nr] =3D=3D NULL)
+	if (cgrp_names[nr % 2] =3D=3D NULL)
 		return 0;
=20
-	if (cgrps[nr] =3D=3D NULL) {
-		cgrps[nr] =3D cgroup__new(cgrp_names[nr], /*do_open=3D*/true);
-		if (cgrps[nr] =3D=3D NULL)
+	if (cgrps[nr % 2] =3D=3D NULL) {
+		cgrps[nr % 2] =3D cgroup__new(cgrp_names[nr % 2], /*do_open=3D*/true);
+		if (cgrps[nr % 2] =3D=3D NULL)
 			goto err;
 	}
-	cgrp =3D cgrps[nr];
+	cgrp =3D cgrps[nr % 2];
=20
 	if (threaded)
 		pid =3D syscall(__NR_gettid);
@@ -149,14 +152,14 @@ static int enter_cgroup(int nr)
=20
 err:
 	saved_errno =3D errno;
-	printf("Failed to open cgroup file in %s\n", cgrp_names[nr]);
+	printf("Failed to open cgroup file in %s\n", cgrp_names[nr % 2]);
=20
 	if (saved_errno =3D=3D ENOENT) {
 		char mnt[PATH_MAX];
=20
 		if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event") =3D=3D 0)
 			printf(" Hint: create the cgroup first, like 'mkdir %s/%s'\n",
-			       mnt, cgrp_names[nr]);
+			       mnt, cgrp_names[nr % 2]);
 	} else if (saved_errno =3D=3D EACCES && geteuid() > 0) {
 		printf(" Hint: try to run as root\n");
 	}
@@ -166,8 +169,8 @@ static int enter_cgroup(int nr)
=20
 static void exit_cgroup(int nr)
 {
-	cgroup__put(cgrps[nr]);
-	free(cgrp_names[nr]);
+	cgroup__put(cgrps[nr % 2]);
+	free(cgrp_names[nr % 2]);
 }
=20
 static inline int read_pipe(struct thread_data *td)
@@ -185,81 +188,106 @@ static inline int read_pipe(struct thread_data *td)
 	return ret;
 }
=20
+/*
+ * Worker thread for nodes forming a ring, receiving tokens from the left
+ * neighbor and sending them to the right one.
+ */
 static void *worker_thread(void *__tdata)
 {
-	struct thread_data *td =3D __tdata;
-	int i, ret, m =3D 0;
+	struct thread_data *this_thread =3D __tdata;
+	struct thread_data *first_thread =3D this_thread - this_thread->nr;
=20
-	ret =3D enter_cgroup(td->nr);
+	unsigned int i;
+	int ret, m =3D 0;
+	int write_fd;
+
+	ret =3D enter_cgroup(this_thread->nr);
 	if (ret < 0) {
-		td->cgroup_failed =3D true;
+		this_thread->cgroup_failed =3D true;
 		return NULL;
 	}
=20
 	if (nonblocking) {
-		td->epoll_ev.events =3D EPOLLIN;
-		td->epoll_fd =3D epoll_create(1);
-		BUG_ON(td->epoll_fd < 0);
-		BUG_ON(epoll_ctl(td->epoll_fd, EPOLL_CTL_ADD, td->pipe_read, &td->epoll_=
ev) < 0);
+		this_thread->epoll_ev.events =3D EPOLLIN;
+		this_thread->epoll_fd =3D epoll_create(1);
+		BUG_ON(this_thread->epoll_fd < 0);
+		BUG_ON(epoll_ctl(this_thread->epoll_fd, EPOLL_CTL_ADD, this_thread->pipe=
_read, &this_thread->epoll_ev) < 0);
 	}
=20
+	/* Find write_fd of right peer in the ring. */
+	if ((this_thread->nr + 1) =3D=3D nr_threads)
+		write_fd =3D first_thread->pipe_write;
+	else
+		write_fd =3D (this_thread + 1)->pipe_write;
+
+
 	for (i =3D 0; i < loops; i++) {
-		ret =3D write(td->pipe_write, &m, sizeof(int));
+		ret =3D write(write_fd, &m, sizeof(int));
 		BUG_ON(ret !=3D sizeof(int));
-		ret =3D read_pipe(td);
+		ret =3D read_pipe(this_thread);
 		BUG_ON(ret !=3D sizeof(int));
 	}
=20
 	return NULL;
 }
=20
+static struct thread_data *create_thread_data(void)
+{
+	struct thread_data *threads;
+	int flags =3D 0;
+	int pipe_fds[2];
+	unsigned int i;
+
+	if (nonblocking)
+		flags |=3D O_NONBLOCK;
+
+	/* Zeroed, so that fields like cgroup_failed start out as false. */
+	threads =3D calloc(nr_threads, sizeof(*threads));
+	if (!threads) {
+		fprintf(stderr, "Allocation of thread data memory failed.\n");
+		exit(1);
+	}
+
+	for (i =3D 0; i < nr_threads; i++) {
+		threads[i].nr =3D i;
+		/* Every worker gets a pipe of its own; keep both ends. */
+		BUG_ON(pipe2(pipe_fds, flags));
+
+		threads[i].pipe_read =3D pipe_fds[0];
+		threads[i].pipe_write =3D pipe_fds[1];
+	}
+
+	return threads;
+}
+
 int bench_sched_pipe(int argc, const char **argv)
 {
-	struct thread_data threads[2] =3D {};
+	struct thread_data *threads;
 	struct thread_data *td;
-	int pipe_1[2], pipe_2[2];
+
 	struct timeval start, stop, diff;
 	unsigned long long result_usec =3D 0;
-	int nr_threads =3D 2;
-	int t;
+	unsigned int t;
=20
 	/*
 	 * why does "ret" exist?
 	 * discarding returned value of read(), write()
 	 * causes error in building environment for perf
 	 */
-	int __maybe_unused ret, wait_stat, flags =3D 0;
-	pid_t pid, retpid __maybe_unused;
+	int __maybe_unused ret, wait_stat;
+	pid_t retpid __maybe_unused;
=20
 	argc =3D parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
=20
-	if (nonblocking)
-		flags |=3D O_NONBLOCK;
-
-	BUG_ON(pipe2(pipe_1, flags));
-	BUG_ON(pipe2(pipe_2, flags));
+	threads =3D create_thread_data();
=20
 	gettimeofday(&start, NULL);
=20
-	for (t =3D 0; t < nr_threads; t++) {
-		td =3D threads + t;
-
-		td->nr =3D t;
-
-		if (t =3D=3D 0) {
-			td->pipe_read =3D pipe_1[0];
-			td->pipe_write =3D pipe_2[1];
-		} else {
-			td->pipe_write =3D pipe_1[1];
-			td->pipe_read =3D pipe_2[0];
-		}
-	}
-
 	if (threaded) {
 		for (t =3D 0; t < nr_threads; t++) {
 			td =3D threads + t;
=20
-			ret =3D pthread_create(&td->pthread, NULL, worker_thread, td);
+			ret =3D pthread_create(&td->pthread, NULL, worker_thread, threads + t);
 			BUG_ON(ret);
 		}
=20
@@ -270,18 +298,26 @@ int bench_sched_pipe(int argc, const char **argv)
 			BUG_ON(ret);
 		}
 	} else {
-		pid =3D fork();
-		assert(pid >=3D 0);
-
-		if (!pid) {
-			worker_thread(threads + 0);
-			exit(0);
-		} else {
-			worker_thread(threads + 1);
+		/*
+		 * Start at '1', because the parent eventually also becomes a
+		 * worker.
+		 */
+		for (t =3D 1; t < nr_threads; t++) {
+			threads[t].pid =3D fork();
+			assert(threads[t].pid >=3D 0);
+
+			if (!threads[t].pid) {
+				worker_thread(threads + t);
+				exit(0);
+			}
 		}
=20
-		retpid =3D waitpid(pid, &wait_stat, 0);
-		assert((retpid =3D=3D pid) && WIFEXITED(wait_stat));
+		worker_thread(threads);
+
+		for (t =3D 1; t < nr_threads; t++) {
+			retpid =3D waitpid(threads[t].pid, &wait_stat, 0);
+			assert((retpid =3D=3D threads[t].pid) && WIFEXITED(wait_stat));
+		}
 	}
=20
 	gettimeofday(&stop, NULL);
@@ -295,8 +331,8 @@ int bench_sched_pipe(int argc, const char **argv)
=20
 	switch (bench_format) {
 	case BENCH_FORMAT_DEFAULT:
-		printf("# Executed %d pipe operations between two %s\n\n",
-			loops, threaded ? "threads" : "processes");
+		printf("# Executed %d pipe operations between %u %s\n\n", loops,
+		       nr_threads, threaded ? "threads" : "processes");
=20
 		result_usec =3D diff.tv_sec * USEC_PER_SEC;
 		result_usec +=3D diff.tv_usec;
--=20
2.45.3
From nobody Tue Apr  8 13:58:58 2025
Received: from mx10.gouders.net (mx10.gouders.net [202.61.206.94])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id CDF651DED42;
	Sat,  5 Apr 2025 12:02:07 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=202.61.206.94
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1743854529; cv=none;
 b=e1KdmymnPvSErKcXwu1ZHPR+suAzitegHsgCpvA/pgTVz7UDak+EHyoCcNdkr/w7lBTI+hnW3ZinPSDR+UaHuTWRBmemq+UzHAdncp7mVIt+pafP2iFzkfJLeb0RrYY4e8INxKDkNudPBWLmaWW/qJms7iSAoYHku0zcw4GFJRw=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1743854529; c=relaxed/simple;
	bh=q36thpo4qph9HO2471J3L8SDoZKI0EjIb7JRIbOTa3c=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=fWUY+ywjbnIO5zYLzqtsYv1a0FbvX2JxvlB42iCt6W8t+Y4t5Lxj3na872B+FxQGqtBFZTns3Bt9U4zyb2o4iobla5bgn+u1V9eiIPRxqkw61o7E9byOGQbjFhGGgEt75QmsmTka98A8eg+/W19Yk3ZwvsH/drtUpPO7329uR0M=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=none (p=none dis=none) header.from=gouders.net;
 spf=pass smtp.mailfrom=gouders.net;
 dkim=pass (1024-bit key) header.d=gouders.net header.i=@gouders.net
 header.b=n7ppOQfa; arc=none smtp.client-ip=202.61.206.94
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=none (p=none dis=none) header.from=gouders.net
Authentication-Results: smtp.subspace.kernel.org;
 spf=pass smtp.mailfrom=gouders.net
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (1024-bit key) header.d=gouders.net header.i=@gouders.net
 header.b="n7ppOQfa"
Received: from localhost (ip-109-42-179-132.web.vodafone.de [109.42.179.132])
	(authenticated bits=0)
	by mx10.gouders.net (8.17.1.9/8.17.1.9) with ESMTPSA id 535C1aJL022451
	(version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NO);
	Sat, 5 Apr 2025 14:01:36 +0200
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=gouders.net; s=gnet;
	t=1743854497; bh=q36thpo4qph9HO2471J3L8SDoZKI0EjIb7JRIbOTa3c=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References;
	b=n7ppOQfak5bGpMjs/1zK4iVTEX/jOaofVz2g90pNjwA1Scsev06mtdKaGLTZ+ES6Z
	 PJF8N4f+10A5xOibeagSo1QvTP1LcA07viZrnVxXj8LIOil9RmNmMD+agcQZNUrdmZ
	 Ln9mGRE5MK4MOOjHIFtq9EOPvdtXC0tKJikaHyXw=
From: Dirk Gouders <dirk@gouders.net>
To: Namhyung Kim <namhyung@kernel.org>,
        Arnaldo Carvalho de Melo <acme@kernel.org>,
        Ingo Molnar <mingo@redhat.com>, Peter Zijlstra <peterz@infradead.org>
Cc: Dirk Gouders <dirk@gouders.net>, Ian Rogers <irogers@google.com>,
        Adrian Hunter <adrian.hunter@intel.com>,
        LKML <linux-kernel@vger.kernel.org>, linux-perf-users@vger.kernel.org
Subject: [PATCH v2 2/3] perf bench sched pipe: add complete graph simulation
Date: Sat,  5 Apr 2025 14:00:07 +0200
Message-ID: <20250405120039.15953-3-dirk@gouders.net>
X-Mailer: git-send-email 2.45.3
In-Reply-To: <20250405120039.15953-1-dirk@gouders.net>
References: <20250402212402.15658-2-dirk@gouders.net>
 <20250405120039.15953-1-dirk@gouders.net>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

Currently, we have only one worker function: the simulation of a ring
for token traversal.

Add another worker to simulate a complete graph (Kn) for token
traversal.  A new option -K/--Kn can be used to use the new worker.

These different workers can be interesting because they produce
distinct workloads that are visible in perf-report(1), for example:

(booted with mitigations=3Doff, 6 processes)

Ring simulation:

Samples: 92K of event 'cycles:P', Event count (approx.): 18690208287
Overhead  Command     Shared Object         Symbol
  13.16%  sched-pipe  [kernel.kallsyms]     [k] timerqueue_add
   7.10%  sched-pipe  [kernel.kallsyms]     [k] read_hpet
   3.36%  sched-pipe  [kernel.kallsyms]     [k] _copy_from_iter
   3.23%  sched-pipe  [kernel.kallsyms]     [k] _copy_to_iter
   2.64%  sched-pipe  [kernel.kallsyms]     [k] vfs_write
   2.55%  sched-pipe  [kernel.kallsyms]     [k] vfs_read

Kn simulation:

Samples: 163K of event 'cycles:P', Event count (approx.): 100366721164
Overhead  Command     Shared Object         Symbol
   5.11%  sched-pipe  [kernel.kallsyms]     [k] _copy_from_iter
   4.90%  sched-pipe  [kernel.kallsyms]     [k] queued_spin_lock_slowpath
   3.99%  sched-pipe  [kernel.kallsyms]     [k] _copy_to_iter
   3.35%  sched-pipe  [kernel.kallsyms]     [k] timerqueue_add
   2.80%  sched-pipe  [kernel.kallsyms]     [k] check_preemption_disabled
   2.56%  sched-pipe  [kernel.kallsyms]     [k] vfs_write
   2.40%  sched-pipe  [kernel.kallsyms]     [k] vfs_read

Signed-off-by: Dirk Gouders <dirk@gouders.net>
---
 tools/perf/Documentation/perf-bench.txt |  5 +++
 tools/perf/bench/sched-pipe.c           | 60 +++++++++++++++++++++++--
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documenta=
tion/perf-bench.txt
index 8a651f2fe3aa..6f7df3d47821 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -130,6 +130,11 @@ process).
=20
 Options of *pipe*
 ^^^^^^^^^^^^^^^^^
+-K::
+--Kn::
+Simulate a complete graph instead of a ring for sending tokens.
+Each process sends and receives tokens to/from every other process.
+
 -l::
 --loop=3D::
 Specify number of loops.
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 28dd7f3a11b2..3c76e8249a9b 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -50,6 +50,8 @@ static bool			threaded;
 static unsigned int		nr_threads =3D 2;
=20
 static bool			nonblocking;
+static bool			Kn_mode;	/* Toggle for ring mode -> complete graph mode */
+
 static char			*cgrp_names[2];
 static struct cgroup		*cgrps[2];
=20
@@ -90,6 +92,7 @@ static const struct option options[] =3D {
 	OPT_BOOLEAN('n', "nonblocking",	&nonblocking,	"Use non-blocking operation=
s"),
 	OPT_UINTEGER('p', "nprocs",	&nr_threads,    "Number of processes"),
 	OPT_UINTEGER('l', "loop",	&loops,		"Specify number of loops"),
+	OPT_BOOLEAN('K', "Kn",		&Kn_mode,	"Send tokens in a complete graph instea=
d of a ring."),
 	OPT_BOOLEAN('T', "threaded",	&threaded,	"Specify threads/process based ta=
sk setup"),
 	OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV",
 		     "Put sender and receivers in given cgroups",
@@ -188,11 +191,55 @@ static inline int read_pipe(struct thread_data *td)
 	return ret;
 }
=20
+/*
+ * Worker for the complete graph (Kn) mode: in every loop iteration, write
+ * a token to each other worker's pipe, then read nr_threads - 1 tokens.
+ */
+static void *worker_thread_kn(void *__tdata)
+{
+	struct thread_data *this_thread =3D __tdata;
+	struct thread_data *all_threads =3D this_thread - this_thread->nr;
+	/* Assumes __tdata is element ->nr of one thread_data array. */
+	int ret, m =3D 0;
+	unsigned int i;
+	unsigned int t;
+
+	ret =3D enter_cgroup(this_thread->nr);
+	if (ret < 0) {
+		this_thread->cgroup_failed =3D true;
+		return NULL;
+	}
+
+	if (nonblocking) {
+		this_thread->epoll_ev.events =3D EPOLLIN;
+		this_thread->epoll_fd =3D epoll_create(1);
+		BUG_ON(this_thread->epoll_fd < 0);
+		BUG_ON(epoll_ctl(this_thread->epoll_fd, EPOLL_CTL_ADD, this_thread->pipe=
_read, &this_thread->epoll_ev) < 0);
+	}
+
+	for (i =3D 0; i < loops; i++) {
+		/* First: write one token to every other worker's pipe. */
+		for (t =3D 0; t < nr_threads; t++)
+			if (t !=3D this_thread->nr) {
+				ret =3D write(all_threads[t].pipe_write, &m, sizeof(int));
+				BUG_ON(ret !=3D sizeof(int));
+			}
+
+		/* Then collect nr_threads - 1 tokens via read_pipe(). */
+		for (t =3D 1; t < nr_threads; t++) {
+			ret =3D read_pipe(this_thread);
+			BUG_ON(ret !=3D sizeof(int));
+		}
+	}
+
+	return NULL;
+}
+
 /*
  * Worker thread for nodes forming a ring, receiving tokens from the left
  * neighbor and sending them to the right one.
  */
-static void *worker_thread(void *__tdata)
+static void *worker_thread_ring(void *__tdata)
 {
 	struct thread_data *this_thread =3D __tdata;
 	struct thread_data *first_thread =3D this_thread - this_thread->nr;
@@ -231,6 +278,9 @@ static void *worker_thread(void *__tdata)
 	return NULL;
 }
=20
+/* Ring mode is the default. */
+void * (*worker_thread)(void *) =3D worker_thread_ring;
+
 static struct thread_data *create_thread_data(void)
 {
 	struct thread_data *threads;
@@ -279,6 +329,9 @@ int bench_sched_pipe(int argc, const char **argv)
=20
 	argc =3D parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
=20
+	if (Kn_mode)
+		worker_thread =3D worker_thread_kn;
+
 	threads =3D create_thread_data();
=20
 	gettimeofday(&start, NULL);
@@ -331,8 +384,9 @@ int bench_sched_pipe(int argc, const char **argv)
=20
 	switch (bench_format) {
 	case BENCH_FORMAT_DEFAULT:
-		printf("# Executed %d pipe operations between %u %s\n\n", loops,
-		       nr_threads, threaded ? "threads" : "processes");
+		printf("# Executed %d pipe operations (%s) between %u %s\n\n", loops,
+		       Kn_mode ? "Kn" : "ring", nr_threads,
+		       threaded ? "threads" : "processes");
=20
 		result_usec =3D diff.tv_sec * USEC_PER_SEC;
 		result_usec +=3D diff.tv_usec;
--=20
2.45.3
From nobody Tue Apr  8 13:58:58 2025
Received: from mx10.gouders.net (mx10.gouders.net [202.61.206.94])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id CDFBF1DF962;
	Sat,  5 Apr 2025 12:02:07 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=202.61.206.94
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1743854529; cv=none;
 b=Lkrzq9yC9sA39KjYKqVqDgw3LgxxkyDbT9rXo6S47oODGRuIQnExElv+YN5D7Cc1gde/HVEKEYR7vAI7DKDfN9bFS6V9Yq28aJ+6zYNUZ4xD5dG3RA/YIbTif7RUe4+spX9SoKbDhdSAF+9Z/Y3ivwCyLufpzPQaZyoTlH21UI4=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1743854529; c=relaxed/simple;
	bh=mvmvEdDAeeh44vcA+nPRYycDzVnRUY/MTVaMYBnRGo8=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=o1UmKszZHUC/7PYjHVLhVi/5i5yCRyaCHro4P+Kte7nKmNQWpFEHgGow0jcuaZZ0KQffR5T4FP25OEOi2eQp1MS7jcL+BbgBi/KZsAnIIscNDgwCQ5vB/FYFAz292inI0Zv79x+7KGD1LhkePzqCOf8lC8AELX71VfkskZtGzkc=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=none (p=none dis=none) header.from=gouders.net;
 spf=pass smtp.mailfrom=gouders.net;
 dkim=pass (1024-bit key) header.d=gouders.net header.i=@gouders.net
 header.b=m4LbBUnA; arc=none smtp.client-ip=202.61.206.94
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=none (p=none dis=none) header.from=gouders.net
Authentication-Results: smtp.subspace.kernel.org;
 spf=pass smtp.mailfrom=gouders.net
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (1024-bit key) header.d=gouders.net header.i=@gouders.net
 header.b="m4LbBUnA"
Received: from localhost (ip-109-42-179-132.web.vodafone.de [109.42.179.132])
	(authenticated bits=0)
	by mx10.gouders.net (8.17.1.9/8.17.1.9) with ESMTPSA id 535C1jnB022465
	(version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NO);
	Sat, 5 Apr 2025 14:01:46 +0200
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=gouders.net; s=gnet;
	t=1743854506; bh=mvmvEdDAeeh44vcA+nPRYycDzVnRUY/MTVaMYBnRGo8=;
	h=From:To:Cc:Subject:Date:In-Reply-To:References;
	b=m4LbBUnAbUYxVZhwSr2U3Ut+UXqhFJOjBnLJxbh/R19vXIdGZdzx5EkPst2G0Qzf/
	 q70svt0QoFtADIw+MPgg2iMgnHJU3r51p4teRDIzbChk9NiFCEZL4UKckv+kjYV8+C
	 Sso9FzKUkSfx58mPS2JDPcMAA+7mhxWpaVdpCr8k=
From: Dirk Gouders <dirk@gouders.net>
To: Namhyung Kim <namhyung@kernel.org>,
        Arnaldo Carvalho de Melo <acme@kernel.org>,
        Ingo Molnar <mingo@redhat.com>, Peter Zijlstra <peterz@infradead.org>
Cc: Dirk Gouders <dirk@gouders.net>, Ian Rogers <irogers@google.com>,
        Adrian Hunter <adrian.hunter@intel.com>,
        LKML <linux-kernel@vger.kernel.org>, linux-perf-users@vger.kernel.org
Subject: [PATCH v2 3/3] perf bench sched pipe: introduce multipliers for
 number of processes
Date: Sat,  5 Apr 2025 14:00:08 +0200
Message-ID: <20250405120039.15953-4-dirk@gouders.net>
X-Mailer: git-send-email 2.45.3
In-Reply-To: <20250405120039.15953-1-dirk@gouders.net>
References: <20250402212402.15658-2-dirk@gouders.net>
 <20250405120039.15953-1-dirk@gouders.net>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

Introduce multipliers to specify the number of processes to run:

k|K|m|M: multiply leading number by 2^10 or 2^20, respectively

p|P: multiply intermediate result by number of online processors

Examples:

-p  2K =3D 2048
-p 10p =3D 10 * number of online processors
-p 1kp =3D 1024 * number of online processors

Signed-off-by: Dirk Gouders <dirk@gouders.net>
---
 tools/perf/Documentation/perf-bench.txt | 14 ++++++
 tools/perf/bench/sched-pipe.c           | 36 ++++++++++++++-
 tools/perf/util/string.c                | 58 +++++++++++++++++++++++++
 tools/perf/util/string2.h               |  1 +
 4 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documenta=
tion/perf-bench.txt
index 6f7df3d47821..a2a44d80ee26 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -159,6 +159,20 @@ by
 -p::
 --nprocs=3D::
 Number of processes to use for sending tokens along the pipes.
+This option accepts a number followed by optional (case insensitive)
+multipliers in this order:
+
+- k, m
++
+Multipliers 1024 and 1048576 for the leading number.
+
+- p
++
+Multiplier replaced by the number of online processors.
+
+Example:
+
+        -p 1kP means: 1024 * number of online processors
=20
 Example of *pipe*
 ^^^^^^^^^^^^^^^^^
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 3c76e8249a9b..a1fa7ad7ed67 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -12,7 +12,8 @@
 #include <subcmd/parse-options.h>
 #include <api/fs/fs.h>
 #include "bench.h"
-#include "util/cgroup.h"
+#include <util/cgroup.h>
+#include <util/string2.h>
=20
 #include <unistd.h>
 #include <stdio.h>
@@ -45,6 +46,8 @@ struct thread_data {
 #define LOOPS_DEFAULT 1000000
 static	unsigned int		loops =3D LOOPS_DEFAULT;
=20
+static const char *nproc_str;	/* String that specifies a number of process=
es. */
+
 /* Use processes by default: */
 static bool			threaded;
 static unsigned int		nr_threads =3D 2;
@@ -90,7 +93,8 @@ static int parse_two_cgroups(const struct option *opt __m=
aybe_unused,
=20
 static const struct option options[] =3D {
 	OPT_BOOLEAN('n', "nonblocking",	&nonblocking,	"Use non-blocking operation=
s"),
-	OPT_UINTEGER('p', "nprocs",	&nr_threads,    "Number of processes"),
+	OPT_STRING('p', "nprocs",	&nproc_str,	"2P",
+		   "Number of processes (2P :=3D 2 * online processors)"),
 	OPT_UINTEGER('l', "loop",	&loops,		"Specify number of loops"),
 	OPT_BOOLEAN('K', "Kn",		&Kn_mode,	"Send tokens in a complete graph instea=
d of a ring."),
 	OPT_BOOLEAN('T', "threaded",	&threaded,	"Specify threads/process based ta=
sk setup"),
@@ -281,6 +285,31 @@ static void *worker_thread_ring(void *__tdata)
 /* Ring mode is the default. */
 void * (*worker_thread)(void *) =3D worker_thread_ring;
=20
+/*
+ * Get number of processes from the given string, e.g. "1k" =3D> 1024 or
+ * "8p" =3D> 8 * number of online processors.
+ * Exits with status 1 if np_str cannot be parsed or yields less than 2.
+ */
+static unsigned int get_nprocs(const char *np_str)
+{
+	unsigned int np;
+
+	np =3D perf_nptou(np_str);
+
+	if (np =3D=3D -1U) {
+		fprintf(stderr, "Cannot parse number of processes/threads: %s\n",
+			np_str);
+		exit(1);
+	}
+
+	if (np < 2) {
+		fprintf(stderr, "Two processes are the minimum requirement.\n");
+		exit(1);
+	}
+
+	return np;
+}
+
 static struct thread_data *create_thread_data(void)
 {
 	struct thread_data *threads;
@@ -329,6 +358,9 @@ int bench_sched_pipe(int argc, const char **argv)
=20
 	argc =3D parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
=20
+	if (nproc_str)
+		nr_threads =3D get_nprocs(nproc_str);
+
 	if (Kn_mode)
 		worker_thread =3D worker_thread_kn;
=20
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index c0e927bbadf6..72deb3df9c99 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -3,6 +3,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <stdlib.h>
+#include <unistd.h>
=20
 #include <linux/ctype.h>
=20
@@ -68,6 +69,63 @@ s64 perf_atoll(const char *str)
 	return -1;
 }
=20
+/*
+ * perf_nptou()
+ *
+ * Parse given string to a number of processes and return that number.
+ * An optional multiplier 'k' or 'm' may follow the digits, and then an
+ * optional final suffix 'p' meaning "number of online processors".
+ * Returns -1U if str is malformed or the result does not fit below -1U.
+ *
+ * str must match: (\d+)(k|K|m|M)?(p|P)?
+ * (e.g. "8P" meaning "8 * number of online processors",
+ *  or   "1k" meaning "1024",
+ *  or   "1Kp" meaning "1024 * number of online processors")
+ */
+u32 perf_nptou(const char *str)
+{
+	unsigned int shift =3D 0;
+	long ncpus =3D 1;
+	u64 length;
+	char *p;
+
+	if (!isdigit(str[0]))
+		goto out_err;
+
+	/* Work in 64 bits to detect overflow; strtoull() saturates. */
+	length =3D strtoull(str, &p, 10);
+	if (length >=3D -1U)
+		goto out_err;
+
+	/* Optional multiplier */
+	if (*p =3D=3D 'k' || *p =3D=3D 'K') {
+		shift =3D 10;
+		p++;
+	} else if (*p =3D=3D 'm' || *p =3D=3D 'M') {
+		shift =3D 20;
+		p++;
+	}
+
+	/* Optional final suffix: number of online processors */
+	if (*p =3D=3D 'p' || *p =3D=3D 'P') {
+		ncpus =3D sysconf(_SC_NPROCESSORS_ONLN);
+		p++;
+	}
+
+	/* Reject trailing characters, a failed sysconf() and overflow. */
+	if (*p !=3D '\0' || ncpus < 1 || (length << shift) >=3D -1U)
+		goto out_err;
+
+	length =3D (length << shift) * ncpus;
+	if (length >=3D -1U)
+		goto out_err;
+
+	return length;
+
+out_err:
+	return -1U;
+}
+
 /* Character class matching */
 static bool __match_charclass(const char *pat, char c, const char **npat)
 {
diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h
index 4c8bff47cfd3..bca2c1687924 100644
--- a/tools/perf/util/string2.h
+++ b/tools/perf/util/string2.h
@@ -12,6 +12,7 @@ extern const char *graph_dotted_line;
 extern const char *dots;
=20
 s64 perf_atoll(const char *str);
+u32 perf_nptou(const char *str);
 bool strglobmatch(const char *str, const char *pat);
 bool strglobmatch_nocase(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
--=20
2.45.3