Add an epoll busy poll test using netdevsim.
This test is comprised of:
- busy_poller (via busy_poller.c)
- busy_poll_test.sh which loads netdevsim, sets up network namespaces,
and runs busy_poller to receive data and netcat to send data.
The selftest tests two different scenarios:
- busy poll (the pre-existing version in the kernel)
- busy poll with suspend enabled (what this series adds)
The data transmit is a 1MiB temporary file generated from /dev/urandom
and the test is considered passing if the md5sum of the input file to
netcat matches the md5sum of the output file from busy_poller.
netdevsim was chosen instead of veth due to netdevsim's support for
netdev-genl.
For now, this test uses the functionality that netdevim provides. In the
future, perhaps netdevsim can be extended to emulate device IRQs to more
thoroughly test all pre-existing kernel options (like defer_hard_irqs)
and suspend.
Signed-off-by: Joe Damato <jdamato@fastly.com>
Co-developed-by: Martin Karsten <mkarsten@uwaterloo.ca>
Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
---
v3:
- New in v3
tools/testing/selftests/net/.gitignore | 1 +
tools/testing/selftests/net/Makefile | 2 +
tools/testing/selftests/net/busy_poll_test.sh | 164 ++++++++++++
tools/testing/selftests/net/busy_poller.c | 245 ++++++++++++++++++
4 files changed, 412 insertions(+)
create mode 100755 tools/testing/selftests/net/busy_poll_test.sh
create mode 100644 tools/testing/selftests/net/busy_poller.c
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 217d8b7a7365..85b0c4a2179f 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -2,6 +2,7 @@
bind_bhash
bind_timewait
bind_wildcard
+busy_poller
cmsg_sender
diag_uid
epoll_busy_poll
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 26a4883a65c9..4b5f3d7d900b 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -69,6 +69,7 @@ TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
TEST_GEN_FILES += ipsec
TEST_GEN_FILES += ioam6_parser
TEST_GEN_FILES += gro
+TEST_GEN_FILES += busy_poller
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll
TEST_GEN_FILES += toeplitz
@@ -96,6 +97,7 @@ TEST_PROGS += fdb_flush.sh
TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
TEST_PROGS += bpf_offload.py
+TEST_PROGS += busy_poll_test.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := ncdevmem
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
new file mode 100755
index 000000000000..bf33737691a4
--- /dev/null
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+source net_helper.sh
+
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+YNL_PATH=$(realpath $(dirname ${0}))/../../../net/ynl/cli.py
+SPEC_PATH=$(realpath $(dirname ${0}))/../../../../Documentation/netlink/specs/netdev.yaml
+
+check_ynl()
+{
+ if [ ! -f ${YNL_PATH} ]; then
+ echo "YNL path invalid: ${YNL_PATH}"
+ exit 1
+ fi
+
+ if [ ! -f ${SPEC_PATH} ]; then
+ echo "spec path invalid: ${SPEC_PATH}"
+ exit 1
+ fi
+}
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_1_SYS/net -exec basename {} \;)
+ NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_2_SYS/net -exec basename {} \;)
+
+ # ensure the server has 1 queue
+ ethtool -L $NSIM_DEV_1_NAME combined 1 2>/dev/null
+
+ ip link set $NSIM_DEV_1_NAME netns nssv
+ ip link set $NSIM_DEV_2_NAME netns nscl
+
+ ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
+ ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
+ ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
+
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+test_busypoll()
+{
+ tmp_file=$(mktemp)
+ out_file=$(mktemp)
+
+ # fill a test file with random data
+ dd if=/dev/urandom of=${tmp_file} bs=1M count=1 2> /dev/null
+
+ timeout -k 60s 60s ip netns exec nssv ./busy_poller -p48675 -b192.168.1.1 -m8 -u0 -P1 -g16 -o${out_file}&
+
+ wait_local_port_listen nssv 48675 tcp
+
+ ip netns exec nscl nc -N 192.168.1.1 48675 < $tmp_file
+
+ wait
+
+ tmp_file_md5sum=$(md5sum $tmp_file | cut -f1 -d' ')
+ out_file_md5sum=$(md5sum $out_file | cut -f1 -d' ')
+
+ if [ "$tmp_file_md5sum" = "$out_file_md5sum" ]; then
+ res=0
+ else
+ echo "md5sum mismatch"
+ echo "input file md5sum: ${tmp_file_md5sum}";
+ echo "output file md5sum: ${out_file_md5sum}";
+ res=1
+ fi
+
+ rm $out_file $tmp_file
+
+ return $res
+}
+
+test_busypoll_with_suspend()
+{
+ # set the suspend parameter for the server via its IFIDX
+
+ DUMP_CMD="${YNL_PATH} --spec ${SPEC_PATH} --dump napi-get --json=\"{\\\"ifindex\\\": ${NSIM_DEV_1_IFIDX}}\" --output-json"
+ NSIM_DEV_1_NAPIID=$(ip netns exec nssv bash -c "$DUMP_CMD")
+ NSIM_DEV_1_NAPIID=$(echo $NSIM_DEV_1_NAPIID | jq '.[] | .id')
+
+ SUSPEND_CMD="${YNL_PATH} --spec ${SPEC_PATH} --do napi-set --json=\"{\\\"id\\\": ${NSIM_DEV_1_NAPIID}, \\\"irq-suspend-timeout\\\": 20000000, \\\"gro-flush-timeout\\\": 50000, \\\"defer-hard-irqs\\\": 100}\""
+ NSIM_DEV_1_SETCONFIG=$(ip netns exec nssv bash -c "$SUSPEND_CMD")
+
+ test_busypoll
+ return $?
+}
+
+###
+### Code start
+###
+
+check_ynl
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_DEV_1_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_1_FD}</var/run/netns/nssv
+NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex)
+
+NSIM_DEV_2_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_2_FD}</var/run/netns/nscl
+NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex)
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+test_busypoll
+if [ $? -ne 0 ]; then
+ echo "test_busypoll failed"
+ cleanup_ns
+ exit 1
+fi
+
+test_busypoll_with_suspend
+if [ $? -ne 0 ]; then
+ echo "test_busypoll_with_suspend failed"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit 0
diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c
new file mode 100644
index 000000000000..b49d67ce0ca8
--- /dev/null
+++ b/tools/testing/selftests/net/busy_poller.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+
+#include <sys/ioctl.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+/* if the headers haven't been updated, we need to define some things */
+#if !defined(EPOLL_IOC_TYPE)
+struct epoll_params {
+ uint32_t busy_poll_usecs;
+ uint16_t busy_poll_budget;
+ uint8_t prefer_busy_poll;
+
+ /* pad the struct to a multiple of 64bits */
+ uint8_t __pad;
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+#endif
+
+static uint32_t cfg_port = 8000;
+static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
+static char *cfg_outfile;
+static int cfg_max_events = 8;
+
+/* busy poll params */
+static uint32_t cfg_busy_poll_usecs;
+static uint16_t cfg_busy_poll_budget;
+static uint8_t cfg_prefer_busy_poll;
+
+static void usage(const char *filepath)
+{
+ error(1, 0,
+ "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile>",
+ filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+
+ while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:")) != -1) {
+ switch (c) {
+ case 'u':
+ cfg_busy_poll_usecs = strtoul(optarg, NULL, 0);
+ if (cfg_busy_poll_usecs == ULONG_MAX ||
+ cfg_busy_poll_usecs > UINT32_MAX)
+ error(1, ERANGE, "busy_poll_usecs too large");
+ break;
+ case 'P':
+ cfg_prefer_busy_poll = strtoul(optarg, NULL, 0);
+ if (cfg_prefer_busy_poll == ULONG_MAX ||
+ cfg_prefer_busy_poll > 1)
+ error(1, ERANGE,
+ "prefer busy poll should be 0 or 1");
+ break;
+ case 'g':
+ cfg_busy_poll_budget = strtoul(optarg, NULL, 0);
+ if (cfg_busy_poll_budget == ULONG_MAX ||
+ cfg_busy_poll_budget > UINT16_MAX)
+ error(1, ERANGE,
+ "busy poll budget must be [0, UINT16_MAX]");
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ if (cfg_port > UINT16_MAX)
+ error(1, ERANGE, "port must be <= 65535");
+ break;
+ case 'b':
+ ret = inet_aton(optarg, &cfg_bind_addr);
+ if (ret == 0)
+ error(1, errno,
+ "bind address %s invalid", optarg);
+ break;
+ case 'o':
+ cfg_outfile = strdup(optarg);
+ if (!cfg_outfile)
+ error(1, 0, "outfile invalid");
+ break;
+ case 'm':
+ cfg_max_events = strtol(optarg, NULL, 0);
+
+ if (cfg_max_events == LONG_MIN ||
+ cfg_max_events == LONG_MAX ||
+ cfg_max_events <= 0)
+ error(1, ERANGE,
+ "max events must be > 0 and < LONG_MAX");
+ break;
+ }
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+}
+
+static void epoll_ctl_add(int epfd, int fd, uint32_t events)
+{
+ struct epoll_event ev;
+
+ ev.events = events;
+ ev.data.fd = fd;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1)
+ error(1, errno, "epoll_ctl add fd: %d", fd);
+}
+
+static void setnonblock(int sockfd)
+{
+ int flags;
+
+ flags = fcntl(sockfd, F_GETFL, 0);
+
+ if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1)
+ error(1, errno, "unable to set socket to nonblocking mode");
+}
+
+static void write_chunk(int fd, char *buf, ssize_t buflen)
+{
+ ssize_t remaining = buflen;
+ char *buf_offset = buf;
+ ssize_t writelen = 0;
+ ssize_t write_result;
+
+ while (writelen < buflen) {
+ write_result = write(fd, buf_offset, remaining);
+ if (write_result == -1)
+ error(1, errno, "unable to write data to outfile");
+
+ writelen += write_result;
+ remaining -= write_result;
+ buf_offset += write_result;
+ }
+}
+
+static void run_poller(void)
+{
+ struct epoll_event events[cfg_max_events];
+ struct epoll_params epoll_params = {0};
+ struct sockaddr_in server_addr;
+ int i, epfd, nfds;
+ ssize_t readlen;
+ int outfile_fd;
+ char buf[1024];
+ int sockfd;
+ int conn;
+ int val;
+
+ outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644);
+ if (outfile_fd == -1)
+ error(1, errno, "unable to open outfile: %s", cfg_outfile);
+
+ sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (sockfd == -1)
+ error(1, errno, "unable to create listen socket");
+
+ server_addr.sin_family = AF_INET;
+ server_addr.sin_port = htons(cfg_port);
+ server_addr.sin_addr = cfg_bind_addr;
+
+ epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
+ epoll_params.busy_poll_budget = cfg_busy_poll_budget;
+ epoll_params.prefer_busy_poll = cfg_prefer_busy_poll;
+ epoll_params.__pad = 0;
+
+ val = 1;
+ if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)))
+ error(1, errno, "poller setsockopt reuseaddr");
+
+ setnonblock(sockfd);
+
+ if (bind(sockfd, (struct sockaddr *)&server_addr,
+ sizeof(struct sockaddr_in)))
+ error(0, errno, "poller bind to port: %d\n", cfg_port);
+
+ if (listen(sockfd, 1))
+ error(1, errno, "poller listen");
+
+ epfd = epoll_create1(0);
+ if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1)
+ error(1, errno, "unable to set busy poll params");
+
+ epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET);
+
+ for (;;) {
+ nfds = epoll_wait(epfd, events, cfg_max_events, -1);
+ for (i = 0; i < nfds; i++) {
+ if (events[i].data.fd == sockfd) {
+ conn = accept(sockfd, NULL, NULL);
+ if (conn == -1)
+ error(1, errno,
+ "accepting incoming connection failed");
+
+ setnonblock(conn);
+ epoll_ctl_add(epfd, conn,
+ EPOLLIN | EPOLLET | EPOLLRDHUP |
+ EPOLLHUP);
+ } else if (events[i].events & EPOLLIN) {
+ for (;;) {
+ readlen = read(events[i].data.fd, buf,
+ sizeof(buf));
+ if (readlen > 0)
+ write_chunk(outfile_fd, buf,
+ readlen);
+ else
+ break;
+ }
+ } else {
+ /* spurious event ? */
+ }
+ if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) {
+ epoll_ctl(epfd, EPOLL_CTL_DEL,
+ events[i].data.fd, NULL);
+ close(events[i].data.fd);
+ close(outfile_fd);
+ return;
+ }
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ parse_opts(argc, argv);
+ run_poller();
+ return 0;
+}
--
2.25.1
On Fri, 1 Nov 2024 00:48:33 +0000 Joe Damato wrote: > + ip netns exec nscl nc -N 192.168.1.1 48675 < $tmp_file Thanks a lot for adding the test. Could you replace nc with socat or a similar tool? There are multiple incompatible implementations of netcat packaged by various distros, we get: # selftests: net: busy_poll_test.sh # nc: invalid option -- 'N' # Ncat: Try `--help' or man(1) ncat for more information, usage options and help. QUITTING. nc is a known PITA. > + # set the suspend parameter for the server via its IFIDX > + > + DUMP_CMD="${YNL_PATH} --spec ${SPEC_PATH} --dump napi-get --json=\"{\\\"ifindex\\\": ${NSIM_DEV_1_IFIDX}}\" --output-json" > + NSIM_DEV_1_NAPIID=$(ip netns exec nssv bash -c "$DUMP_CMD") > + NSIM_DEV_1_NAPIID=$(echo $NSIM_DEV_1_NAPIID | jq '.[] | .id') > + > + SUSPEND_CMD="${YNL_PATH} --spec ${SPEC_PATH} --do napi-set --json=\"{\\\"id\\\": ${NSIM_DEV_1_NAPIID}, \\\"irq-suspend-timeout\\\": 20000000, \\\"gro-flush-timeout\\\": 50000, \\\"defer-hard-irqs\\\": 100}\"" > + NSIM_DEV_1_SETCONFIG=$(ip netns exec nssv bash -c "$SUSPEND_CMD") Can you try to run this test in installed mode? https://docs.kernel.org/dev-tools/kselftest.html#install-selftests IIRC YNL moves around when we install, you'd either need to do autodetection of the path (see tools/testing/selftests/net/lib/py/ynl.py and if you go down this route please move the helper which exports the YNL variables to lib.sh so other tests can reuse); or teach the C code to do the setup, you can link against YNL fairly easily (look at where ncdevmem is added in the Makefile, it uses YNL)
On Fri, Nov 01, 2024 at 06:34:26AM -0700, Jakub Kicinski wrote: > On Fri, 1 Nov 2024 00:48:33 +0000 Joe Damato wrote: > > + ip netns exec nscl nc -N 192.168.1.1 48675 < $tmp_file > > Thanks a lot for adding the test. Thanks for the review. > Could you replace nc with socat or > a similar tool? There are multiple incompatible implementations of > netcat packaged by various distros, we get: > > # selftests: net: busy_poll_test.sh > # nc: invalid option -- 'N' > # Ncat: Try `--help' or man(1) ncat for more information, usage options and help. QUITTING. > > nc is a known PITA. OK, I've replaced with socat for the v4 I am working on. I've also added some additional documentation to the FAQ in the cover letter and the kernel documentation to help answer Sridhar's question because it may be a question others have as well. > > + # set the suspend parameter for the server via its IFIDX > > + > > + DUMP_CMD="${YNL_PATH} --spec ${SPEC_PATH} --dump napi-get --json=\"{\\\"ifindex\\\": ${NSIM_DEV_1_IFIDX}}\" --output-json" > > + NSIM_DEV_1_NAPIID=$(ip netns exec nssv bash -c "$DUMP_CMD") > > + NSIM_DEV_1_NAPIID=$(echo $NSIM_DEV_1_NAPIID | jq '.[] | .id') > > + > > + SUSPEND_CMD="${YNL_PATH} --spec ${SPEC_PATH} --do napi-set --json=\"{\\\"id\\\": ${NSIM_DEV_1_NAPIID}, \\\"irq-suspend-timeout\\\": 20000000, \\\"gro-flush-timeout\\\": 50000, \\\"defer-hard-irqs\\\": 100}\"" > > + NSIM_DEV_1_SETCONFIG=$(ip netns exec nssv bash -c "$SUSPEND_CMD") > > Can you try to run this test in installed mode? > > https://docs.kernel.org/dev-tools/kselftest.html#install-selftests > > IIRC YNL moves around when we install, you'd either need to do > autodetection of the path (see tools/testing/selftests/net/lib/py/ynl.py > and if you go down this route please move the helper which exports the > YNL variables to lib.sh so other tests can reuse); or teach the C code > to do the setup, you can link against YNL fairly easily (look at where > ncdevmem is added in the Makefile, it uses YNL) I think I'm going to go the C route. Just to make sure I'm following how to do this properly: - I've added busy_poller to YNL_GEN_FILES (and removed it from TEST_GEN_FILES) - It still needs to be run via the script (which sets up netdevsim etc), so I am leaving that script (busy_poll_test.sh) in TEST_PROGS. busy_poller is not intended to be run on its own for selftest purposes. I am not sure if YNL_GEN_FILES are expected to run standalone when invoked or if this will work as I expect it to (busy_poll_test.sh is run): [...] TEST_PROGS += busy_poll_test.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := ncdevmem busy_poller [...] Doing the above: I am hoping that busy_poll_test.sh will be run (which will use busy_poller) and that busy_poller won't be run on its own. Thanks for the guidance.
© 2016 - 2024 Red Hat, Inc.