Add support to set per-NAPI defer_hard_irqs and gro_flush_timeout.
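
For illustration, a minimal user space sketch that exercises the new napi-set
op could look like the following. This is not part of the patch: it uses
libnl-3/libnl-genl-3 rather than YNL, and the NAPI id and parameter values
below are made up.

/* Hypothetical example, not part of this patch: set defer_hard_irqs and
 * gro_flush_timeout on one NAPI instance via the new napi-set op using
 * libnl-3/libnl-genl-3. Requires CAP_NET_ADMIN (the op is admin-perm);
 * the NAPI id (421) and values are illustrative only.
 */
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/netdev.h>

int main(void)
{
	struct nl_sock *sk;
	struct nl_msg *msg;
	int family, err;

	sk = nl_socket_alloc();
	if (!sk)
		return 1;

	err = genl_connect(sk);
	if (err < 0)
		goto out;

	/* "netdev" is the genetlink family name from netdev.yaml */
	family = genl_ctrl_resolve(sk, "netdev");
	if (family < 0) {
		err = family;
		goto out;
	}

	msg = nlmsg_alloc();
	if (!msg) {
		err = -1;
		goto out;
	}

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NETDEV_CMD_NAPI_SET, 1);
	nla_put_u32(msg, NETDEV_A_NAPI_ID, 421);
	nla_put_u32(msg, NETDEV_A_NAPI_DEFER_HARD_IRQS, 2);
	/* gro-flush-timeout is NLA_UINT; a u64 payload is accepted */
	nla_put_u64(msg, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, 20000);

	err = nl_send_auto(sk, msg);
	if (err >= 0)
		err = nl_wait_for_ack(sk);

	nlmsg_free(msg);
out:
	nl_socket_free(sk);
	return err < 0;
}

Something like "gcc napi_set.c $(pkg-config --cflags --libs libnl-genl-3.0)"
should build it on a typical distro.
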
Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
---
Documentation/netlink/specs/netdev.yaml | 11 ++++++
include/uapi/linux/netdev.h | 1 +
net/core/netdev-genl-gen.c | 18 ++++++++++
net/core/netdev-genl-gen.h | 1 +
net/core/netdev-genl.c | 45 +++++++++++++++++++++++++
tools/include/uapi/linux/netdev.h | 1 +
6 files changed, 77 insertions(+)
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 7b47454c51dd..f9cb97d6106c 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -693,6 +693,17 @@ operations:
reply:
attributes:
- id
+ -
+ name: napi-set
+ doc: Set configurable NAPI instance settings.
+ attribute-set: napi
+ flags: [ admin-perm ]
+ do:
+ request:
+ attributes:
+ - id
+ - defer-hard-irqs
+ - gro-flush-timeout
kernel-family:
headers: [ "linux/list.h"]
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index cacd33359c76..e3ebb49f60d2 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -201,6 +201,7 @@ enum {
NETDEV_CMD_NAPI_GET,
NETDEV_CMD_QSTATS_GET,
NETDEV_CMD_BIND_RX,
+ NETDEV_CMD_NAPI_SET,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index b28424ae06d5..e197bd84997c 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -22,6 +22,10 @@ static const struct netlink_range_validation netdev_a_page_pool_ifindex_range =
.max = 2147483647ULL,
};
+static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range = {
+ .max = 2147483647ULL,
+};
+
/* Common nested types */
const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = {
[NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
@@ -87,6 +91,13 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1]
[NETDEV_A_DMABUF_QUEUES] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy),
};
+/* NETDEV_CMD_NAPI_SET - do */
+static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT + 1] = {
+ [NETDEV_A_NAPI_ID] = { .type = NLA_U32, },
+ [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range),
+ [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, },
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -171,6 +182,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.maxattr = NETDEV_A_DMABUF_FD,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
+ {
+ .cmd = NETDEV_CMD_NAPI_SET,
+ .doit = netdev_nl_napi_set_doit,
+ .policy = netdev_napi_set_nl_policy,
+ .maxattr = NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 8cda334fd042..e09dd7539ff2 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -33,6 +33,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info);
enum {
NETDEV_NLGRP_MGMT,
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index ac19f2e6cfbe..b49c3b4e5fbe 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -303,6 +303,51 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
return err;
}
+static int
+netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
+{
+ u64 gro_flush_timeout = 0;
+ u32 defer = 0;
+
+ if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
+ defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
+ napi_set_defer_hard_irqs(napi, defer);
+ }
+
+ if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) {
+ gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]);
+ napi_set_gro_flush_timeout(napi, gro_flush_timeout);
+ }
+
+ return 0;
+}
+
+int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct napi_struct *napi;
+ unsigned int napi_id;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
+ return -EINVAL;
+
+ napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
+
+ rtnl_lock();
+
+ napi = napi_by_id(napi_id);
+ if (napi) {
+ err = netdev_nl_napi_set_config(napi, info);
+ } else {
+ NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
+ err = -ENOENT;
+ }
+
+ rtnl_unlock();
+
+ return err;
+}
+
static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index cacd33359c76..e3ebb49f60d2 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -201,6 +201,7 @@ enum {
NETDEV_CMD_NAPI_GET,
NETDEV_CMD_QSTATS_GET,
NETDEV_CMD_BIND_RX,
+ NETDEV_CMD_NAPI_SET,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
--
2.25.1
On 10/11/24 20:45, Joe Damato wrote:
> +int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
> +{
> +	struct napi_struct *napi;
> +	unsigned int napi_id;
> +	int err;
> +
> +	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
> +		return -EINVAL;
> +
> +	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
> +
> +	rtnl_lock();
> +
> +	napi = napi_by_id(napi_id);

AFAICS the above causes a RCU splat in the selftests:

https://netdev-3.bots.linux.dev/vmksft-net-dbg/results/856342/61-busy-poll-test-sh/stderr

because napi_by_id() only checks for the RCU lock.

Could you please have a look?

Thanks!

Paolo
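
For context, napi_by_id() is roughly as follows in current kernels (details
may differ between versions); the RCU-protected hash walk is what triggers
the lockdep warning under CONFIG_PROVE_RCU_LIST when the caller holds only
the RTNL lock:

/* Approximate napi_by_id() from net/core/dev.c, shown for context only:
 * hlist_for_each_entry_rcu() asserts that the RCU read lock is held.
 */
static struct napi_struct *napi_by_id(unsigned int napi_id)
{
	unsigned int hash = napi_id % HASH_SIZE(napi_hash);
	struct napi_struct *napi;

	hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
		if (napi->napi_id == napi_id)
			return napi;

	return NULL;
}
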
On Tue, Nov 12, 2024 at 10:17:40AM +0100, Paolo Abeni wrote:
> On 10/11/24 20:45, Joe Damato wrote:
> > +int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
> > +{
> > +	struct napi_struct *napi;
> > +	unsigned int napi_id;
> > +	int err;
> > +
> > +	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
> > +		return -EINVAL;
> > +
> > +	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
> > +
> > +	rtnl_lock();
> > +
> > +	napi = napi_by_id(napi_id);
>
> AFAICS the above causes a RCU splat in the selftests:
>
> https://netdev-3.bots.linux.dev/vmksft-net-dbg/results/856342/61-busy-poll-test-sh/stderr
>
> because napi_by_id() only checks for the RCU lock.
>
> Could you please have a look?

Thanks for letting me know.

I rebuilt my kernel with CONFIG_PROVE_RCU_LIST and a couple other
debugging options and I was able to reproduce the splat you mentioned.

I took a look and it looks like there might be two things:

  - netdev_nl_napi_set_doit needs to call rcu_read_lock /
    rcu_read_unlock, which would be a Fixes on the commit in the series
    just merged, and

  - netdev_nl_napi_get_doit also has the same issue and should be fixed
    in a separate commit with its own fixes tag.

If that sounds right to you, I'll propose a short series of 2 patches,
1 to fix each. Let me know if that sounds OK?

In the meantime, I'm rebuilding a kernel now to ensure my proposed fix
fixes the splat.
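
A rough sketch of the first fix described above, taking the RCU read lock
around the napi_by_id() lookup in netdev_nl_napi_set_doit(), might look like
this (the actual follow-up patches may differ):

/* Sketch only, not the posted fix: hold the RCU read lock across
 * napi_by_id(), since that helper only asserts the RCU read lock.
 */
int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct napi_struct *napi;
	unsigned int napi_id;
	int err;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
		return -EINVAL;

	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);

	rtnl_lock();
	rcu_read_lock();

	napi = napi_by_id(napi_id);
	if (napi) {
		err = netdev_nl_napi_set_config(napi, info);
	} else {
		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
		err = -ENOENT;
	}

	rcu_read_unlock();
	rtnl_unlock();

	return err;
}
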
On Fri, Oct 11, 2024 at 8:46 PM Joe Damato <jdamato@fastly.com> wrote:
>
> Add support to set per-NAPI defer_hard_irqs and gro_flush_timeout.
>
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> Reviewed-by: Jakub Kicinski <kuba@kernel.org>

Reviewed-by: Eric Dumazet <edumazet@google.com>