We have a number of netlink commands in the ethnl family that may have
multiple objects to dump even for a single net_device, including:
- PLCA, PSE-PD, phy: one message per PHY device
- tsinfo: one message per timestamp source (netdev + phys)
- rss: one message per RSS context
To get this behaviour, these netlink commands need to roll a custom
->dumpit().
To prepare for making per-netdev DUMPs more generic in ethnl, introduce a
member in the ethnl ops to indicate whether a given command allows
per-netdev DUMPs (also referred to as filtered DUMPs).
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
---
net/ethtool/netlink.c | 45 +++++++++++++++++++++++++++++--------------
net/ethtool/netlink.h | 2 ++
2 files changed, 33 insertions(+), 14 deletions(-)
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index a163d40c6431..7adede5e4ff1 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -587,21 +587,38 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
int ret = 0;
rcu_read_lock();
- for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
- dev_hold(dev);
+ if (ctx->req_info->dev) {
+ dev = ctx->req_info->dev;
rcu_read_unlock();
+ /* Filtered DUMP request targeted to a single netdev. We already
+ * hold a ref to the netdev from ->start()
+ */
+ ret = ethnl_default_dump_one(skb, dev, ctx,
+ genl_info_dump(cb));
+ rcu_read_lock();
+ netdev_put(ctx->req_info->dev, &ctx->req_info->dev_tracker);
- ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
+ if (ret < 0 && ret != -EOPNOTSUPP && likely(skb->len))
+ ret = skb->len;
- rcu_read_lock();
- dev_put(dev);
+ } else {
+ for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
+ dev_hold(dev);
+ rcu_read_unlock();
+
+ ret = ethnl_default_dump_one(skb, dev, ctx,
+ genl_info_dump(cb));
+
+ rcu_read_lock();
+ dev_put(dev);
- if (ret < 0 && ret != -EOPNOTSUPP) {
- if (likely(skb->len))
- ret = skb->len;
- break;
+ if (ret < 0 && ret != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ ret = skb->len;
+ break;
+ }
+ ret = 0;
}
- ret = 0;
}
rcu_read_unlock();
@@ -635,10 +652,10 @@ static int ethnl_default_start(struct netlink_callback *cb)
}
ret = ethnl_default_parse(req_info, &info->info, ops, false);
- if (req_info->dev) {
- /* We ignore device specification in dump requests but as the
- * same parser as for non-dump (doit) requests is used, it
- * would take reference to the device if it finds one
+ if (req_info->dev && !ops->allow_pernetdev_dump) {
+ /* We ignore device specification in unfiltered dump requests
+ * but as the same parser as for non-dump (doit) requests is
+ * used, it would take reference to the device if it finds one
*/
netdev_put(req_info->dev, &req_info->dev_tracker);
req_info->dev = NULL;
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index ec6ab5443a6f..4aaa73282d6a 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -331,6 +331,7 @@ int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid,
* @req_info_size: size of request info
* @reply_data_size: size of reply data
* @allow_nodev_do: allow non-dump request with no device identification
+ * @allow_pernetdev_dump: allow filtering dump requests with ifname/ifindex
* @set_ntf_cmd: notification to generate on changes (SET)
* @parse_request:
* Parse request except common header (struct ethnl_req_info). Common
@@ -388,6 +389,7 @@ struct ethnl_request_ops {
unsigned int req_info_size;
unsigned int reply_data_size;
bool allow_nodev_do;
+ bool allow_pernetdev_dump;
u8 set_ntf_cmd;
int (*parse_request)(struct ethnl_req_info *req_info,
--
2.48.1
On 3/13/25 7:26 PM, Maxime Chevallier wrote:
> We have a number of netlink commands in the ethnl family that may have
> multiple objects to dump even for a single net_device, including :
>
> - PLCA, PSE-PD, phy: one message per PHY device
> - tsinfo: one message per timestamp source (netdev + phys)
> - rss: One per RSS context
>
> To get this behaviour, these netlink commands need to roll a custom
> ->dumpit().
>
> To prepare making per-netdev DUMP more generic in ethnl, introduce a
> member in the ethnl ops to indicate if a given command may allow
> pernetdev DUMPs (also referred to as filtered DUMPs).
>
> Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
> ---
> net/ethtool/netlink.c | 45 +++++++++++++++++++++++++++++--------------
> net/ethtool/netlink.h | 2 ++
> 2 files changed, 33 insertions(+), 14 deletions(-)
>
> diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
> index a163d40c6431..7adede5e4ff1 100644
> --- a/net/ethtool/netlink.c
> +++ b/net/ethtool/netlink.c
> @@ -587,21 +587,38 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
> int ret = 0;
>
> rcu_read_lock();
Maintaining the RCU read lock here is IMHO confusing...
> - for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
> - dev_hold(dev);
> + if (ctx->req_info->dev) {
> + dev = ctx->req_info->dev;
... as this is refcounted.
I suggest moving the rcu_read_lock() inside the else branch.
> rcu_read_unlock();
> + /* Filtered DUMP request targeted to a single netdev. We already
> + * hold a ref to the netdev from ->start()
> + */
> + ret = ethnl_default_dump_one(skb, dev, ctx,
> + genl_info_dump(cb));
> + rcu_read_lock();
> + netdev_put(ctx->req_info->dev, &ctx->req_info->dev_tracker);
>
> - ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
> + if (ret < 0 && ret != -EOPNOTSUPP && likely(skb->len))
> + ret = skb->len;
>
> - rcu_read_lock();
> - dev_put(dev);
> + } else {
> + for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
> + dev_hold(dev);
> + rcu_read_unlock();
> +
> + ret = ethnl_default_dump_one(skb, dev, ctx,
> + genl_info_dump(cb));
> +
> + rcu_read_lock();
> + dev_put(dev);
>
> - if (ret < 0 && ret != -EOPNOTSUPP) {
> - if (likely(skb->len))
> - ret = skb->len;
> - break;
> + if (ret < 0 && ret != -EOPNOTSUPP) {
> + if (likely(skb->len))
> + ret = skb->len;
IMHO there are a few too many levels of indentation here. It would
probably be better to move this code into a separate helper.
Thanks,
Paolo
On Fri, 21 Mar 2025 17:31:17 +0100
Paolo Abeni <pabeni@redhat.com> wrote:
> On 3/13/25 7:26 PM, Maxime Chevallier wrote:
> > We have a number of netlink commands in the ethnl family that may have
> > multiple objects to dump even for a single net_device, including :
> >
> > - PLCA, PSE-PD, phy: one message per PHY device
> > - tsinfo: one message per timestamp source (netdev + phys)
> > - rss: One per RSS context
> >
> > To get this behaviour, these netlink commands need to roll a custom
> > ->dumpit().
> >
> > To prepare making per-netdev DUMP more generic in ethnl, introduce a
> > member in the ethnl ops to indicate if a given command may allow
> > pernetdev DUMPs (also referred to as filtered DUMPs).
> >
> > Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
> > ---
> > net/ethtool/netlink.c | 45 +++++++++++++++++++++++++++++--------------
> > net/ethtool/netlink.h | 2 ++
> > 2 files changed, 33 insertions(+), 14 deletions(-)
> >
> > diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
> > index a163d40c6431..7adede5e4ff1 100644
> > --- a/net/ethtool/netlink.c
> > +++ b/net/ethtool/netlink.c
> > @@ -587,21 +587,38 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
> > int ret = 0;
> >
> > rcu_read_lock();
>
> Maintain the RCU read lock here is IMHO confusing...
>
> > - for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
> > - dev_hold(dev);
> > + if (ctx->req_info->dev) {
> > + dev = ctx->req_info->dev;
>
> .. as this is refcounted.
>
> I suggest to move the rcu_read_lock inside the if.
Indeed, that is probably not the best place for it. I'll address that,
thanks for pointing this out.
>
> > rcu_read_unlock();
> > + /* Filtered DUMP request targeted to a single netdev. We already
> > + * hold a ref to the netdev from ->start()
> > + */
> > + ret = ethnl_default_dump_one(skb, dev, ctx,
> > + genl_info_dump(cb));
> > + rcu_read_lock();
> > + netdev_put(ctx->req_info->dev, &ctx->req_info->dev_tracker);
> >
> > - ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
> > + if (ret < 0 && ret != -EOPNOTSUPP && likely(skb->len))
> > + ret = skb->len;
> >
> > - rcu_read_lock();
> > - dev_put(dev);
> > + } else {
> > + for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
> > + dev_hold(dev);
> > + rcu_read_unlock();
> > +
> > + ret = ethnl_default_dump_one(skb, dev, ctx,
> > + genl_info_dump(cb));
> > +
> > + rcu_read_lock();
> > + dev_put(dev);
> >
> > - if (ret < 0 && ret != -EOPNOTSUPP) {
> > - if (likely(skb->len))
> > - ret = skb->len;
> > - break;
> > + if (ret < 0 && ret != -EOPNOTSUPP) {
> > + if (likely(skb->len))
> > + ret = skb->len;
>
> IMHO a bit too many levels of indentation. It's possibly better to move
> this code in a separate helper.
That's true, it is not the prettiest part of the patch. I'll refactor it
into a cleaner form.
Thanks for the review,
Maxime
© 2016 - 2025 Red Hat, Inc.