[PATCH net-next 03/10] devlink: Serialize access to rate domains

Posted by Tariq Toukan 10 months, 1 week ago
From: Cosmin Ratiu <cratiu@nvidia.com>

Access to rates in a rate domain should be serialized.

This commit introduces two new functions, devl_rate_domain_lock and
devl_rate_domain_unlock, and uses them wherever serialized access to a
rate domain is needed. For now, they are no-ops, since access to a rate
domain is protected by the devlink instance lock.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 net/devlink/devl_internal.h |   4 ++
 net/devlink/rate.c          | 114 +++++++++++++++++++++++++++---------
 2 files changed, 89 insertions(+), 29 deletions(-)

diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 209b4a4c7070..fae81dd6953f 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -121,6 +121,10 @@ static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
 		device_unlock(devlink->dev);
 }
 
+static inline void devl_rate_domain_lock(struct devlink *devlink) { }
+
+static inline void devl_rate_domain_unlock(struct devlink *devlink) { }
+
 typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
 typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
 				      u32 rel_index);
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 535863bb0c17..54e6a9893e3d 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -167,18 +167,22 @@ void devlink_rates_notify_register(struct devlink *devlink)
 {
 	struct devlink_rate *rate_node;
 
+	devl_rate_domain_lock(devlink);
 	list_for_each_entry(rate_node, &devlink->rate_domain->rate_list, list)
 		if (rate_node->devlink == devlink)
 			devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+	devl_rate_domain_unlock(devlink);
 }
 
 void devlink_rates_notify_unregister(struct devlink *devlink)
 {
 	struct devlink_rate *rate_node;
 
+	devl_rate_domain_lock(devlink);
 	list_for_each_entry_reverse(rate_node, &devlink->rate_domain->rate_list, list)
 		if (rate_node->devlink == devlink)
 			devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+	devl_rate_domain_unlock(devlink);
 }
 
 static int
@@ -190,6 +194,7 @@ devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
 	int idx = 0;
 	int err = 0;
 
+	devl_rate_domain_lock(devlink);
 	list_for_each_entry(devlink_rate, &devlink->rate_domain->rate_list, list) {
 		enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
 		u32 id = NETLINK_CB(cb->skb).portid;
@@ -209,6 +214,7 @@ devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
 		}
 		idx++;
 	}
+	devl_rate_domain_unlock(devlink);
 
 	return err;
 }
@@ -225,23 +231,33 @@ int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *msg;
 	int err;
 
+	devl_rate_domain_lock(devlink);
 	devlink_rate = devlink_rate_get_from_info(devlink, info);
-	if (IS_ERR(devlink_rate))
-		return PTR_ERR(devlink_rate);
+	if (IS_ERR(devlink_rate)) {
+		err = PTR_ERR(devlink_rate);
+		goto unlock;
+	}
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
+	if (!msg) {
+		err = -ENOMEM;
+		goto unlock;
+	}
 
 	err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
 				   info->snd_portid, info->snd_seq, 0,
 				   info->extack);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
+	if (err)
+		goto err_fill;
 
+	devl_rate_domain_unlock(devlink);
 	return genlmsg_reply(msg, info);
+
+err_fill:
+	nlmsg_free(msg);
+unlock:
+	devl_rate_domain_unlock(devlink);
+	return err;
 }
 
 static bool
@@ -470,18 +486,24 @@ int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info)
 	const struct devlink_ops *ops;
 	int err;
 
+	devl_rate_domain_lock(devlink);
 	devlink_rate = devlink_rate_get_from_info(devlink, info);
-	if (IS_ERR(devlink_rate))
-		return PTR_ERR(devlink_rate);
+	if (IS_ERR(devlink_rate)) {
+		err = PTR_ERR(devlink_rate);
+		goto unlock;
+	}
 
 	ops = devlink->ops;
-	if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
-		return -EOPNOTSUPP;
+	if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type)) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 
 	err = devlink_nl_rate_set(devlink_rate, ops, info);
-
 	if (!err)
 		devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+unlock:
+	devl_rate_domain_unlock(devlink);
 	return err;
 }
 
@@ -501,15 +523,21 @@ int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
 	if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
 		return -EOPNOTSUPP;
 
+	devl_rate_domain_lock(devlink);
 	rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
-	if (!IS_ERR(rate_node))
-		return -EEXIST;
-	else if (rate_node == ERR_PTR(-EINVAL))
-		return -EINVAL;
+	if (!IS_ERR(rate_node)) {
+		err = -EEXIST;
+		goto unlock;
+	} else if (rate_node == ERR_PTR(-EINVAL)) {
+		err = -EINVAL;
+		goto unlock;
+	}
 
 	rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
-	if (!rate_node)
-		return -ENOMEM;
+	if (!rate_node) {
+		err = -ENOMEM;
+		goto unlock;
+	}
 
 	rate_node->devlink = devlink;
 	rate_node->type = DEVLINK_RATE_TYPE_NODE;
@@ -530,6 +558,7 @@ int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
 	refcount_set(&rate_node->refcnt, 1);
 	list_add(&rate_node->list, &devlink->rate_domain->rate_list);
 	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+	devl_rate_domain_unlock(devlink);
 	return 0;
 
 err_rate_set:
@@ -538,6 +567,8 @@ int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
 	kfree(rate_node->name);
 err_strdup:
 	kfree(rate_node);
+unlock:
+	devl_rate_domain_unlock(devlink);
 	return err;
 }
 
@@ -547,13 +578,17 @@ int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info)
 	struct devlink_rate *rate_node;
 	int err;
 
+	devl_rate_domain_lock(devlink);
 	rate_node = devlink_rate_node_get_from_info(devlink, info);
-	if (IS_ERR(rate_node))
-		return PTR_ERR(rate_node);
+	if (IS_ERR(rate_node)) {
+		err = PTR_ERR(rate_node);
+		goto unlock;
+	}
 
 	if (refcount_read(&rate_node->refcnt) > 1) {
 		NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
-		return -EBUSY;
+		err = -EBUSY;
+		goto unlock;
 	}
 
 	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
@@ -564,20 +599,26 @@ int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info)
 	list_del(&rate_node->list);
 	kfree(rate_node->name);
 	kfree(rate_node);
+unlock:
+	devl_rate_domain_unlock(devlink);
 	return err;
 }
 
 int devlink_rate_nodes_check(struct devlink *devlink, struct netlink_ext_ack *extack)
 {
 	struct devlink_rate *devlink_rate;
+	int err = 0;
 
+	devl_rate_domain_lock(devlink);
 	list_for_each_entry(devlink_rate, &devlink->rate_domain->rate_list, list)
 		if (devlink_rate->devlink == devlink &&
 		    devlink_rate_is_node(devlink_rate)) {
 			NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
-			return -EBUSY;
+			err = -EBUSY;
+			break;
 		}
-	return 0;
+	devl_rate_domain_unlock(devlink);
+	return err;
 }
 
 /**
@@ -595,13 +636,19 @@ devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
 {
 	struct devlink_rate *rate_node;
 
+	devl_rate_domain_lock(devlink);
+
 	rate_node = devlink_rate_node_get_by_name(devlink, node_name);
-	if (!IS_ERR(rate_node))
-		return ERR_PTR(-EEXIST);
+	if (!IS_ERR(rate_node)) {
+		rate_node = ERR_PTR(-EEXIST);
+		goto unlock;
+	}
 
 	rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
-	if (!rate_node)
-		return ERR_PTR(-ENOMEM);
+	if (!rate_node) {
+		rate_node = ERR_PTR(-ENOMEM);
+		goto unlock;
+	}
 
 	if (parent) {
 		rate_node->parent = parent;
@@ -615,12 +662,15 @@ devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
 	rate_node->name = kstrdup(node_name, GFP_KERNEL);
 	if (!rate_node->name) {
 		kfree(rate_node);
-		return ERR_PTR(-ENOMEM);
+		rate_node = ERR_PTR(-ENOMEM);
+		goto unlock;
 	}
 
 	refcount_set(&rate_node->refcnt, 1);
 	list_add(&rate_node->list, &devlink->rate_domain->rate_list);
 	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+unlock:
+	devl_rate_domain_unlock(devlink);
 	return rate_node;
 }
 EXPORT_SYMBOL_GPL(devl_rate_node_create);
@@ -648,6 +698,7 @@ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
 	if (!devlink_rate)
 		return -ENOMEM;
 
+	devl_rate_domain_lock(devlink);
 	if (parent) {
 		devlink_rate->parent = parent;
 		refcount_inc(&devlink_rate->parent->refcnt);
@@ -660,6 +711,7 @@ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
 	list_add_tail(&devlink_rate->list, &devlink->rate_domain->rate_list);
 	devlink_port->devlink_rate = devlink_rate;
 	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+	devl_rate_domain_unlock(devlink);
 
 	return 0;
 }
@@ -680,11 +732,13 @@ void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
 	if (!devlink_rate)
 		return;
 
+	devl_rate_domain_lock(devlink_port->devlink);
 	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
 	if (devlink_rate->parent)
 		refcount_dec(&devlink_rate->parent->refcnt);
 	list_del(&devlink_rate->list);
 	devlink_port->devlink_rate = NULL;
+	devl_rate_domain_unlock(devlink_port->devlink);
 	kfree(devlink_rate);
 }
 EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
@@ -702,6 +756,7 @@ void devl_rate_nodes_destroy(struct devlink *devlink)
 	const struct devlink_ops *ops = devlink->ops;
 
 	devl_assert_locked(devlink);
+	devl_rate_domain_lock(devlink);
 
 	list_for_each_entry(devlink_rate, &devlink->rate_domain->rate_list, list) {
 		if (!devlink_rate->parent || devlink_rate->devlink != devlink)
@@ -723,5 +778,6 @@ void devl_rate_nodes_destroy(struct devlink *devlink)
 			kfree(devlink_rate);
 		}
 	}
+	devl_rate_domain_unlock(devlink);
 }
 EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
-- 
2.45.0
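
For readers following the series: the two helpers above are deliberate
no-ops at this stage. Below is a minimal sketch of what they might
become once a rate domain can be shared between devlink instances; the
struct layout and field names are assumptions, not code from the series:

#include <linux/list.h>
#include <linux/mutex.h>

struct devlink_rate_domain {
	struct list_head rate_list;	/* all rate objects in the domain */
	struct mutex lock;		/* serializes access to rate_list */
};

static inline void devl_rate_domain_lock(struct devlink *devlink)
{
	mutex_lock(&devlink->rate_domain->lock);
}

static inline void devl_rate_domain_unlock(struct devlink *devlink)
{
	mutex_unlock(&devlink->rate_domain->lock);
}

Because every call site added by this patch funnels through these two
helpers, swapping the no-ops for real mutex operations later would not
require touching rate.c again.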
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jiri Pirko 10 months, 1 week ago
Thu, Feb 13, 2025 at 07:01:27PM +0100, tariqt@nvidia.com wrote:
>From: Cosmin Ratiu <cratiu@nvidia.com>
>
>Access to rates in a rate domain should be serialized.
>
>This commit introduces two new functions, devl_rate_domain_lock and
>devl_rate_domain_unlock, and uses them wherever serialized access to a
>rate domain is needed. For now, they are no-ops, since access to a rate
>domain is protected by the devlink instance lock.
>
>Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
>Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
>Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
>---
> net/devlink/devl_internal.h |   4 ++
> net/devlink/rate.c          | 114 +++++++++++++++++++++++++++---------
> 2 files changed, 89 insertions(+), 29 deletions(-)
>
>diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
>index 209b4a4c7070..fae81dd6953f 100644
>--- a/net/devlink/devl_internal.h
>+++ b/net/devlink/devl_internal.h
>@@ -121,6 +121,10 @@ static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
> 		device_unlock(devlink->dev);
> }
> 
>+static inline void devl_rate_domain_lock(struct devlink *devlink) { }
>+
>+static inline void devl_rate_domain_unlock(struct devlink *devlink) { }


For the record, I'm still not convinced that introducing this kind of
shared inter-devlink lock is a good idea. We spent quite a bit of
painful time getting rid of the global devlink_mutex and making the
devlink locking scheme as nice and simple as it currently is.

But at the same time, I admit I can't think of a nicer solution to the
problem this patchset is trying to solve.

Jakub, any thoughts?
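
To make the contrast explicit, the locking scopes at stake are roughly
as follows; the last line is an assumption about where the series is
headed, not code from this patch:

/*
 *   before:      static DEFINE_MUTEX(devlink_mutex);  // one lock for every instance
 *   today:       devlink->lock (devl_lock())          // one lock per instance
 *   this series: rate-domain lock                     // one lock per rate domain,
 *                                                     // shared by member instances
 */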
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jakub Kicinski 10 months ago
On Fri, 14 Feb 2025 13:54:43 +0100 Jiri Pirko wrote:
> For the record, I'm still not convinced that introducing this kind of
> shared inter-devlink lock is a good idea. We spent quite a bit of
> painful time getting rid of the global devlink_mutex and making the
> devlink locking scheme as nice and simple as it currently is.
> 
> But at the same time, I admit I can't think of a nicer solution to the
> problem this patchset is trying to solve.
> 
> Jakub, any thoughts?

The problem comes from having a devlink instance per function /
port rather than for the ASIC. Spawn a single instance and the
problem will go away 🤷️

I think we talked about this multiple times, I think at least
once with Jake, too. Not that I remember all the details now..
-- 
pw-bot: cr
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jiri Pirko 9 months, 3 weeks ago
Wed, Feb 19, 2025 at 03:21:30AM +0100, kuba@kernel.org wrote:
>On Fri, 14 Feb 2025 13:54:43 +0100 Jiri Pirko wrote:
>> For the record, I'm still not convinced that introducing this kind of
>> shared inter-devlink lock is a good idea. We spent quite a bit of
>> painful time getting rid of the global devlink_mutex and making the
>> devlink locking scheme as nice and simple as it currently is.
>> 
>> But at the same time, I admit I can't think of a nicer solution to the
>> problem this patchset is trying to solve.
>> 
>> Jakub, any thoughts?
>
>The problem comes from having a devlink instance per function /
>port rather than for the ASIC. Spawn a single instance and the
>problem will go away 🤷️

Yeah, we currently have VF devlink ports created under the PF devlink
instance. That is aligned with the PCI geometry. If we have a single
per-ASIC parent devlink, this does not change and we still need to
configure things across PF devlink instances.

The only benefit I see is that we don't need a rate domain, but can use
the parent devlink instance lock instead. The lock ordering might be a
bit tricky to get right, though.


>
>I think we talked about this multiple times, I think at least
>once with Jake, too. Not that I remember all the details now..
>-- 
>pw-bot: cr
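
A minimal sketch of the ordering question Jiri raises, assuming the
shared domain lock lands as a leaf lock under each per-instance devlink
lock; the function is illustrative, and only devl_lock/devl_unlock and
the helpers from this patch are real:

static void rate_domain_op(struct devlink *devlink)
{
	devl_lock(devlink);		/* per-instance devlink lock, first */
	devl_rate_domain_lock(devlink);	/* shared, one per rate domain */
	/* ... walk or modify devlink->rate_domain->rate_list ... */
	devl_rate_domain_unlock(devlink);
	devl_unlock(devlink);
}

With a single shared leaf lock there is one global order and no
deadlock; replacing it with the parent instance's lock would instead
nest two devlink instance locks, which is where the ordering gets
tricky.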
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jakub Kicinski 9 months, 3 weeks ago
On Tue, 25 Feb 2025 14:36:07 +0100 Jiri Pirko wrote:
> >The problem comes from having a devlink instance per function /
> >port rather than for the ASIC. Spawn a single instance and the
> >problem will go away 🤷️  
> 
> Yeah, we currently have VF devlink ports created under the PF devlink
> instance. That is aligned with the PCI geometry. If we have a single
> per-ASIC parent devlink, this does not change and we still need to
> configure things across PF devlink instances.

Why would there still be PF instances? I'm not suggesting that you
create a hierarchy of instances.

> The only benefit I see is that we don't need a rate domain, but can
> use the parent devlink instance lock instead. The lock ordering might
> be a bit tricky to get right, though.
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jiri Pirko 9 months, 3 weeks ago
Wed, Feb 26, 2025 at 02:40:05AM +0100, kuba@kernel.org wrote:
>On Tue, 25 Feb 2025 14:36:07 +0100 Jiri Pirko wrote:
>> >The problem comes from having a devlink instance per function /
>> >port rather than for the ASIC. Spawn a single instance and the
>> >problem will go away 🤷️  
>> 
>> Yeah, we currently have VF devlink ports created under the PF devlink
>> instance. That is aligned with the PCI geometry. If we have a single
>> per-ASIC parent devlink, this does not change and we still need to
>> configure things across PF devlink instances.
>
>Why would there still be PF instances? I'm not suggesting that you
>create a hierarchy of instances.

I'm not sure how you imagine getting rid of them. One PCI PF
instantiates one devlink now. There is a lot of configuration (e.g.
params) that is per-PF. You need this instance for that; how else would
you do per-PF things on a shared ASIC instance?
Creating SFs is a per-PF operation, for example. I didn't do a thorough
analysis, but I'm sure there are a couple of per-PF things like these.
Also, not breaking existing users may be an argument to keep per-PF
instances.

>
>> The only benefit I see is that we don't need rate domain, but
>> we can use parent devlink instance lock instead. The locking ordering
>> might be a bit tricky to fix though.
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jakub Kicinski 9 months, 3 weeks ago
On Wed, 26 Feb 2025 15:44:35 +0100 Jiri Pirko wrote:
> > Why would there still be PF instances? I'm not suggesting that you
> > create a hierarchy of instances.  
> 
> I'm not sure how you imagine getting rid of them. One PCI PF
> instantiates one devlink now. There is a lot of configuration (e.g.
> params) that is per-PF. You need this instance for that; how else
> would you do per-PF things on a shared ASIC instance?

There are per-PF ports, right?

> Creating SFs is a per-PF operation, for example. I didn't do a
> thorough analysis, but I'm sure there are a couple of per-PF things
> like these.

Seems like adding a port attribute to SF creation would be a much
smaller extension than adding a layer of objects.

> Also, not breaking existing users may be an argument to keep per-PF
> instances.

We're talking about multi-PF devices only. Besides, I'm pretty sure we
moved multiple params and health reporters to be per-port, so IDK what
changed now.
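
As a concrete reference for the SF point: SF creation already names the
owning PF on the command line today, so under a single per-ASIC instance
the per-PF part could plausibly stay an attribute rather than a layer of
objects. Existing devlink syntax, with a made-up PCI address and an
illustrative port index:

  $ devlink port add pci/0000:03:00.0 flavour pcisf pfnum 0 sfnum 88
  $ devlink port function set pci/0000:03:00.0/32768 \
        hw_addr 00:00:00:00:88:88 state active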
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jiri Pirko 9 months, 3 weeks ago
Thu, Feb 27, 2025 at 03:53:10AM +0100, kuba@kernel.org wrote:
>On Wed, 26 Feb 2025 15:44:35 +0100 Jiri Pirko wrote:
>> > Why would there still be PF instances? I'm not suggesting that you
>> > create a hierarchy of instances.  
>> 
>> I'm not sure how you imagine getting rid of them. One PCI PF
>> instantiates one devlink now. There is a lot of configuration (e.g.
>> params) that is per-PF. You need this instance for that; how else
>> would you do per-PF things on a shared ASIC instance?
>
>There are per-PF ports, right?

Depends. On normal host sr-iov, no. On smartnic where you have PF in
host, yes.


>
>> Creating SFs is a per-PF operation, for example. I didn't do a
>> thorough analysis, but I'm sure there are a couple of per-PF things
>> like these.
>
>Seems like adding a port attribute to SF creation would be a much
>smaller extension than adding a layer of objects.
>
>> Also, not breaking existing users may be an argument to keep per-PF
>> instances.
>
>We're talking about multi-PF devices only. Besides, I'm pretty sure we
>moved multiple params and health reporters to be per-port, so IDK what
>changed now.

Looks like pretty much all current NICs are multi-PFs, aren't they?
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jakub Kicinski 9 months, 2 weeks ago
On Thu, 27 Feb 2025 13:22:25 +0100 Jiri Pirko wrote:
> >> I'm not sure how you imagine getting rid of them. One PCI PF
> >> instantiates one devlink now. There is a lot of configuration (e.g.
> >> params) that is per-PF. You need this instance for that; how else
> >> would you do per-PF things on a shared ASIC instance?
> >
> >There are per-PF ports, right?  
> 
> Depends. On normal host sr-iov, no. On smartnic where you have PF in
> host, yes.

Yet another "great choice" in mlx5 that other drivers have foreseen
problems with and avoided.

> >> Creating SFs is a per-PF operation, for example. I didn't do a
> >> thorough analysis, but I'm sure there are a couple of per-PF things
> >> like these.
> >
> >Seems like adding a port attribute to SF creation would be a much
> >smaller extension than adding a layer of objects.
> >  
> >> Also, not breaking existing users may be an argument to keep per-PF
> >> instances.
> >
> >We're talking about multi-PF devices only. Besides, I'm pretty sure
> >we moved multiple params and health reporters to be per-port, so IDK
> >what changed now.
> 
> Looks like pretty much all current NICs are multi-PFs, aren't they?

Not in a way which requires cross-port state sharing, no.
You should know this.
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jiri Pirko 9 months, 2 weeks ago
Mon, Mar 03, 2025 at 11:06:23PM +0100, kuba@kernel.org wrote:
>On Thu, 27 Feb 2025 13:22:25 +0100 Jiri Pirko wrote:
>> >> I'm not sure how you imagine getting rid of them. One PCI PF
>> >> instantiates one devlink now. There is a lot of configuration (e.g.
>> >> params) that is per-PF. You need this instance for that; how else
>> >> would you do per-PF things on a shared ASIC instance?
>> >
>> >There are per-PF ports, right?  
>> 
>> Depends. On normal host sr-iov, no. On smartnic where you have PF in
>> host, yes.
>
>Yet another "great choice" in mlx5 that other drivers have foreseen
>problems with and avoided.

What do you mean? How else would you model it? Do you suggest having a
PF devlink port for the PF that instantiates it? That would sound like
Ouroboros to me.


>
>> >> Creating SFs is a per-PF operation, for example. I didn't do a
>> >> thorough analysis, but I'm sure there are a couple of per-PF things
>> >> like these.
>> >
>> >Seems like adding a port attribute to SF creation would be a much
>> >smaller extension than adding a layer of objects.
>> >  
>> >> Also, not breaking existing users may be an argument to keep
>> >> per-PF instances.
>> >
>> >We're talking about multi-PF devices only. Besides, I'm pretty sure
>> >we moved multiple params and health reporters to be per-port, so IDK
>> >what changed now.
>> 
>> Looks like pretty much all current NICs are multi-PFs, aren't they?
>
>Not in a way which requires cross-port state sharing, no.
>You should know this.

This is not about cross-port state sharing. This is about per-PF
configuration. What am I missing?
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jakub Kicinski 9 months, 2 weeks ago
On Tue, 4 Mar 2025 14:11:40 +0100 Jiri Pirko wrote:
> Mon, Mar 03, 2025 at 11:06:23PM +0100, kuba@kernel.org wrote:
> >On Thu, 27 Feb 2025 13:22:25 +0100 Jiri Pirko wrote:  
> >> Depends. On normal host sr-iov, no. On smartnic where you have PF in
> >> host, yes.  
> >
> >Yet another "great choice" in mlx5 that other drivers have foreseen
> >problems with and avoided.
> 
> What do you mean? How else would you model it? Do you suggest having
> a PF devlink port for the PF that instantiates it? That would sound
> like Ouroboros to me.

I reckon it was always more obvious to those of us working on
NPU-derived devices, to which a PCIe port is just a PCIe port,
with no PCIe<>MAC "pipeline" to speak of.

The reason why having the "PF port" is a good idea is exactly
why we're having this conversation. If you don't, you'll assign
to the global scope attributes which are really just port attributes.

> >> Looks like pretty much all current NICs are multi-PFs, aren't they?  
> >
> >Not in a way which requires cross-port state sharing, no.
> >You should know this.  
> 
> This is not about cross-port state sharing. This is about per-PF
> configuration. What am I missing?

Maybe we lost the thread of the conversation.. :)
I'm looking at the next patch in this series and it says:

  devlink: Introduce shared rate domains

  The underlying idea is modeling a piece of hardware which:
  1. Exposes multiple functions as separate devlink objects.
  2. Is capable of instantiating a transmit scheduling tree spanning
     multiple functions.

  Modeling this requires devlink rate nodes with parents across other
  devlink objects.

Are these domains not cross-port?
Re: [PATCH net-next 03/10] devlink: Serialize access to rate domains
Posted by Jiri Pirko 9 months, 2 weeks ago
Wed, Mar 05, 2025 at 01:04:12AM +0100, kuba@kernel.org wrote:
>On Tue, 4 Mar 2025 14:11:40 +0100 Jiri Pirko wrote:
>> Mon, Mar 03, 2025 at 11:06:23PM +0100, kuba@kernel.org wrote:
>> >On Thu, 27 Feb 2025 13:22:25 +0100 Jiri Pirko wrote:  
>> >> Depends. On normal host sr-iov, no. On smartnic where you have PF in
>> >> host, yes.  
>> >
>> >Yet another "great choice" in mlx5 that other drivers have foreseen
>> >problems with and avoided.
>> 
>> What do you mean? How else would you model it? Do you suggest having
>> a PF devlink port for the PF that instantiates it? That would sound
>> like Ouroboros to me.
>
>I reckon it was always more obvious to those of us working on
>NPU-derived devices, to which a PCIe port is just a PCIe port,
>with no PCIe<>MAC "pipeline" to speak of.
>
>The reason why having the "PF port" is a good idea is exactly
>why we're having this conversation. If you don't, you'll assign
>to the global scope attributes which are really just port attributes.

Well, we have a devlink port for the uplink for this purpose. Why isn't
that enough?


>
>> >> Looks like pretty much all current NICs are multi-PFs, aren't they?  
>> >
>> >Not in a way which requires cross-port state sharing, no.
>> >You should know this.  
>> 
>> This is not about cross-port state sharing. This is about per-PF
>> configuration. What am I missing?
>
>Maybe we lost the thread of the conversation.. :)
>I'm looking at the next patch in this series and it says:
>
>  devlink: Introduce shared rate domains
>
>  The underlying idea is modeling a piece of hardware which:
>  1. Exposes multiple functions as separate devlink objects.
>  2. Is capable of instantiating a transmit scheduling tree spanning
>     multiple functions.
>
>  Modeling this requires devlink rate nodes with parents across other
>  devlink objects.
>
>Are these domains not cross-port?

Sure. Cross-PF, even. What I suggest is: if we have a devlink instance
under which these 2 PFs are nested, we have this "domain" explicitly
defined and we don't need any other construct.
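
To make the closing suggestion concrete, a hedged sketch of the nesting
idea; the names and the parent pointer are assumptions, not an
implementation from this thread:

/*
 *   asic0 (parent devlink instance; owns the cross-PF scheduling tree)
 *    |-- pf0 devlink instance (per-PF config: params, SF creation, ...)
 *    `-- pf1 devlink instance
 *
 * Cross-PF rate operations would then take the parent instance's lock,
 * and the "domain" is simply the set of instances nested under asic0,
 * with no separate rate-domain construct needed.
 */
static struct devlink *rate_scope(struct devlink *devlink)
{
	return devlink->parent ?: devlink;	/* hypothetical parent field */
}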