[PATCH] net: ethtool: fix unheld rtnl lock

Diogo Jahchan Koike posted 1 patch 1 year, 3 months ago
There is a newer version of this series
net/ethtool/pse-pd.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
[PATCH] net: ethtool: fix unheld rtnl lock
Posted by Diogo Jahchan Koike 1 year, 3 months ago
ethnl_req_get_phydev should be called with rtnl lock
held.

Reported-by: syzbot+ec369e6d58e210135f71@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=ec369e6d58e210135f71
Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY")
Signed-off-by: Diogo Jahchan Koike <djahchankoike@gmail.com>
---
 net/ethtool/pse-pd.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index 507cb21d6bf0..290edbfd47d2 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -226,17 +226,21 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
 {
 	struct nlattr **tb = info->attrs;
 	struct phy_device *phydev;
+	int ret = 1;
 
+	rtnl_lock();
 	phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
 				      info->extack);
 	if (IS_ERR_OR_NULL(phydev)) {
 		NL_SET_ERR_MSG(info->extack, "No PHY is attached");
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
 
 	if (!phydev->psec) {
 		NL_SET_ERR_MSG(info->extack, "No PSE is attached");
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
 
 	if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] &&
@@ -244,17 +248,21 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
 		NL_SET_ERR_MSG_ATTR(info->extack,
 				    tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL],
 				    "setting PoDL PSE admin control not supported");
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
 	if (tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL] &&
 	    !pse_has_c33(phydev->psec)) {
 		NL_SET_ERR_MSG_ATTR(info->extack,
 				    tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL],
 				    "setting C33 PSE admin control not supported");
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
 
-	return 1;
+out:
+	rtnl_unlock();
+	return ret;
 }
 
 static int
-- 
2.43.0
Re: [PATCH] net: ethtool: fix unheld rtnl lock
Posted by Przemek Kitszel 1 year, 3 months ago
On 8/26/24 15:06, Diogo Jahchan Koike wrote:
> ethnl_req_get_phydev should be called with rtnl lock
> held.

Next time please make use of more columns in commit message (75).

> 
> Reported-by: syzbot+ec369e6d58e210135f71@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=ec369e6d58e210135f71
> Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY")
> Signed-off-by: Diogo Jahchan Koike <djahchankoike@gmail.com>
> ---
>   net/ethtool/pse-pd.c | 18 +++++++++++++-----
>   1 file changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
> index 507cb21d6bf0..290edbfd47d2 100644
> --- a/net/ethtool/pse-pd.c
> +++ b/net/ethtool/pse-pd.c
> @@ -226,17 +226,21 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
>   {
>   	struct nlattr **tb = info->attrs;
>   	struct phy_device *phydev;
> +	int ret = 1;
>   
> +	rtnl_lock();
>   	phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
>   				      info->extack);

Fix looks fine, thanks, I will however unlock just after
ethnl_req_get_phydev() call.
Then all ret code logic touches could be omitted.

>   	if (IS_ERR_OR_NULL(phydev)) {
>   		NL_SET_ERR_MSG(info->extack, "No PHY is attached");
> -		return -EOPNOTSUPP;
> +		ret = -EOPNOTSUPP;
> +		goto out;
>   	}
>   
>   	if (!phydev->psec) {
>   		NL_SET_ERR_MSG(info->extack, "No PSE is attached");
> -		return -EOPNOTSUPP;
> +		ret = -EOPNOTSUPP;
> +		goto out;
>   	}
>   
>   	if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] &&
> @@ -244,17 +248,21 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
>   		NL_SET_ERR_MSG_ATTR(info->extack,
>   				    tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL],
>   				    "setting PoDL PSE admin control not supported");
> -		return -EOPNOTSUPP;
> +		ret = -EOPNOTSUPP;
> +		goto out;
>   	}
>   	if (tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL] &&
>   	    !pse_has_c33(phydev->psec)) {
>   		NL_SET_ERR_MSG_ATTR(info->extack,
>   				    tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL],
>   				    "setting C33 PSE admin control not supported");
> -		return -EOPNOTSUPP;
> +		ret = -EOPNOTSUPP;
> +		goto out;
>   	}
>   
> -	return 1;
> +out:
> +	rtnl_unlock();
> +	return ret;
>   }
>   
>   static int
[patch net-next v3] net: ethtool: fix unheld rtnl lock
Posted by Diogo Jahchan Koike 1 year, 3 months ago
ethnl_req_get_phydev should be called with rtnl lock held.

Reported-by: syzbot+ec369e6d58e210135f71@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=ec369e6d58e210135f71
Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY")
Signed-off-by: Diogo Jahchan Koike <djahchankoike@gmail.com>
---
 net/ethtool/pse-pd.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index 507cb21d6bf0..290edbfd47d2 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -226,17 +226,21 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
 {
 	struct nlattr **tb = info->attrs;
 	struct phy_device *phydev;
+	int ret = 1;
 
+	rtnl_lock();
 	phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
 				      info->extack);
 	if (IS_ERR_OR_NULL(phydev)) {
 		NL_SET_ERR_MSG(info->extack, "No PHY is attached");
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
 
 	if (!phydev->psec) {
 		NL_SET_ERR_MSG(info->extack, "No PSE is attached");
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
 
 	if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] &&
@@ -244,17 +248,21 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
 		NL_SET_ERR_MSG_ATTR(info->extack,
 				    tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL],
 				    "setting PoDL PSE admin control not supported");
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
 	if (tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL] &&
 	    !pse_has_c33(phydev->psec)) {
 		NL_SET_ERR_MSG_ATTR(info->extack,
 				    tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL],
 				    "setting C33 PSE admin control not supported");
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
 
-	return 1;
+out:
+	rtnl_unlock();
+	return ret;
 }
 
 static int
-- 
2.43.0
Re: [patch net-next v3] net: ethtool: fix unheld rtnl lock
Posted by Maxime Chevallier 1 year, 3 months ago
Hi,

On Mon, 26 Aug 2024 14:38:53 -0300
Diogo Jahchan Koike <djahchankoike@gmail.com> wrote:

> ethnl_req_get_phydev should be called with rtnl lock held.
> 
> Reported-by: syzbot+ec369e6d58e210135f71@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=ec369e6d58e210135f71
> Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY")
> Signed-off-by: Diogo Jahchan Koike <djahchankoike@gmail.com>

This looks good to me.

Even though RTNL is released between the .validate() and .set()
calls, should the PHY disappear, the .set() callback handles that. 

Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>

Thanks,

Maxime
Re: [patch net-next v3] net: ethtool: fix unheld rtnl lock
Posted by Jakub Kicinski 1 year, 3 months ago
On Tue, 27 Aug 2024 09:23:36 +0200 Maxime Chevallier wrote:
> On Mon, 26 Aug 2024 14:38:53 -0300
> Diogo Jahchan Koike <djahchankoike@gmail.com> wrote:
> 
> > ethnl_req_get_phydev should be called with rtnl lock held.
> > 
> > Reported-by: syzbot+ec369e6d58e210135f71@syzkaller.appspotmail.com
> > Closes: https://syzkaller.appspot.com/bug?extid=ec369e6d58e210135f71
> > Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY")
> > Signed-off-by: Diogo Jahchan Koike <djahchankoike@gmail.com>  
> 
> This looks good to me.
> 
> Even though RTNL is released between the .validate() and .set()
> calls, should the PHY disappear, the .set() callback handles that. 
> 
> Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>

I know this isn't very well documented, but the point of .set_validate
is to perform checks before taking rtnl_lock (which may be quite
heavily contended), and potentially skip .set completely.
See 99132b6eb792 ("ethtool: netlink: handle SET intro/outro in the
common code"). Since we take rtnl lock and always return 1, this starts
to feel a bit cart before the horse.

How about we move the validation into set? (following code for
illustration only, please modify/test/review carefully and submit
as v4 if agreed on):

diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index ff81aa749784..18759d8f85a5 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -217,13 +217,10 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
 };
 
 static int
-ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+ethnl_set_pse_validate(struct phy_device *phydev, struct genl_info *info)
 {
-	struct net_device *dev = req_info->dev;
 	struct nlattr **tb = info->attrs;
-	struct phy_device *phydev;
 
-	phydev = dev->phydev;
 	if (!phydev) {
 		NL_SET_ERR_MSG(info->extack, "No PHY is attached");
 		return -EOPNOTSUPP;
@@ -249,7 +246,7 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
 		return -EOPNOTSUPP;
 	}
 
-	return 1;
+	return 0;
 }
 
 static int
@@ -258,10 +255,14 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
 	struct net_device *dev = req_info->dev;
 	struct nlattr **tb = info->attrs;
 	struct phy_device *phydev;
-	int ret = 0;
+	int ret;
 
 	phydev = dev->phydev;
 
+	ret = ethnl_set_pse_validate(phydev, info);
+	if (ret)
+		return ret;
+
 	if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) {
 		unsigned int pw_limit;
 
@@ -307,7 +308,6 @@ const struct ethnl_request_ops ethnl_pse_request_ops = {
 	.fill_reply		= pse_fill_reply,
 	.cleanup_data		= pse_cleanup_data,
 
-	.set_validate		= ethnl_set_pse_validate,
 	.set			= ethnl_set_pse,
 	/* PSE has no notification */
 };
Re: [patch net-next v3] net: ethtool: fix unheld rtnl lock
Posted by Maxime Chevallier 1 year, 3 months ago
Hi Juakub,

On Tue, 27 Aug 2024 12:46:53 -0700
Jakub Kicinski <kuba@kernel.org> wrote:

> On Tue, 27 Aug 2024 09:23:36 +0200 Maxime Chevallier wrote:
> > On Mon, 26 Aug 2024 14:38:53 -0300
> > Diogo Jahchan Koike <djahchankoike@gmail.com> wrote:
> >   
> > > ethnl_req_get_phydev should be called with rtnl lock held.
> > > 
> > > Reported-by: syzbot+ec369e6d58e210135f71@syzkaller.appspotmail.com
> > > Closes: https://syzkaller.appspot.com/bug?extid=ec369e6d58e210135f71
> > > Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY")
> > > Signed-off-by: Diogo Jahchan Koike <djahchankoike@gmail.com>    
> > 
> > This looks good to me.
> > 
> > Even though RTNL is released between the .validate() and .set()
> > calls, should the PHY disappear, the .set() callback handles that. 
> > 
> > Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>  
> 
> I know this isn't very well documented, but the point of .set_validate
> is to perform checks before taking rtnl_lock (which may be quite
> heavily contended), and potentially skip .set completely.
> See 99132b6eb792 ("ethtool: netlink: handle SET intro/outro in the
> common code"). Since we take rtnl lock and always return 1, this starts
> to feel a bit cart before the horse.

That explanation makes a lot of sense, I didn't have in mind that this
is what .set_validate is for.

> How about we move the validation into set? (following code for
> illustration only, please modify/test/review carefully and submit
> as v4 if agreed on):

That would work for me, that makes more sense than the current
approach.

> 
> diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
> index ff81aa749784..18759d8f85a5 100644
> --- a/net/ethtool/pse-pd.c
> +++ b/net/ethtool/pse-pd.c
> @@ -217,13 +217,10 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
>  };
>  
>  static int
> -ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
> +ethnl_set_pse_validate(struct phy_device *phydev, struct genl_info *info)
>  {
> -	struct net_device *dev = req_info->dev;
>  	struct nlattr **tb = info->attrs;
> -	struct phy_device *phydev;
>  
> -	phydev = dev->phydev;
>  	if (!phydev) {
>  		NL_SET_ERR_MSG(info->extack, "No PHY is attached");
>  		return -EOPNOTSUPP;
> @@ -249,7 +246,7 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
>  		return -EOPNOTSUPP;
>  	}
>  
> -	return 1;
> +	return 0;
>  }
>  
>  static int
> @@ -258,10 +255,14 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
>  	struct net_device *dev = req_info->dev;
>  	struct nlattr **tb = info->attrs;
>  	struct phy_device *phydev;
> -	int ret = 0;
> +	int ret;
>  
>  	phydev = dev->phydev;

With the updated PHY code, the above context would look like this :

		phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
					      info->extack);
		if (IS_ERR_OR_NULL(phydev))
			return -ENODEV;

>  
> +	ret = ethnl_set_pse_validate(phydev, info);
> +	if (ret)
> +		return ret;
> +
>  	if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) {
>  		unsigned int pw_limit;
>  
> @@ -307,7 +308,6 @@ const struct ethnl_request_ops ethnl_pse_request_ops = {
>  	.fill_reply		= pse_fill_reply,
>  	.cleanup_data		= pse_cleanup_data,
>  
> -	.set_validate		= ethnl_set_pse_validate,
>  	.set			= ethnl_set_pse,
>  	/* PSE has no notification */
>  };

This is OK for me. Diogo, as you started addressing this, is it OK for
you to send a V4 with Jakub's proposed changes ?

Thanks,

Maxime
Re: [patch net-next v3] net: ethtool: fix unheld rtnl lock
Posted by Eric Dumazet 1 year, 3 months ago
On Mon, Aug 26, 2024 at 7:39 PM Diogo Jahchan Koike
<djahchankoike@gmail.com> wrote:
>
> ethnl_req_get_phydev should be called with rtnl lock held.
>
> Reported-by: syzbot+ec369e6d58e210135f71@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=ec369e6d58e210135f71
> Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY")
> Signed-off-by: Diogo Jahchan Koike <djahchankoike@gmail.com>

Quoting Documentation/process/maintainer-netdev.rst

Resending after review
~~~~~~~~~~~~~~~~~~~~~~

Allow at least 24 hours to pass between postings. This will ensure reviewers
from all geographical locations have a chance to chime in. Do not wait
too long (weeks) between postings either as it will make it harder for reviewers
to recall all the context.

Make sure you address all the feedback in your new posting. Do not post a new
version of the code if the discussion about the previous version is still
ongoing, unless directly instructed by a reviewer.

The new version of patches should be posted as a separate thread,
not as a reply to the previous posting. Change log should include a link
to the previous posting (see :ref:`Changes requested`).


Thank you.
[patch net-next v2] net: ethtool: fix unheld rtnl lock
Posted by Diogo Jahchan Koike 1 year, 3 months ago
ethnl_req_get_phydev should be called with rtnl lock held.

Reported-by: syzbot+ec369e6d58e210135f71@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=ec369e6d58e210135f71
Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY")
Signed-off-by: Diogo Jahchan Koike <djahchankoike@gmail.com>
---
 net/ethtool/pse-pd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index 507cb21d6bf0..0cd298851ea1 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -227,8 +227,11 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
 	struct nlattr **tb = info->attrs;
 	struct phy_device *phydev;
 
+	rtnl_lock();
 	phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
 				      info->extack);
+	rtnl_unlock();
+
 	if (IS_ERR_OR_NULL(phydev)) {
 		NL_SET_ERR_MSG(info->extack, "No PHY is attached");
 		return -EOPNOTSUPP;
-- 
2.43.0
Re: [patch net-next v2] net: ethtool: fix unheld rtnl lock
Posted by Maxime Chevallier 1 year, 3 months ago
Hi,

Thanks for addressing this. I do have some comments though :

On Mon, 26 Aug 2024 11:06:13 -0300
Diogo Jahchan Koike <djahchankoike@gmail.com> wrote:

> ethnl_req_get_phydev should be called with rtnl lock held.
> 
> Reported-by: syzbot+ec369e6d58e210135f71@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=ec369e6d58e210135f71
> Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY")
> Signed-off-by: Diogo Jahchan Koike <djahchankoike@gmail.com>
> ---
>  net/ethtool/pse-pd.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
> index 507cb21d6bf0..0cd298851ea1 100644
> --- a/net/ethtool/pse-pd.c
> +++ b/net/ethtool/pse-pd.c
> @@ -227,8 +227,11 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
>  	struct nlattr **tb = info->attrs;
>  	struct phy_device *phydev;
>  
> +	rtnl_lock();
>  	phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
>  				      info->extack);
> +	rtnl_unlock();

RTNL lock must be held until the PHY device is no longer being used, as
it may disappear at any point [1]. RTNL protects against that. The first
iteration of your patch had the right idea, as the lock was released at
the end of the function.

[1] : https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/tree/net/ethtool/netlink.h#n281

Thanks,

Maxime
Re: [patch net-next v2] net: ethtool: fix unheld rtnl lock
Posted by Diogo Jahchan Koike 1 year, 3 months ago
Hi Maxime

Thanks for the clarification, I missed that. Should I resend my first patch
or should I release the lock before every return (tbh, I feel like that may
lead to a lot of repeated code) and send a new patch?

Thanks,
	Diogo Jahchan Koike
Re: [patch net-next v2] net: ethtool: fix unheld rtnl lock
Posted by Christophe Leroy 1 year, 3 months ago
Hi Diogo,

Le 26/08/2024 à 19:00, Diogo Jahchan Koike a écrit :
> [Vous ne recevez pas souvent de courriers de djahchankoike@gmail.com. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> 
> Hi Maxime
> 
> Thanks for the clarification, I missed that. Should I resend my first patch
> or should I release the lock before every return (tbh, I feel like that may
> lead to a lot of repeated code) and send a new patch?
> 

Do not duplicate release lock before every return.

See 
https://docs.kernel.org/process/coding-style.html#centralized-exiting-of-functions

Christophe
Re: [patch net-next v2] net: ethtool: fix unheld rtnl lock
Posted by Diogo Jahchan Koike 1 year, 3 months ago
Hi Cristophe

Yes, thanks for reviewing, I will just resend my first patch that used the out
label now that I am aware that the lock has to be held during all interactions
with the device.

Thanks,
	Diogo Jahchan Koike