[PATCHv3 net 1/2] bonding: add ns target multicast address to slave device

Hangbin Liu posted 2 patches 2 weeks, 4 days ago
There is a newer version of this series
[PATCHv3 net 1/2] bonding: add ns target multicast address to slave device
Posted by Hangbin Liu 2 weeks, 4 days ago
Commit 4598380f9c54 ("bonding: fix ns validation on backup slaves")
tried to resolve the issue where backup slaves couldn't be brought up when
receiving IPv6 Neighbor Solicitation (NS) messages. However, this fix only
worked for drivers that receive all multicast messages, such as the veth
interface.

For standard drivers, the NS multicast message is silently dropped because
the slave device is not a member of the NS target multicast group.

To address this, we need to make the slave device join the NS target
multicast group, ensuring it can receive these IPv6 NS messages to validate
the slave’s status properly.

There are three policies before joining the multicast group:
1. All settings must be under active-backup mode (alb and tlb do not support
   arp_validate), with backup slaves and slaves supporting multicast.
2. We can add or remove multicast groups when arp_validate changes.
3. Other operations, such as enslaving, releasing, or setting NS targets,
   need to be guarded by arp_validate.

Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 drivers/net/bonding/bond_main.c    | 18 ++++++-
 drivers/net/bonding/bond_options.c | 85 +++++++++++++++++++++++++++++-
 include/net/bond_options.h         |  1 +
 3 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b1bffd8e9a95..d7c1016619f9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1008,6 +1008,9 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
 
 		if (bond->dev->flags & IFF_UP)
 			bond_hw_addr_flush(bond->dev, old_active->dev);
+
+		/* add target NS maddrs for backup slave */
+		slave_set_ns_maddrs(bond, old_active, true);
 	}
 
 	if (new_active) {
@@ -1024,6 +1027,9 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
 			dev_mc_sync(new_active->dev, bond->dev);
 			netif_addr_unlock_bh(bond->dev);
 		}
+
+		/* clear target NS maddrs for active slave */
+		slave_set_ns_maddrs(bond, new_active, false);
 	}
 }
 
@@ -2341,6 +2347,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 	bond_compute_features(bond);
 	bond_set_carrier(bond);
 
+	/* set target NS maddrs for new slave, need to be called before
+	 * bond_select_active_slave(), which will remove the maddr if
+	 * the slave is selected as active slave
+	 */
+	slave_set_ns_maddrs(bond, new_slave, true);
+
 	if (bond_uses_primary(bond)) {
 		block_netpoll_tx();
 		bond_select_active_slave(bond);
@@ -2350,7 +2362,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 	if (bond_mode_can_use_xmit_hash(bond))
 		bond_update_slave_arr(bond, NULL);
 
-
 	if (!slave_dev->netdev_ops->ndo_bpf ||
 	    !slave_dev->netdev_ops->ndo_xdp_xmit) {
 		if (bond->xdp_prog) {
@@ -2548,6 +2559,11 @@ static int __bond_release_one(struct net_device *bond_dev,
 	if (oldcurrent == slave)
 		bond_change_active_slave(bond, NULL);
 
+	/* clear target NS maddrs, must after bond_change_active_slave()
+	 * as we need to clear the maddrs on backup slave
+	 */
+	slave_set_ns_maddrs(bond, slave, false);
+
 	if (bond_is_lb(bond)) {
 		/* Must be called only after the slave has been
 		 * detached from the list and the curr_active_slave
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 95d59a18c022..60368cef2704 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -15,6 +15,7 @@
 #include <linux/sched/signal.h>
 
 #include <net/bonding.h>
+#include <net/ndisc.h>
 
 static int bond_option_active_slave_set(struct bonding *bond,
 					const struct bond_opt_value *newval);
@@ -1234,6 +1235,64 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
+static bool slave_can_set_ns_maddr(struct bonding *bond, struct slave *slave)
+{
+	return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
+	       !bond_is_active_slave(slave) &&
+	       slave->dev->flags & IFF_MULTICAST;
+}
+
+static void _slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
+{
+	struct in6_addr *targets = bond->params.ns_targets;
+	char slot_maddr[MAX_ADDR_LEN];
+	int i;
+
+	if (!slave_can_set_ns_maddr(bond, slave))
+		return;
+
+	for (i = 0; i < BOND_MAX_NS_TARGETS; i++) {
+		if (ipv6_addr_any(&targets[i]))
+			break;
+
+		if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) {
+			if (add)
+				dev_mc_add(slave->dev, slot_maddr);
+			else
+				dev_mc_del(slave->dev, slot_maddr);
+		}
+	}
+}
+
+void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
+{
+	if (!bond->params.arp_validate)
+		return;
+
+	_slave_set_ns_maddrs(bond, slave, add);
+}
+
+static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave,
+			       struct in6_addr *target, struct in6_addr *slot)
+{
+	char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN];
+
+	if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave))
+		return;
+
+	/* remove the previous maddr on salve */
+	if (!ipv6_addr_any(slot) &&
+	    !ndisc_mc_map(slot, slot_maddr, slave->dev, 0)) {
+		dev_mc_del(slave->dev, slot_maddr);
+	}
+
+	/* add new maddr on slave if target is set */
+	if (!ipv6_addr_any(target) &&
+	    !ndisc_mc_map(target, target_maddr, slave->dev, 0)) {
+		dev_mc_add(slave->dev, target_maddr);
+	}
+}
+
 static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
 					    struct in6_addr *target,
 					    unsigned long last_rx)
@@ -1243,8 +1302,10 @@ static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
 	struct slave *slave;
 
 	if (slot >= 0 && slot < BOND_MAX_NS_TARGETS) {
-		bond_for_each_slave(bond, slave, iter)
+		bond_for_each_slave(bond, slave, iter) {
 			slave->target_last_arp_rx[slot] = last_rx;
+			slave_set_ns_maddr(bond, slave, target, &targets[slot]);
+		}
 		targets[slot] = *target;
 	}
 }
@@ -1296,15 +1357,37 @@ static int bond_option_ns_ip6_targets_set(struct bonding *bond,
 {
 	return -EPERM;
 }
+
+static void _slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
+{
+}
+
+void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
+{
+}
 #endif
 
 static int bond_option_arp_validate_set(struct bonding *bond,
 					const struct bond_opt_value *newval)
 {
+	bool changed = (bond->params.arp_validate == 0 && newval->value != 0) ||
+		       (bond->params.arp_validate != 0 && newval->value == 0);
+	struct list_head *iter;
+	struct slave *slave;
+
 	netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n",
 		   newval->string, newval->value);
 	bond->params.arp_validate = newval->value;
 
+	if (changed) {
+		bond_for_each_slave(bond, slave, iter) {
+			if (bond->params.arp_validate)
+				_slave_set_ns_maddrs(bond, slave, true);
+			else
+				_slave_set_ns_maddrs(bond, slave, false);
+		}
+	}
+
 	return 0;
 }
 
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 473a0147769e..59a91d12cd57 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -161,5 +161,6 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond);
 #if IS_ENABLED(CONFIG_IPV6)
 void bond_option_ns_ip6_targets_clear(struct bonding *bond);
 #endif
+void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add);
 
 #endif /* _NET_BOND_OPTIONS_H */
-- 
2.46.0

Re: [PATCHv3 net 1/2] bonding: add ns target multicast address to slave device
Posted by Nikolay Aleksandrov 2 weeks, 3 days ago
On Wed, Nov 06, 2024 at 05:14:41AM +0000, Hangbin Liu wrote:
> Commit 4598380f9c54 ("bonding: fix ns validation on backup slaves")
> tried to resolve the issue where backup slaves couldn't be brought up when
> receiving IPv6 Neighbor Solicitation (NS) messages. However, this fix only
> worked for drivers that receive all multicast messages, such as the veth
> interface.
> 
> For standard drivers, the NS multicast message is silently dropped because
> the slave device is not a member of the NS target multicast group.
> 
> To address this, we need to make the slave device join the NS target
> multicast group, ensuring it can receive these IPv6 NS messages to validate
> the slave’s status properly.
> 
> There are three policies before joining the multicast group:
> 1. All settings must be under active-backup mode (alb and tlb do not support
>    arp_validate), with backup slaves and slaves supporting multicast.
> 2. We can add or remove multicast groups when arp_validate changes.
> 3. Other operations, such as enslaving, releasing, or setting NS targets,
>    need to be guarded by arp_validate.
> 
> Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets")
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  drivers/net/bonding/bond_main.c    | 18 ++++++-
>  drivers/net/bonding/bond_options.c | 85 +++++++++++++++++++++++++++++-
>  include/net/bond_options.h         |  1 +
>  3 files changed, 102 insertions(+), 2 deletions(-)
> 

Hi,
A few minor comments below,

> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index b1bffd8e9a95..d7c1016619f9 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -1008,6 +1008,9 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
>  
>  		if (bond->dev->flags & IFF_UP)
>  			bond_hw_addr_flush(bond->dev, old_active->dev);
> +
> +		/* add target NS maddrs for backup slave */

So instead of having these redundant comments, maybe add helper functions
to show better what's happening, e.g. slave_ns_maddrs_add()...

> +		slave_set_ns_maddrs(bond, old_active, true);
>  	}
>  
>  	if (new_active) {
> @@ -1024,6 +1027,9 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
>  			dev_mc_sync(new_active->dev, bond->dev);
>  			netif_addr_unlock_bh(bond->dev);
>  		}
> +
> +		/* clear target NS maddrs for active slave */
> +		slave_set_ns_maddrs(bond, new_active, false);

... same here slave_ns_maddrs_del()

These true/false arguments for add/del require the reader to go search what
they mean.

>  	}
>  }
>  
> @@ -2341,6 +2347,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
>  	bond_compute_features(bond);
>  	bond_set_carrier(bond);
>  
> +	/* set target NS maddrs for new slave, need to be called before
> +	 * bond_select_active_slave(), which will remove the maddr if
> +	 * the slave is selected as active slave
> +	 */

"needs to be called...which will remove the maddr*s*"

> +	slave_set_ns_maddrs(bond, new_slave, true);
> +
>  	if (bond_uses_primary(bond)) {
>  		block_netpoll_tx();
>  		bond_select_active_slave(bond);
> @@ -2350,7 +2362,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
>  	if (bond_mode_can_use_xmit_hash(bond))
>  		bond_update_slave_arr(bond, NULL);
>  
> -
>  	if (!slave_dev->netdev_ops->ndo_bpf ||
>  	    !slave_dev->netdev_ops->ndo_xdp_xmit) {
>  		if (bond->xdp_prog) {
> @@ -2548,6 +2559,11 @@ static int __bond_release_one(struct net_device *bond_dev,
>  	if (oldcurrent == slave)
>  		bond_change_active_slave(bond, NULL);
>  
> +	/* clear target NS maddrs, must after bond_change_active_slave()
> +	 * as we need to clear the maddrs on backup slave
> +	 */

This should be re-worded. I think there's a word missing as well in
"...must after...".

> +	slave_set_ns_maddrs(bond, slave, false);
> +
>  	if (bond_is_lb(bond)) {
>  		/* Must be called only after the slave has been
>  		 * detached from the list and the curr_active_slave
> diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
> index 95d59a18c022..60368cef2704 100644
> --- a/drivers/net/bonding/bond_options.c
> +++ b/drivers/net/bonding/bond_options.c
> @@ -15,6 +15,7 @@
>  #include <linux/sched/signal.h>
>  
>  #include <net/bonding.h>
> +#include <net/ndisc.h>
>  
>  static int bond_option_active_slave_set(struct bonding *bond,
>  					const struct bond_opt_value *newval);
> @@ -1234,6 +1235,64 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
>  }
>  
>  #if IS_ENABLED(CONFIG_IPV6)
> +static bool slave_can_set_ns_maddr(struct bonding *bond, struct slave *slave)

const bond/slave

> +{
> +	return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
> +	       !bond_is_active_slave(slave) &&
> +	       slave->dev->flags & IFF_MULTICAST;
> +}
> +
> +static void _slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)

Can we please stick to using double underscore for internal helpers?
I see there are a few other places where a single underscore is used in this
file which I've missed before, we can take care of those to make them consistent
with the rest of the bonding code.

> +{
> +	struct in6_addr *targets = bond->params.ns_targets;
> +	char slot_maddr[MAX_ADDR_LEN];
> +	int i;
> +
> +	if (!slave_can_set_ns_maddr(bond, slave))
> +		return;
> +
> +	for (i = 0; i < BOND_MAX_NS_TARGETS; i++) {
> +		if (ipv6_addr_any(&targets[i]))
> +			break;
> +
> +		if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) {
> +			if (add)
> +				dev_mc_add(slave->dev, slot_maddr);
> +			else
> +				dev_mc_del(slave->dev, slot_maddr);
> +		}
> +	}
> +}
> +
> +void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)

Usually exported slave functions have the bond_ prefix, e.g. bond_slave...

> +{
> +	if (!bond->params.arp_validate)
> +		return;
> +
> +	_slave_set_ns_maddrs(bond, slave, add);
> +}
> +
> +static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave,
> +			       struct in6_addr *target, struct in6_addr *slot)

> +{
> +	char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN];
> +
> +	if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave))
> +		return;
> +
> +	/* remove the previous maddr on salve */

s/salve/slave/
s/on/from/

> +	if (!ipv6_addr_any(slot) &&
> +	    !ndisc_mc_map(slot, slot_maddr, slave->dev, 0)) {
> +		dev_mc_del(slave->dev, slot_maddr);
> +	}

drop unnecessary {}

> +
> +	/* add new maddr on slave if target is set */
> +	if (!ipv6_addr_any(target) &&
> +	    !ndisc_mc_map(target, target_maddr, slave->dev, 0)) {
> +		dev_mc_add(slave->dev, target_maddr);
> +	}

drop unnesecary {}

> +}
> +
>  static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
>  					    struct in6_addr *target,
>  					    unsigned long last_rx)
> @@ -1243,8 +1302,10 @@ static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
>  	struct slave *slave;
>  
>  	if (slot >= 0 && slot < BOND_MAX_NS_TARGETS) {
> -		bond_for_each_slave(bond, slave, iter)
> +		bond_for_each_slave(bond, slave, iter) {
>  			slave->target_last_arp_rx[slot] = last_rx;
> +			slave_set_ns_maddr(bond, slave, target, &targets[slot]);
> +		}
>  		targets[slot] = *target;
>  	}
>  }
> @@ -1296,15 +1357,37 @@ static int bond_option_ns_ip6_targets_set(struct bonding *bond,
>  {
>  	return -EPERM;
>  }
> +
> +static void _slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
> +{
> +}
> +
> +void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
> +{
> +}
>  #endif
>  
>  static int bond_option_arp_validate_set(struct bonding *bond,
>  					const struct bond_opt_value *newval)
>  {
> +	bool changed = (bond->params.arp_validate == 0 && newval->value != 0) ||
> +		       (bond->params.arp_validate != 0 && newval->value == 0);

!!bond->params.arp_validate != !!newval->value

> +	struct list_head *iter;
> +	struct slave *slave;
> +
>  	netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n",
>  		   newval->string, newval->value);
>  	bond->params.arp_validate = newval->value;
>  
> +	if (changed) {
> +		bond_for_each_slave(bond, slave, iter) {
> +			if (bond->params.arp_validate)
> +				_slave_set_ns_maddrs(bond, slave, true);
> +			else
> +				_slave_set_ns_maddrs(bond, slave, false);
> +		}
> +	}

This block can be written shortly as:
 bond_for_each_slave(bond, slave, iter)
	_slave_set_ns_maddrs(bond, slave, !!bond->params.arp_validate);

> +
>  	return 0;
>  }
>  
> diff --git a/include/net/bond_options.h b/include/net/bond_options.h
> index 473a0147769e..59a91d12cd57 100644
> --- a/include/net/bond_options.h
> +++ b/include/net/bond_options.h
> @@ -161,5 +161,6 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond);
>  #if IS_ENABLED(CONFIG_IPV6)
>  void bond_option_ns_ip6_targets_clear(struct bonding *bond);
>  #endif
> +void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add);
>  
>  #endif /* _NET_BOND_OPTIONS_H */
> -- 
> 2.46.0
> 

Cheers,
 Nik
Re: [PATCHv3 net 1/2] bonding: add ns target multicast address to slave device
Posted by Hangbin Liu 1 week, 5 days ago
On Wed, Nov 06, 2024 at 12:25:10PM +0200, Nikolay Aleksandrov wrote:
> > diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
> > index 95d59a18c022..60368cef2704 100644
> > --- a/drivers/net/bonding/bond_options.c
> > +++ b/drivers/net/bonding/bond_options.c
> > @@ -15,6 +15,7 @@
> >  #include <linux/sched/signal.h>
> >  
> >  #include <net/bonding.h>
> > +#include <net/ndisc.h>
> >  
> >  static int bond_option_active_slave_set(struct bonding *bond,
> >  					const struct bond_opt_value *newval);
> > @@ -1234,6 +1235,64 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
> >  }
> >  
> >  #if IS_ENABLED(CONFIG_IPV6)
> > +static bool slave_can_set_ns_maddr(struct bonding *bond, struct slave *slave)
> 
> const bond/slave
> 
> > +{
> > +	return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
> > +	       !bond_is_active_slave(slave) &&
> > +	       slave->dev->flags & IFF_MULTICAST;
> > +}

Hi, FYI, in new patch I only set bond to const as slave will be called
by bond_is_active_slave().

Thanks
Hangbin
Re: [PATCHv3 net 1/2] bonding: add ns target multicast address to slave device
Posted by Nikolay Aleksandrov 1 week, 4 days ago
On 11/11/24 12:05, Hangbin Liu wrote:
> On Wed, Nov 06, 2024 at 12:25:10PM +0200, Nikolay Aleksandrov wrote:
>>> diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
>>> index 95d59a18c022..60368cef2704 100644
>>> --- a/drivers/net/bonding/bond_options.c
>>> +++ b/drivers/net/bonding/bond_options.c
>>> @@ -15,6 +15,7 @@
>>>  #include <linux/sched/signal.h>
>>>  
>>>  #include <net/bonding.h>
>>> +#include <net/ndisc.h>
>>>  
>>>  static int bond_option_active_slave_set(struct bonding *bond,
>>>  					const struct bond_opt_value *newval);
>>> @@ -1234,6 +1235,64 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
>>>  }
>>>  
>>>  #if IS_ENABLED(CONFIG_IPV6)
>>> +static bool slave_can_set_ns_maddr(struct bonding *bond, struct slave *slave)
>>
>> const bond/slave
>>
>>> +{
>>> +	return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
>>> +	       !bond_is_active_slave(slave) &&
>>> +	       slave->dev->flags & IFF_MULTICAST;
>>> +}
> 
> Hi, FYI, in new patch I only set bond to const as slave will be called
> by bond_is_active_slave().
> 
> Thanks
> Hangbin

Yeah, I figured. :) Thanks for letting me know!
Re: [PATCHv3 net 1/2] bonding: add ns target multicast address to slave device
Posted by Hangbin Liu 2 weeks, 3 days ago
On Wed, Nov 06, 2024 at 12:25:10PM +0200, Nikolay Aleksandrov wrote:
> Hi,
> A few minor comments below,

Thanks for all the comments. I will fix them in next version.

Hangbin