include/net/bonding.h | 5 +++++ 2 files changed, 65 insertions(+), 1 deletion(-)
In bond_setup_by_slave(), the slave’s header_ops are unconditionally
copied into the bonding device. As a result, the bonding device may invoke
the slave-specific header operations on itself, causing
netdev_priv(bond_dev) (a struct bonding) to be incorrectly interpreted
as the slave's private-data type.
This type-confusion bug can lead to out-of-bounds writes into the skb,
resulting in memory corruption.
This patch adds two members to struct bonding, bond_header_ops and
header_slave_dev, to avoid type-confusion while keeping track of the
slave's header_ops.
Fixes: 1284cd3a2b740 (bonding: two small fixes for IPoIB support)
Signed-off-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
Signed-off-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
Co-Developed-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
Reviewed-by: Paolo Abeni <pabeni@redhat.com>
Reported-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
---
drivers/net/bonding/bond_main.c | 61
++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
include/net/bonding.h | 5 +++++
2 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8ea183da8d53..690f3e0971d0 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1619,14 +1619,65 @@ static void bond_compute_features(struct bonding *bond)
netdev_change_features(bond_dev);
}
+static int bond_hard_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
+{
+ struct bonding *bond = netdev_priv(dev);
+ struct net_device *slave_dev;
+
+ slave_dev = bond->header_slave_dev;
+
+ return dev_hard_header(skb, slave_dev, type, daddr, saddr, len);
+}
+
+static void bond_header_cache_update(struct hh_cache *hh, const
struct net_device *dev,
+ const unsigned char *haddr)
+{
+ const struct bonding *bond = netdev_priv(dev);
+ struct net_device *slave_dev;
+
+ slave_dev = bond->header_slave_dev;
+
+ if (!slave_dev->header_ops || !slave_dev->header_ops->cache_update)
+ return;
+
+ slave_dev->header_ops->cache_update(hh, slave_dev, haddr);
+}
+
static void bond_setup_by_slave(struct net_device *bond_dev,
struct net_device *slave_dev)
{
+ struct bonding *bond = netdev_priv(bond_dev);
bool was_up = !!(bond_dev->flags & IFF_UP);
dev_close(bond_dev);
- bond_dev->header_ops = slave_dev->header_ops;
+ /* Some functions are given dev as an argument
+ * while others not. When dev is not given, we cannot
+ * find out what is the slave device through struct bonding
+ * (the private data of bond_dev). Therefore, we need a raw
+ * header_ops variable instead of its pointer to const header_ops
+ * and assign slave's functions directly.
+ * For the other case, we set the wrapper functions that pass
+ * slave_dev to the wrapped functions.
+ */
+ bond->bond_header_ops.create = bond_hard_header;
+ bond->bond_header_ops.cache_update = bond_header_cache_update;
+ if (slave_dev->header_ops) {
+ bond->bond_header_ops.parse = slave_dev->header_ops->parse;
+ bond->bond_header_ops.cache = slave_dev->header_ops->cache;
+ bond->bond_header_ops.validate = slave_dev->header_ops->validate;
+ bond->bond_header_ops.parse_protocol =
slave_dev->header_ops->parse_protocol;
+ } else {
+ bond->bond_header_ops.parse = NULL;
+ bond->bond_header_ops.cache = NULL;
+ bond->bond_header_ops.validate = NULL;
+ bond->bond_header_ops.parse_protocol = NULL;
+ }
+
+ bond->header_slave_dev = slave_dev;
+ bond_dev->header_ops = &bond->bond_header_ops;
bond_dev->type = slave_dev->type;
bond_dev->hard_header_len = slave_dev->hard_header_len;
@@ -2676,6 +2727,14 @@ static int bond_release_and_destroy(struct
net_device *bond_dev,
struct bonding *bond = netdev_priv(bond_dev);
int ret;
+ /* If slave_dev is the earliest registered one, we must clear
+ * the variables related to header_ops to avoid dangling pointer.
+ */
+ if (bond->header_slave_dev == slave_dev) {
+ bond->header_slave_dev = NULL;
+ bond_dev->header_ops = NULL;
+ }
+
ret = __bond_release_one(bond_dev, slave_dev, false, true);
if (ret == 0 && !bond_has_slaves(bond) &&
bond_dev->reg_state != NETREG_UNREGISTERING) {
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 95f67b308c19..cf8206187ce9 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -215,6 +215,11 @@ struct bond_ipsec {
*/
struct bonding {
struct net_device *dev; /* first - useful for panic debug */
+ struct net_device *header_slave_dev; /* slave net_device for
header_ops */
+ /* maintained as a non-const variable
+ * because bond's header_ops should change depending on slaves.
+ */
+ struct header_ops bond_header_ops;
struct slave __rcu *curr_active_slave;
struct slave __rcu *current_arp_slave;
struct slave __rcu *primary_slave;
On Sun, May 25, 2025 at 10:08 PM 戸田晃太 <kota.toda@gmo-cybersecurity.com> wrote:
>
> In bond_setup_by_slave(), the slave’s header_ops are unconditionally
> copied into the bonding device. As a result, the bonding device may invoke
> the slave-specific header operations on itself, causing
> netdev_priv(bond_dev) (a struct bonding) to be incorrectly interpreted
> as the slave's private-data type.
>
> This type-confusion bug can lead to out-of-bounds writes into the skb,
> resulting in memory corruption.
>
> This patch adds two members to struct bonding, bond_header_ops and
> header_slave_dev, to avoid type-confusion while keeping track of the
> slave's header_ops.
>
> Fixes: 1284cd3a2b740 (bonding: two small fixes for IPoIB support)
> Signed-off-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
> Signed-off-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
> Co-Developed-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
> Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> Reported-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
> ---
> drivers/net/bonding/bond_main.c | 61
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> include/net/bonding.h | 5 +++++
> 2 files changed, 65 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 8ea183da8d53..690f3e0971d0 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -1619,14 +1619,65 @@ static void bond_compute_features(struct bonding *bond)
> netdev_change_features(bond_dev);
> }
>
> +static int bond_hard_header(struct sk_buff *skb, struct net_device *dev,
> + unsigned short type, const void *daddr,
> + const void *saddr, unsigned int len)
> +{
> + struct bonding *bond = netdev_priv(dev);
> + struct net_device *slave_dev;
> +
> + slave_dev = bond->header_slave_dev;
> +
> + return dev_hard_header(skb, slave_dev, type, daddr, saddr, len);
> +}
> +
> +static void bond_header_cache_update(struct hh_cache *hh, const
> struct net_device *dev,
> + const unsigned char *haddr)
> +{
> + const struct bonding *bond = netdev_priv(dev);
> + struct net_device *slave_dev;
> +
> + slave_dev = bond->header_slave_dev;
I do not see any barrier ?
> +
> + if (!slave_dev->header_ops || !slave_dev->header_ops->cache_update)
> + return;
> +
> + slave_dev->header_ops->cache_update(hh, slave_dev, haddr);
> +}
> +
> static void bond_setup_by_slave(struct net_device *bond_dev,
> struct net_device *slave_dev)
> {
> + struct bonding *bond = netdev_priv(bond_dev);
> bool was_up = !!(bond_dev->flags & IFF_UP);
>
> dev_close(bond_dev);
>
> - bond_dev->header_ops = slave_dev->header_ops;
> + /* Some functions are given dev as an argument
> + * while others not. When dev is not given, we cannot
> + * find out what is the slave device through struct bonding
> + * (the private data of bond_dev). Therefore, we need a raw
> + * header_ops variable instead of its pointer to const header_ops
> + * and assign slave's functions directly.
> + * For the other case, we set the wrapper functions that pass
> + * slave_dev to the wrapped functions.
> + */
> + bond->bond_header_ops.create = bond_hard_header;
> + bond->bond_header_ops.cache_update = bond_header_cache_update;
> + if (slave_dev->header_ops) {
> + bond->bond_header_ops.parse = slave_dev->header_ops->parse;
> + bond->bond_header_ops.cache = slave_dev->header_ops->cache;
> + bond->bond_header_ops.validate = slave_dev->header_ops->validate;
> + bond->bond_header_ops.parse_protocol =
> slave_dev->header_ops->parse_protocol;
All these updates probably need WRITE_ONCE(), and corresponding
READ_ONCE() on reader sides, at a very minimum ...
RCU would even be better later.
> + } else {
> + bond->bond_header_ops.parse = NULL;
> + bond->bond_header_ops.cache = NULL;
> + bond->bond_header_ops.validate = NULL;
> + bond->bond_header_ops.parse_protocol = NULL;
> + }
> +
> + bond->header_slave_dev = slave_dev;
> + bond_dev->header_ops = &bond->bond_header_ops;
>
> bond_dev->type = slave_dev->type;
> bond_dev->hard_header_len = slave_dev->hard_header_len;
> @@ -2676,6 +2727,14 @@ static int bond_release_and_destroy(struct
> net_device *bond_dev,
> struct bonding *bond = netdev_priv(bond_dev);
> int ret;
>
> + /* If slave_dev is the earliest registered one, we must clear
> + * the variables related to header_ops to avoid dangling pointer.
> + */
> + if (bond->header_slave_dev == slave_dev) {
> + bond->header_slave_dev = NULL;
> + bond_dev->header_ops = NULL;
> + }
> +
> ret = __bond_release_one(bond_dev, slave_dev, false, true);
> if (ret == 0 && !bond_has_slaves(bond) &&
> bond_dev->reg_state != NETREG_UNREGISTERING) {
> diff --git a/include/net/bonding.h b/include/net/bonding.h
> index 95f67b308c19..cf8206187ce9 100644
> --- a/include/net/bonding.h
> +++ b/include/net/bonding.h
> @@ -215,6 +215,11 @@ struct bond_ipsec {
> */
> struct bonding {
> struct net_device *dev; /* first - useful for panic debug */
> + struct net_device *header_slave_dev; /* slave net_device for
> header_ops */
> + /* maintained as a non-const variable
> + * because bond's header_ops should change depending on slaves.
> + */
> + struct header_ops bond_header_ops;
> struct slave __rcu *curr_active_slave;
> struct slave __rcu *current_arp_slave;
> struct slave __rcu *primary_slave;
Thank you for your review.
2025年5月26日(月) 17:23 Eric Dumazet <edumazet@google.com>:
>
> On Sun, May 25, 2025 at 10:08 PM 戸田晃太 <kota.toda@gmo-cybersecurity.com> wrote:
> >
> > In bond_setup_by_slave(), the slave’s header_ops are unconditionally
> > copied into the bonding device. As a result, the bonding device may invoke
> > the slave-specific header operations on itself, causing
> > netdev_priv(bond_dev) (a struct bonding) to be incorrectly interpreted
> > as the slave's private-data type.
> >
> > This type-confusion bug can lead to out-of-bounds writes into the skb,
> > resulting in memory corruption.
> >
> > This patch adds two members to struct bonding, bond_header_ops and
> > header_slave_dev, to avoid type-confusion while keeping track of the
> > slave's header_ops.
> >
> > Fixes: 1284cd3a2b740 (bonding: two small fixes for IPoIB support)
> > Signed-off-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
> > Signed-off-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
> > Co-Developed-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
> > Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> > Reported-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
> > ---
> > drivers/net/bonding/bond_main.c | 61
> > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> > include/net/bonding.h | 5 +++++
> > 2 files changed, 65 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> > index 8ea183da8d53..690f3e0971d0 100644
> > --- a/drivers/net/bonding/bond_main.c
> > +++ b/drivers/net/bonding/bond_main.c
> > @@ -1619,14 +1619,65 @@ static void bond_compute_features(struct bonding *bond)
> > netdev_change_features(bond_dev);
> > }
> >
> > +static int bond_hard_header(struct sk_buff *skb, struct net_device *dev,
> > + unsigned short type, const void *daddr,
> > + const void *saddr, unsigned int len)
> > +{
> > + struct bonding *bond = netdev_priv(dev);
> > + struct net_device *slave_dev;
> > +
> > + slave_dev = bond->header_slave_dev;
> > +
> > + return dev_hard_header(skb, slave_dev, type, daddr, saddr, len);
> > +}
> > +
> > +static void bond_header_cache_update(struct hh_cache *hh, const
> > struct net_device *dev,
> > + const unsigned char *haddr)
> > +{
> > + const struct bonding *bond = netdev_priv(dev);
> > + struct net_device *slave_dev;
> > +
> > + slave_dev = bond->header_slave_dev;
>
> I do not see any barrier ?
>
> > +
> > + if (!slave_dev->header_ops || !slave_dev->header_ops->cache_update)
> > + return;
> > +
> > + slave_dev->header_ops->cache_update(hh, slave_dev, haddr);
> > +}
> > +
> > static void bond_setup_by_slave(struct net_device *bond_dev,
> > struct net_device *slave_dev)
> > {
> > + struct bonding *bond = netdev_priv(bond_dev);
> > bool was_up = !!(bond_dev->flags & IFF_UP);
> >
> > dev_close(bond_dev);
> >
> > - bond_dev->header_ops = slave_dev->header_ops;
> > + /* Some functions are given dev as an argument
> > + * while others not. When dev is not given, we cannot
> > + * find out what is the slave device through struct bonding
> > + * (the private data of bond_dev). Therefore, we need a raw
> > + * header_ops variable instead of its pointer to const header_ops
> > + * and assign slave's functions directly.
> > + * For the other case, we set the wrapper functions that pass
> > + * slave_dev to the wrapped functions.
> > + */
> > + bond->bond_header_ops.create = bond_hard_header;
> > + bond->bond_header_ops.cache_update = bond_header_cache_update;
> > + if (slave_dev->header_ops) {
> > + bond->bond_header_ops.parse = slave_dev->header_ops->parse;
> > + bond->bond_header_ops.cache = slave_dev->header_ops->cache;
> > + bond->bond_header_ops.validate = slave_dev->header_ops->validate;
> > + bond->bond_header_ops.parse_protocol =
> > slave_dev->header_ops->parse_protocol;
>
> All these updates probably need WRITE_ONCE(), and corresponding
> READ_ONCE() on reader sides, at a very minimum ...
>
> RCU would even be better later.
>
I believe that locking is not necessary in this patch. The update of
`header_ops` only happens when a slave is newly enslaved to a bond.
Under such circumstances, members of `header_ops` are not called in
parallel with updating. Therefore, there is no possibility of race
conditions occurring.
>
> > + } else {
> > + bond->bond_header_ops.parse = NULL;
> > + bond->bond_header_ops.cache = NULL;
> > + bond->bond_header_ops.validate = NULL;
> > + bond->bond_header_ops.parse_protocol = NULL;
> > + }
> > +
> > + bond->header_slave_dev = slave_dev;
> > + bond_dev->header_ops = &bond->bond_header_ops;
> >
> > bond_dev->type = slave_dev->type;
> > bond_dev->hard_header_len = slave_dev->hard_header_len;
> > @@ -2676,6 +2727,14 @@ static int bond_release_and_destroy(struct
> > net_device *bond_dev,
> > struct bonding *bond = netdev_priv(bond_dev);
> > int ret;
> >
> > + /* If slave_dev is the earliest registered one, we must clear
> > + * the variables related to header_ops to avoid dangling pointer.
> > + */
> > + if (bond->header_slave_dev == slave_dev) {
> > + bond->header_slave_dev = NULL;
> > + bond_dev->header_ops = NULL;
> > + }
> > +
> > ret = __bond_release_one(bond_dev, slave_dev, false, true);
> > if (ret == 0 && !bond_has_slaves(bond) &&
> > bond_dev->reg_state != NETREG_UNREGISTERING) {
> > diff --git a/include/net/bonding.h b/include/net/bonding.h
> > index 95f67b308c19..cf8206187ce9 100644
> > --- a/include/net/bonding.h
> > +++ b/include/net/bonding.h
> > @@ -215,6 +215,11 @@ struct bond_ipsec {
> > */
> > struct bonding {
> > struct net_device *dev; /* first - useful for panic debug */
> > + struct net_device *header_slave_dev; /* slave net_device for
> > header_ops */
> > + /* maintained as a non-const variable
> > + * because bond's header_ops should change depending on slaves.
> > + */
> > + struct header_ops bond_header_ops;
> > struct slave __rcu *curr_active_slave;
> > struct slave __rcu *current_arp_slave;
> > struct slave __rcu *primary_slave;
On Wed, May 28, 2025 at 7:36 AM 戸田晃太 <kota.toda@gmo-cybersecurity.com> wrote:
>
> Thank you for your review.
>
> 2025年5月26日(月) 17:23 Eric Dumazet <edumazet@google.com>:
> >
> > On Sun, May 25, 2025 at 10:08 PM 戸田晃太 <kota.toda@gmo-cybersecurity.com> wrote:
> > >
> > > In bond_setup_by_slave(), the slave’s header_ops are unconditionally
> > > copied into the bonding device. As a result, the bonding device may invoke
> > > the slave-specific header operations on itself, causing
> > > netdev_priv(bond_dev) (a struct bonding) to be incorrectly interpreted
> > > as the slave's private-data type.
> > >
> > > This type-confusion bug can lead to out-of-bounds writes into the skb,
> > > resulting in memory corruption.
> > >
> > > This patch adds two members to struct bonding, bond_header_ops and
> > > header_slave_dev, to avoid type-confusion while keeping track of the
> > > slave's header_ops.
> > >
> > > Fixes: 1284cd3a2b740 (bonding: two small fixes for IPoIB support)
> > > Signed-off-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
> > > Signed-off-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
> > > Co-Developed-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
> > > Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> > > Reported-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
> > > ---
> > > drivers/net/bonding/bond_main.c | 61
> > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> > > include/net/bonding.h | 5 +++++
> > > 2 files changed, 65 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> > > index 8ea183da8d53..690f3e0971d0 100644
> > > --- a/drivers/net/bonding/bond_main.c
> > > +++ b/drivers/net/bonding/bond_main.c
> > > @@ -1619,14 +1619,65 @@ static void bond_compute_features(struct bonding *bond)
> > > netdev_change_features(bond_dev);
> > > }
> > >
> > > +static int bond_hard_header(struct sk_buff *skb, struct net_device *dev,
> > > + unsigned short type, const void *daddr,
> > > + const void *saddr, unsigned int len)
> > > +{
> > > + struct bonding *bond = netdev_priv(dev);
> > > + struct net_device *slave_dev;
> > > +
> > > + slave_dev = bond->header_slave_dev;
> > > +
> > > + return dev_hard_header(skb, slave_dev, type, daddr, saddr, len);
> > > +}
> > > +
> > > +static void bond_header_cache_update(struct hh_cache *hh, const
> > > struct net_device *dev,
> > > + const unsigned char *haddr)
> > > +{
> > > + const struct bonding *bond = netdev_priv(dev);
> > > + struct net_device *slave_dev;
> > > +
> > > + slave_dev = bond->header_slave_dev;
> >
> > I do not see any barrier ?
> >
> > > +
> > > + if (!slave_dev->header_ops || !slave_dev->header_ops->cache_update)
> > > + return;
> > > +
> > > + slave_dev->header_ops->cache_update(hh, slave_dev, haddr);
> > > +}
> > > +
> > > static void bond_setup_by_slave(struct net_device *bond_dev,
> > > struct net_device *slave_dev)
> > > {
> > > + struct bonding *bond = netdev_priv(bond_dev);
> > > bool was_up = !!(bond_dev->flags & IFF_UP);
> > >
> > > dev_close(bond_dev);
> > >
> > > - bond_dev->header_ops = slave_dev->header_ops;
> > > + /* Some functions are given dev as an argument
> > > + * while others not. When dev is not given, we cannot
> > > + * find out what is the slave device through struct bonding
> > > + * (the private data of bond_dev). Therefore, we need a raw
> > > + * header_ops variable instead of its pointer to const header_ops
> > > + * and assign slave's functions directly.
> > > + * For the other case, we set the wrapper functions that pass
> > > + * slave_dev to the wrapped functions.
> > > + */
> > > + bond->bond_header_ops.create = bond_hard_header;
> > > + bond->bond_header_ops.cache_update = bond_header_cache_update;
> > > + if (slave_dev->header_ops) {
> > > + bond->bond_header_ops.parse = slave_dev->header_ops->parse;
> > > + bond->bond_header_ops.cache = slave_dev->header_ops->cache;
> > > + bond->bond_header_ops.validate = slave_dev->header_ops->validate;
> > > + bond->bond_header_ops.parse_protocol =
> > > slave_dev->header_ops->parse_protocol;
> >
> > All these updates probably need WRITE_ONCE(), and corresponding
> > READ_ONCE() on reader sides, at a very minimum ...
> >
> > RCU would even be better later.
> >
> I believe that locking is not necessary in this patch. The update of
> `header_ops` only happens when a slave is newly enslaved to a bond.
> Under such circumstances, members of `header_ops` are not called in
> parallel with updating. Therefore, there is no possibility of race
> conditions occurring.
bond_dev can certainly be live, and packets can flow.
I have seen enough syzbot reports hinting at this precise issue.
2025年5月29日(木) 0:10 Eric Dumazet <edumazet@google.com>:
>
> On Wed, May 28, 2025 at 7:36 AM 戸田晃太 <kota.toda@gmo-cybersecurity.com> wrote:
> >
> > Thank you for your review.
> >
> > 2025年5月26日(月) 17:23 Eric Dumazet <edumazet@google.com>:
> > >
> > > On Sun, May 25, 2025 at 10:08 PM 戸田晃太 <kota.toda@gmo-cybersecurity.com> wrote:
> > > >
> > > > In bond_setup_by_slave(), the slave’s header_ops are unconditionally
> > > > copied into the bonding device. As a result, the bonding device may invoke
> > > > the slave-specific header operations on itself, causing
> > > > netdev_priv(bond_dev) (a struct bonding) to be incorrectly interpreted
> > > > as the slave's private-data type.
> > > >
> > > > This type-confusion bug can lead to out-of-bounds writes into the skb,
> > > > resulting in memory corruption.
> > > >
> > > > This patch adds two members to struct bonding, bond_header_ops and
> > > > header_slave_dev, to avoid type-confusion while keeping track of the
> > > > slave's header_ops.
> > > >
> > > > Fixes: 1284cd3a2b740 (bonding: two small fixes for IPoIB support)
> > > > Signed-off-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
> > > > Signed-off-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
> > > > Co-Developed-by: Yuki Koike <yuki.koike@gmo-cybersecurity.com>
> > > > Reviewed-by: Paolo Abeni <pabeni@redhat.com>
> > > > Reported-by: Kota Toda <kota.toda@gmo-cybersecurity.com>
> > > > ---
> > > > drivers/net/bonding/bond_main.c | 61
> > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> > > > include/net/bonding.h | 5 +++++
> > > > 2 files changed, 65 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> > > > index 8ea183da8d53..690f3e0971d0 100644
> > > > --- a/drivers/net/bonding/bond_main.c
> > > > +++ b/drivers/net/bonding/bond_main.c
> > > > @@ -1619,14 +1619,65 @@ static void bond_compute_features(struct bonding *bond)
> > > > netdev_change_features(bond_dev);
> > > > }
> > > >
> > > > +static int bond_hard_header(struct sk_buff *skb, struct net_device *dev,
> > > > + unsigned short type, const void *daddr,
> > > > + const void *saddr, unsigned int len)
> > > > +{
> > > > + struct bonding *bond = netdev_priv(dev);
> > > > + struct net_device *slave_dev;
> > > > +
> > > > + slave_dev = bond->header_slave_dev;
> > > > +
> > > > + return dev_hard_header(skb, slave_dev, type, daddr, saddr, len);
> > > > +}
> > > > +
> > > > +static void bond_header_cache_update(struct hh_cache *hh, const
> > > > struct net_device *dev,
> > > > + const unsigned char *haddr)
> > > > +{
> > > > + const struct bonding *bond = netdev_priv(dev);
> > > > + struct net_device *slave_dev;
> > > > +
> > > > + slave_dev = bond->header_slave_dev;
> > >
> > > I do not see any barrier ?
> > >
> > > > +
> > > > + if (!slave_dev->header_ops || !slave_dev->header_ops->cache_update)
> > > > + return;
> > > > +
> > > > + slave_dev->header_ops->cache_update(hh, slave_dev, haddr);
> > > > +}
> > > > +
> > > > static void bond_setup_by_slave(struct net_device *bond_dev,
> > > > struct net_device *slave_dev)
> > > > {
> > > > + struct bonding *bond = netdev_priv(bond_dev);
> > > > bool was_up = !!(bond_dev->flags & IFF_UP);
> > > >
> > > > dev_close(bond_dev);
> > > >
> > > > - bond_dev->header_ops = slave_dev->header_ops;
> > > > + /* Some functions are given dev as an argument
> > > > + * while others not. When dev is not given, we cannot
> > > > + * find out what is the slave device through struct bonding
> > > > + * (the private data of bond_dev). Therefore, we need a raw
> > > > + * header_ops variable instead of its pointer to const header_ops
> > > > + * and assign slave's functions directly.
> > > > + * For the other case, we set the wrapper functions that pass
> > > > + * slave_dev to the wrapped functions.
> > > > + */
> > > > + bond->bond_header_ops.create = bond_hard_header;
> > > > + bond->bond_header_ops.cache_update = bond_header_cache_update;
> > > > + if (slave_dev->header_ops) {
> > > > + bond->bond_header_ops.parse = slave_dev->header_ops->parse;
> > > > + bond->bond_header_ops.cache = slave_dev->header_ops->cache;
> > > > + bond->bond_header_ops.validate = slave_dev->header_ops->validate;
> > > > + bond->bond_header_ops.parse_protocol =
> > > > slave_dev->header_ops->parse_protocol;
> > >
> > > All these updates probably need WRITE_ONCE(), and corresponding
> > > READ_ONCE() on reader sides, at a very minimum ...
> > >
> > > RCU would even be better later.
> > >
> > I believe that locking is not necessary in this patch. The update of
> > `header_ops` only happens when a slave is newly enslaved to a bond.
> > Under such circumstances, members of `header_ops` are not called in
> > parallel with updating. Therefore, there is no possibility of race
> > conditions occurring.
>
> bond_dev can certainly be live, and packets can flow.
>
> I have seen enough syzbot reports hinting at this precise issue.
Hi Eric, Thank you for reviewing the patch.
At the beginning of `bond_setup_by_slave`, `dev_close(bond_dev)` is called,
meaning bond_dev is down and no packets can flow during the update of
`bond_header_ops`.
The syzbot report (you mentioned in the conversation in security@) indicating
`dev->header_ops` becoming NULL should be resolved by this patch.
I couldn't find any other related syzbot reports.
© 2016 - 2025 Red Hat, Inc.