include/linux/netdevice.h | 12 +++++++----- net/core/dev.c | 30 ++++++------------------------ net/core/net-sysfs.c | 2 +- 3 files changed, 14 insertions(+), 30 deletions(-)
From: Alexander Lobakin <aleksander.lobakin@intel.com>
In fact, this structure contains a flexible array at the end, but
historically its size, alignment etc., is calculated manually.
There are several instances of the structure embedded into other
structures, but also there's ongoing effort to remove them and we
could in the meantime declare &net_device properly.
Declare the array explicitly, use struct_size() and store the array
size inside the structure, so that __counted_by() can be applied.
Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the
allocated buffer is aligned to what the user expects.
Also, change its alignment from %NETDEV_ALIGN to the cacheline size
as per several suggestions on the netdev ML.
bloat-o-meter for vmlinux:
free_netdev 445 440 -5
netdev_freemem 24 - -24
alloc_netdev_mqs 1481 1450 -31
On x86_64 with several NICs of different vendors, I was never able to
get a &net_device pointer not aligned to the cacheline size after the
change.
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
---
Changelog:
v2:
* Rebased Alexander's patch on top of f750dfe825b90 ("ethtool: provide
customized dim profile management").
* Removed the ALIGN() of SMP_CACHE_BYTES for sizeof_priv.
v1:
* https://lore.kernel.org/netdev/90fd7cd7-72dc-4df6-88ec-fbc8b64735ad@intel.com
include/linux/netdevice.h | 12 +++++++-----
net/core/dev.c | 30 ++++++------------------------
net/core/net-sysfs.c | 2 +-
3 files changed, 14 insertions(+), 30 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 93558645c6d0..f0dd499244d4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2199,10 +2199,10 @@ struct net_device {
unsigned short neigh_priv_len;
unsigned short dev_id;
unsigned short dev_port;
- unsigned short padded;
+ int irq;
+ u32 priv_len;
spinlock_t addr_list_lock;
- int irq;
struct netdev_hw_addr_list uc;
struct netdev_hw_addr_list mc;
@@ -2406,7 +2406,10 @@ struct net_device {
/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
struct dim_irq_moder *irq_moder;
-};
+
+ u8 priv[] ____cacheline_aligned
+ __counted_by(priv_len);
+} ____cacheline_aligned;
#define to_net_dev(d) container_of(d, struct net_device, dev)
/*
@@ -2596,7 +2599,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
*/
static inline void *netdev_priv(const struct net_device *dev)
{
- return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
+ return (void *)dev->priv;
}
/* Set the sysfs physical device reference for the network logical device
@@ -3127,7 +3130,6 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
-void netdev_freemem(struct net_device *dev);
void init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 73e5af6943c3..6ea1d20676fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11006,13 +11006,6 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
-void netdev_freemem(struct net_device *dev)
-{
- char *addr = (char *)dev - dev->padded;
-
- kvfree(addr);
-}
-
/**
* alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -11032,8 +11025,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
- unsigned int alloc_size;
- struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -11047,21 +11038,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL;
}
- alloc_size = sizeof(struct net_device);
- if (sizeof_priv) {
- /* ensure 32-byte alignment of private area */
- alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
- alloc_size += sizeof_priv;
- }
- /* ensure 32-byte alignment of whole construct */
- alloc_size += NETDEV_ALIGN - 1;
-
- p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
- if (!p)
+ dev = kvzalloc(struct_size(dev, priv, sizeof_priv),
+ GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
+ if (!dev)
return NULL;
- dev = PTR_ALIGN(p, NETDEV_ALIGN);
- dev->padded = (char *)dev - (char *)p;
+ dev->priv_len = sizeof_priv;
ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
#ifdef CONFIG_PCPU_DEV_REFCNT
@@ -11148,7 +11130,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
free_percpu(dev->pcpu_refcnt);
free_dev:
#endif
- netdev_freemem(dev);
+ kvfree(dev);
return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);
@@ -11203,7 +11185,7 @@ void free_netdev(struct net_device *dev)
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED ||
dev->reg_state == NETREG_DUMMY) {
- netdev_freemem(dev);
+ kvfree(dev);
return;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4c27a360c294..0e2084ce7b75 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -2028,7 +2028,7 @@ static void netdev_release(struct device *d)
* device is dead and about to be freed.
*/
kfree(rcu_access_pointer(dev->ifalias));
- netdev_freemem(dev);
+ kvfree(dev);
}
static const void *net_namespace(const struct device *d)
--
2.43.0
On Tue, Jul 09, 2024 at 05:54:25AM -0700, Breno Leitao wrote:
> From: Alexander Lobakin <aleksander.lobakin@intel.com>
>
> In fact, this structure contains a flexible array at the end, but
> historically its size, alignment etc., is calculated manually.
> There are several instances of the structure embedded into other
> structures, but also there's ongoing effort to remove them and we
> could in the meantime declare &net_device properly.
> Declare the array explicitly, use struct_size() and store the array
> size inside the structure, so that __counted_by() can be applied.
> Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the
> allocated buffer is aligned to what the user expects.
> Also, change its alignment from %NETDEV_ALIGN to the cacheline size
> as per several suggestions on the netdev ML.
>
> bloat-o-meter for vmlinux:
>
> free_netdev 445 440 -5
> netdev_freemem 24 - -24
> alloc_netdev_mqs 1481 1450 -31
>
> On x86_64 with several NICs of different vendors, I was never able to
> get a &net_device pointer not aligned to the cacheline size after the
> change.
>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
> Signed-off-by: Breno Leitao <leitao@debian.org>
> Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Hi Breno,
Some kernel doc warnings from my side.
Flagged by: kernel-doc -none
> ---
> Changelog:
>
> v2:
> * Rebased Alexander's patch on top of f750dfe825b90 ("ethtool: provide
> customized dim profile management").
> * Removed the ALIGN() of SMP_CACHE_BYTES for sizeof_priv.
>
> v1:
> * https://lore.kernel.org/netdev/90fd7cd7-72dc-4df6-88ec-fbc8b64735ad@intel.com
>
> include/linux/netdevice.h | 12 +++++++-----
> net/core/dev.c | 30 ++++++------------------------
> net/core/net-sysfs.c | 2 +-
> 3 files changed, 14 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 93558645c6d0..f0dd499244d4 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2199,10 +2199,10 @@ struct net_device {
> unsigned short neigh_priv_len;
> unsigned short dev_id;
> unsigned short dev_port;
> - unsigned short padded;
padded should also be removed from the Kernel doc for this structure.
> + int irq;
> + u32 priv_len;
And irq and priv_len should be added to the Kernel doc for this structure.
>
> spinlock_t addr_list_lock;
> - int irq;
>
> struct netdev_hw_addr_list uc;
> struct netdev_hw_addr_list mc;
...
On Tue, Jul 09, 2024 at 07:11:28PM +0100, Simon Horman wrote: > On Tue, Jul 09, 2024 at 05:54:25AM -0700, Breno Leitao wrote: > > From: Alexander Lobakin <aleksander.lobakin@intel.com> > > > > In fact, this structure contains a flexible array at the end, but > > historically its size, alignment etc., is calculated manually. > > There are several instances of the structure embedded into other > > structures, but also there's ongoing effort to remove them and we > > could in the meantime declare &net_device properly. > > Declare the array explicitly, use struct_size() and store the array > > size inside the structure, so that __counted_by() can be applied. > > Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the > > allocated buffer is aligned to what the user expects. > > Also, change its alignment from %NETDEV_ALIGN to the cacheline size > > as per several suggestions on the netdev ML. > > > > bloat-o-meter for vmlinux: > > > > free_netdev 445 440 -5 > > netdev_freemem 24 - -24 > > alloc_netdev_mqs 1481 1450 -31 > > > > On x86_64 with several NICs of different vendors, I was never able to > > get a &net_device pointer not aligned to the cacheline size after the > > change. > > > > Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com> > > Signed-off-by: Breno Leitao <leitao@debian.org> > > Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> > > Hi Breno, > > Some kernel doc warnings from my side. Thanks. I will send a v3 with the fixes. > Flagged by: kernel-doc -none How do you run this test exactly? I would like to add to my workflow. Thanks!
On Tue, Jul 09, 2024 at 01:19:44PM -0700, Breno Leitao wrote: > On Tue, Jul 09, 2024 at 07:11:28PM +0100, Simon Horman wrote: > > On Tue, Jul 09, 2024 at 05:54:25AM -0700, Breno Leitao wrote: > > > From: Alexander Lobakin <aleksander.lobakin@intel.com> > > > > > > In fact, this structure contains a flexible array at the end, but > > > historically its size, alignment etc., is calculated manually. > > > There are several instances of the structure embedded into other > > > structures, but also there's ongoing effort to remove them and we > > > could in the meantime declare &net_device properly. > > > Declare the array explicitly, use struct_size() and store the array > > > size inside the structure, so that __counted_by() can be applied. > > > Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the > > > allocated buffer is aligned to what the user expects. > > > Also, change its alignment from %NETDEV_ALIGN to the cacheline size > > > as per several suggestions on the netdev ML. > > > > > > bloat-o-meter for vmlinux: > > > > > > free_netdev 445 440 -5 > > > netdev_freemem 24 - -24 > > > alloc_netdev_mqs 1481 1450 -31 > > > > > > On x86_64 with several NICs of different vendors, I was never able to > > > get a &net_device pointer not aligned to the cacheline size after the > > > change. > > > > > > Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com> > > > Signed-off-by: Breno Leitao <leitao@debian.org> > > > Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> > > > > Hi Breno, > > > > Some kernel doc warnings from my side. > > Thanks. I will send a v3 with the fixes. > > > Flagged by: kernel-doc -none > > How do you run this test exactly? I would like to add to my workflow. It can be run like this: ./scripts/kernel-doc -none include/linux/netdevice.h Or this: ./scripts/kernel-doc -none -Wall include/linux/netdevice.h In this case the second invocation has a lot of output relating to documentation of return values which is unrelated to your patch. But the first invocation shows the issues that I flagged in my previous email.
On Tue, Jul 09, 2024 at 01:19:44PM -0700, Breno Leitao wrote: > On Tue, Jul 09, 2024 at 07:11:28PM +0100, Simon Horman wrote: > > Flagged by: kernel-doc -none > > How do you run this test exactly? I would like to add to my workflow. Details here: https://docs.kernel.org/doc-guide/kernel-doc.html But basically, either: $ ./scripts/kernel-doc -none include/linux/netdevice.h include/linux/netdevice.h:2404: warning: ... or: $ make ... W=n ... ../drivers/firewire/init_ohci1394_dma.c:178: warning: Function parameter or struct member 'ohci' not described in 'init_ohci1394_wait_for_busresets' ... -- Kees Cook
On Tue, Jul 09, 2024 at 05:54:25AM -0700, Breno Leitao wrote: > From: Alexander Lobakin <aleksander.lobakin@intel.com> > > In fact, this structure contains a flexible array at the end, but > historically its size, alignment etc., is calculated manually. > There are several instances of the structure embedded into other > structures, but also there's ongoing effort to remove them and we > could in the meantime declare &net_device properly. > Declare the array explicitly, use struct_size() and store the array > size inside the structure, so that __counted_by() can be applied. > Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the > allocated buffer is aligned to what the user expects. > Also, change its alignment from %NETDEV_ALIGN to the cacheline size > as per several suggestions on the netdev ML. > > bloat-o-meter for vmlinux: > > free_netdev 445 440 -5 > netdev_freemem 24 - -24 > alloc_netdev_mqs 1481 1450 -31 > > On x86_64 with several NICs of different vendors, I was never able to > get a &net_device pointer not aligned to the cacheline size after the > change. > > Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com> > Signed-off-by: Breno Leitao <leitao@debian.org> > Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> Nice! I'm glad the refactoring in other drivers got tackled so this could happen. :) Reviewed-by: Kees Cook <kees@kernel.org> -- Kees Cook
On Tue, Jul 9, 2024 at 5:54 AM Breno Leitao <leitao@debian.org> wrote:
>
> From: Alexander Lobakin <aleksander.lobakin@intel.com>
>
> In fact, this structure contains a flexible array at the end, but
> historically its size, alignment etc., is calculated manually.
> There are several instances of the structure embedded into other
> structures, but also there's ongoing effort to remove them and we
> could in the meantime declare &net_device properly.
> Declare the array explicitly, use struct_size() and store the array
> size inside the structure, so that __counted_by() can be applied.
> Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the
> allocated buffer is aligned to what the user expects.
> Also, change its alignment from %NETDEV_ALIGN to the cacheline size
> as per several suggestions on the netdev ML.
>
> bloat-o-meter for vmlinux:
>
> free_netdev 445 440 -5
> netdev_freemem 24 - -24
> alloc_netdev_mqs 1481 1450 -31
>
> On x86_64 with several NICs of different vendors, I was never able to
> get a &net_device pointer not aligned to the cacheline size after the
> change.
>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
> Signed-off-by: Breno Leitao <leitao@debian.org>
> Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
> ---
> Changelog:
>
> v2:
> * Rebased Alexander's patch on top of f750dfe825b90 ("ethtool: provide
> customized dim profile management").
> * Removed the ALIGN() of SMP_CACHE_BYTES for sizeof_priv.
>
> v1:
> * https://lore.kernel.org/netdev/90fd7cd7-72dc-4df6-88ec-fbc8b64735ad@intel.com
>
> include/linux/netdevice.h | 12 +++++++-----
> net/core/dev.c | 30 ++++++------------------------
> net/core/net-sysfs.c | 2 +-
> 3 files changed, 14 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 93558645c6d0..f0dd499244d4 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2199,10 +2199,10 @@ struct net_device {
> unsigned short neigh_priv_len;
> unsigned short dev_id;
> unsigned short dev_port;
> - unsigned short padded;
> + int irq;
> + u32 priv_len;
>
> spinlock_t addr_list_lock;
> - int irq;
>
> struct netdev_hw_addr_list uc;
> struct netdev_hw_addr_list mc;
> @@ -2406,7 +2406,10 @@ struct net_device {
>
> /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
> struct dim_irq_moder *irq_moder;
> -};
> +
> + u8 priv[] ____cacheline_aligned
> + __counted_by(priv_len);
> +} ____cacheline_aligned;
> #define to_net_dev(d) container_of(d, struct net_device, dev)
>
> /*
> @@ -2596,7 +2599,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
> */
> static inline void *netdev_priv(const struct net_device *dev)
> {
> - return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
> + return (void *)dev->priv;
Minor remark : the cast is not needed, but this is fine.
Reviewed-by: Eric Dumazet <edumazet@google.com>
It would be great to get rid of NETDEV_ALIGN eventually.
Thanks.
Hello Eric,
On Tue, Jul 09, 2024 at 08:27:45AM -0700, Eric Dumazet wrote:
> On Tue, Jul 9, 2024 at 5:54 AM Breno Leitao <leitao@debian.org> wrote:
> > @@ -2596,7 +2599,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
> > */
> > static inline void *netdev_priv(const struct net_device *dev)
> > {
> > - return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
> > + return (void *)dev->priv;
>
> Minor remark : the cast is not needed, but this is fine.
In fact, the compiler is not very happy if I remove the cast:
./include/linux/netdevice.h:2603:9: error: returning 'const u8[]' (aka 'const unsigned char[]') from a function with result type 'void *' discards qualifiers [-Werror,-Wincompatible-pointer-types-discards-qualifiers]
2603 | return dev->priv;
| ^~~~~~~~~
On Wed, Jul 10, 2024 at 4:19 AM Breno Leitao <leitao@debian.org> wrote:
>
> Hello Eric,
>
> On Tue, Jul 09, 2024 at 08:27:45AM -0700, Eric Dumazet wrote:
> > On Tue, Jul 9, 2024 at 5:54 AM Breno Leitao <leitao@debian.org> wrote:
>
> > > @@ -2596,7 +2599,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
> > > */
> > > static inline void *netdev_priv(const struct net_device *dev)
> > > {
> > > - return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
> > > + return (void *)dev->priv;
> >
> > Minor remark : the cast is not needed, but this is fine.
>
> In fact, the compiler is not very happy if I remove the cast:
>
> ./include/linux/netdevice.h:2603:9: error: returning 'const u8[]' (aka 'const unsigned char[]') from a function with result type 'void *' discards qualifiers [-Werror,-Wincompatible-pointer-types-discards-qualifiers]
> 2603 | return dev->priv;
> | ^~~~~~~~~
This is because of the ‘const’ qualifier of the parameter.
This could be solved with _Generic() later, if we want to keep the
const qualifier.
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 10 Jul 2024 10:04:39 -0700
> On Wed, Jul 10, 2024 at 4:19 AM Breno Leitao <leitao@debian.org> wrote:
>>
>> Hello Eric,
>>
>> On Tue, Jul 09, 2024 at 08:27:45AM -0700, Eric Dumazet wrote:
>>> On Tue, Jul 9, 2024 at 5:54 AM Breno Leitao <leitao@debian.org> wrote:
>>
>>>> @@ -2596,7 +2599,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
>>>> */
>>>> static inline void *netdev_priv(const struct net_device *dev)
>>>> {
>>>> - return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
>>>> + return (void *)dev->priv;
>>>
>>> Minor remark : the cast is not needed, but this is fine.
>>
>> In fact, the compiler is not very happy if I remove the cast:
>>
>> ./include/linux/netdevice.h:2603:9: error: returning 'const u8[]' (aka 'const unsigned char[]') from a function with result type 'void *' discards qualifiers [-Werror,-Wincompatible-pointer-types-discards-qualifiers]
>> 2603 | return dev->priv;
>> | ^~~~~~~~~
>
> This is because of the ‘const’ qualifier of the parameter.
>
> This could be solved with _Generic() later, if we want to keep the
> const qualifier.
I tried _Generic() when I was working on this patch and it seems like
lots of drivers need to be fixed first. They pass a const &net_device,
but assign the result to a non-const variable and modify fields there.
That's why I bet up on this and just casted to (void *) for now.
Thanks,
Olek
On Thu, Jul 11, 2024 at 5:54 AM Alexander Lobakin <aleksander.lobakin@intel.com> wrote: > > From: Eric Dumazet <edumazet@google.com> > Date: Wed, 10 Jul 2024 10:04:39 -0700 > > > This is because of the ‘const’ qualifier of the parameter. > > > > This could be solved with _Generic() later, if we want to keep the > > const qualifier. > > I tried _Generic() when I was working on this patch and it seems like > lots of drivers need to be fixed first. They pass a const &net_device, > but assign the result to a non-const variable and modify fields there. > That's why I bet up on this and just casted to (void *) for now. Right, I will clean things up in the next cycle.
On Tue, Jul 09, 2024 at 08:27:45AM -0700, Eric Dumazet wrote:
> On Tue, Jul 9, 2024 at 5:54 AM Breno Leitao <leitao@debian.org> wrote:
> > @@ -2596,7 +2599,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
> > */
> > static inline void *netdev_priv(const struct net_device *dev)
> > {
> > - return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
> > + return (void *)dev->priv;
> Minor remark : the cast is not needed, but this is fine.
Thanks. I will fix it in a v3, which needs to be done to address some
kernel-doc warnings identified by Simon..
> It would be great to get rid of NETDEV_ALIGN eventually.
Would you mind sharing what do you have in mind? What do you use as a
replacement?
Thanks
© 2016 - 2025 Red Hat, Inc.