Now that all the users of the page pool members in struct page have been
gone, the members can be removed from struct page. However, the space
in struct page needs to be kept using a place holder with the same size,
until struct netmem_desc has its own instance, not overlayed onto struct
page, to avoid conficting with other members within struct page.
Remove the page pool members in struct page and replace with a place
holder. The place holder should be removed once struct netmem_desc has
its own instance.
Signed-off-by: Byungchul Park <byungchul@sk.com>
---
include/linux/mm_types.h | 13 ++-----------
include/net/netmem.h | 35 +----------------------------------
include/net/netmem_type.h | 22 ++++++++++++++++++++++
3 files changed, 25 insertions(+), 45 deletions(-)
create mode 100644 include/net/netmem_type.h
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e76bade9ebb12..69904a0855358 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -20,6 +20,7 @@
#include <linux/seqlock.h>
#include <linux/percpu_counter.h>
#include <linux/types.h>
+#include <net/netmem_type.h> /* for page pool */
#include <asm/mmu.h>
@@ -118,17 +119,7 @@ struct page {
*/
unsigned long private;
};
- struct { /* page_pool used by netstack */
- /**
- * @pp_magic: magic value to avoid recycling non
- * page_pool allocated pages.
- */
- unsigned long pp_magic;
- struct page_pool *pp;
- unsigned long _pp_mapping_pad;
- unsigned long dma_addr;
- atomic_long_t pp_ref_count;
- };
+ struct __netmem_desc place_holder_1; /* for page pool */
struct { /* Tail pages of compound page */
unsigned long compound_head; /* Bit zero is set */
};
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 00064e766b889..c414de6c6ab0d 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <net/net_debug.h>
+#include <net/netmem_type.h>
/* net_iov */
@@ -20,15 +21,6 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
*/
#define NET_IOV 0x01UL
-struct netmem_desc {
- unsigned long __unused_padding;
- unsigned long pp_magic;
- struct page_pool *pp;
- struct net_iov_area *owner;
- unsigned long dma_addr;
- atomic_long_t pp_ref_count;
-};
-
struct net_iov_area {
/* Array of net_iovs for this area. */
struct netmem_desc *niovs;
@@ -38,31 +30,6 @@ struct net_iov_area {
unsigned long base_virtual;
};
-/* These fields in struct page are used by the page_pool and net stack:
- *
- * struct {
- * unsigned long pp_magic;
- * struct page_pool *pp;
- * unsigned long _pp_mapping_pad;
- * unsigned long dma_addr;
- * atomic_long_t pp_ref_count;
- * };
- *
- * We mirror the page_pool fields here so the page_pool can access these fields
- * without worrying whether the underlying fields belong to a page or net_iov.
- *
- * The non-net stack fields of struct page are private to the mm stack and must
- * never be mirrored to net_iov.
- */
-#define NET_IOV_ASSERT_OFFSET(pg, iov) \
- static_assert(offsetof(struct page, pg) == \
- offsetof(struct netmem_desc, iov))
-NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
-NET_IOV_ASSERT_OFFSET(pp, pp);
-NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
-NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
-#undef NET_IOV_ASSERT_OFFSET
-
static inline struct net_iov_area *net_iov_owner(const struct netmem_desc *niov)
{
return niov->owner;
diff --git a/include/net/netmem_type.h b/include/net/netmem_type.h
new file mode 100644
index 0000000000000..6a3ac8e908515
--- /dev/null
+++ b/include/net/netmem_type.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Author: Byungchul Park <max.byungchul.park@gmail.com>
+ */
+
+#ifndef _NET_NETMEM_TYPE_H
+#define _NET_NETMEM_TYPE_H
+
+#include <linux/stddef.h>
+
+struct netmem_desc {
+ unsigned long __unused_padding;
+ struct_group_tagged(__netmem_desc, actual_data,
+ unsigned long pp_magic;
+ struct page_pool *pp;
+ struct net_iov_area *owner;
+ unsigned long dma_addr;
+ atomic_long_t pp_ref_count;
+ );
+};
+
+#endif /* _NET_NETMEM_TYPE_H */
--
2.17.1
Hi Byungchul
On Fri, 9 May 2025 at 14:51, Byungchul Park <byungchul@sk.com> wrote:
>
> Now that all the users of the page pool members in struct page have been
> gone, the members can be removed from struct page. However, the space
> in struct page needs to be kept using a place holder with the same size,
> until struct netmem_desc has its own instance, not overlayed onto struct
> page, to avoid conficting with other members within struct page.
>
FWIW similar mirroring was intially proposed [0] a few years ago
> Remove the page pool members in struct page and replace with a place
> holder. The place holder should be removed once struct netmem_desc has
> its own instance.
instance? To make sure I understand this, the netmem_descs are
expected to be allocated in the future right?
[...]
> -
> static inline struct net_iov_area *net_iov_owner(const struct netmem_desc *niov)
> {
> return niov->owner;
> diff --git a/include/net/netmem_type.h b/include/net/netmem_type.h
> new file mode 100644
> index 0000000000000..6a3ac8e908515
> --- /dev/null
> +++ b/include/net/netmem_type.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0
> + *
> + * Author: Byungchul Park <max.byungchul.park@gmail.com>
Shouldn't Minas authorship be preserved here?
> + */
> +
> +#ifndef _NET_NETMEM_TYPE_H
> +#define _NET_NETMEM_TYPE_H
> +
> +#include <linux/stddef.h>
> +
> +struct netmem_desc {
> + unsigned long __unused_padding;
> + struct_group_tagged(__netmem_desc, actual_data,
> + unsigned long pp_magic;
> + struct page_pool *pp;
> + struct net_iov_area *owner;
> + unsigned long dma_addr;
> + atomic_long_t pp_ref_count;
> + );
> +};
> +
> +#endif /* _NET_NETMEM_TYPE_H */
> --
> 2.17.1
>
[0] https://lore.kernel.org/netdev/1549550196-25581-1-git-send-email-ilias.apalodimas@linaro.org/
Thanks
/Ilias
On Sat, May 10, 2025 at 10:26:00AM +0300, Ilias Apalodimas wrote:
> Hi Byungchul
>
> On Fri, 9 May 2025 at 14:51, Byungchul Park <byungchul@sk.com> wrote:
> >
> > Now that all the users of the page pool members in struct page have been
> > gone, the members can be removed from struct page. However, the space
> > in struct page needs to be kept using a place holder with the same size,
> > until struct netmem_desc has its own instance, not overlayed onto struct
> > page, to avoid conficting with other members within struct page.
> >
>
> FWIW similar mirroring was intially proposed [0] a few years ago
>
> > Remove the page pool members in struct page and replace with a place
> > holder. The place holder should be removed once struct netmem_desc has
> > its own instance.
>
> instance? To make sure I understand this, the netmem_descs are
> expected to be allocated in the future right?
Yes.
> [...]
>
> > -
> > static inline struct net_iov_area *net_iov_owner(const struct netmem_desc *niov)
> > {
> > return niov->owner;
> > diff --git a/include/net/netmem_type.h b/include/net/netmem_type.h
> > new file mode 100644
> > index 0000000000000..6a3ac8e908515
> > --- /dev/null
> > +++ b/include/net/netmem_type.h
> > @@ -0,0 +1,22 @@
> > +/* SPDX-License-Identifier: GPL-2.0
> > + *
> > + * Author: Byungchul Park <max.byungchul.park@gmail.com>
>
> Shouldn't Minas authorship be preserved here?
Ah. I dunno what I'm supposed to do in the case. I will remove the
author part :) if it's still okay.
Byungchul
> > + */
> > +
> > +#ifndef _NET_NETMEM_TYPE_H
> > +#define _NET_NETMEM_TYPE_H
> > +
> > +#include <linux/stddef.h>
> > +
> > +struct netmem_desc {
> > + unsigned long __unused_padding;
> > + struct_group_tagged(__netmem_desc, actual_data,
> > + unsigned long pp_magic;
> > + struct page_pool *pp;
> > + struct net_iov_area *owner;
> > + unsigned long dma_addr;
> > + atomic_long_t pp_ref_count;
> > + );
> > +};
> > +
> > +#endif /* _NET_NETMEM_TYPE_H */
> > --
> > 2.17.1
> >
>
> [0] https://lore.kernel.org/netdev/1549550196-25581-1-git-send-email-ilias.apalodimas@linaro.org/
> Thanks
> /Ilias
On Fri, May 09, 2025 at 08:51:26PM +0900, Byungchul Park wrote:
> +++ b/include/linux/mm_types.h
> @@ -20,6 +20,7 @@
> #include <linux/seqlock.h>
> #include <linux/percpu_counter.h>
> #include <linux/types.h>
> +#include <net/netmem_type.h> /* for page pool */
>
> #include <asm/mmu.h>
>
> @@ -118,17 +119,7 @@ struct page {
> */
> unsigned long private;
> };
> - struct { /* page_pool used by netstack */
> - /**
> - * @pp_magic: magic value to avoid recycling non
> - * page_pool allocated pages.
> - */
> - unsigned long pp_magic;
> - struct page_pool *pp;
> - unsigned long _pp_mapping_pad;
> - unsigned long dma_addr;
> - atomic_long_t pp_ref_count;
> - };
> + struct __netmem_desc place_holder_1; /* for page pool */
> struct { /* Tail pages of compound page */
> unsigned long compound_head; /* Bit zero is set */
> };
The include and the place holder aren't needed.
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index 00064e766b889..c414de6c6ab0d 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -10,6 +10,7 @@
>
> #include <linux/mm.h>
> #include <net/net_debug.h>
> +#include <net/netmem_type.h>
... which I think means you don't need the separate header file.
> /* net_iov */
>
> @@ -20,15 +21,6 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
> */
> #define NET_IOV 0x01UL
>
> -struct netmem_desc {
> - unsigned long __unused_padding;
> - unsigned long pp_magic;
> - struct page_pool *pp;
> - struct net_iov_area *owner;
> - unsigned long dma_addr;
> - atomic_long_t pp_ref_count;
> -};
> -
> struct net_iov_area {
> /* Array of net_iovs for this area. */
> struct netmem_desc *niovs;
> @@ -38,31 +30,6 @@ struct net_iov_area {
> unsigned long base_virtual;
> };
>
> -/* These fields in struct page are used by the page_pool and net stack:
> - *
> - * struct {
> - * unsigned long pp_magic;
> - * struct page_pool *pp;
> - * unsigned long _pp_mapping_pad;
> - * unsigned long dma_addr;
> - * atomic_long_t pp_ref_count;
> - * };
> - *
> - * We mirror the page_pool fields here so the page_pool can access these fields
> - * without worrying whether the underlying fields belong to a page or net_iov.
> - *
> - * The non-net stack fields of struct page are private to the mm stack and must
> - * never be mirrored to net_iov.
> - */
> -#define NET_IOV_ASSERT_OFFSET(pg, iov) \
> - static_assert(offsetof(struct page, pg) == \
> - offsetof(struct netmem_desc, iov))
> -NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
> -NET_IOV_ASSERT_OFFSET(pp, pp);
> -NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
> -NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
> -#undef NET_IOV_ASSERT_OFFSET
... but you do want to keep asserting that netmem_desc and
net_iov have the same offsets. And you want to assert that struct page
is big enough to hold everything in netmem_desc, like we do for slab:
static_assert(sizeof(struct slab) <= sizeof(struct page));
On Fri, May 09, 2025 at 07:02:30PM +0100, Matthew Wilcox wrote:
> On Fri, May 09, 2025 at 08:51:26PM +0900, Byungchul Park wrote:
> > +++ b/include/linux/mm_types.h
> > @@ -20,6 +20,7 @@
> > #include <linux/seqlock.h>
> > #include <linux/percpu_counter.h>
> > #include <linux/types.h>
> > +#include <net/netmem_type.h> /* for page pool */
> >
> > #include <asm/mmu.h>
> >
> > @@ -118,17 +119,7 @@ struct page {
> > */
> > unsigned long private;
> > };
> > - struct { /* page_pool used by netstack */
> > - /**
> > - * @pp_magic: magic value to avoid recycling non
> > - * page_pool allocated pages.
> > - */
> > - unsigned long pp_magic;
> > - struct page_pool *pp;
> > - unsigned long _pp_mapping_pad;
> > - unsigned long dma_addr;
> > - atomic_long_t pp_ref_count;
> > - };
> > + struct __netmem_desc place_holder_1; /* for page pool */
> > struct { /* Tail pages of compound page */
> > unsigned long compound_head; /* Bit zero is set */
> > };
>
> The include and the place holder aren't needed.
Or netmem_desc overlaying struct page might be conflict with other
fields of sturct page e.g. _mapcount, _refcount and the like, once the
layout of struct page *extremly changes* in the future before
netmem_desc has its own instance.
So placing a place holder like this is the safest way, IMO, to prevent
the unextected result. Am I missing something?
> > diff --git a/include/net/netmem.h b/include/net/netmem.h
> > index 00064e766b889..c414de6c6ab0d 100644
> > --- a/include/net/netmem.h
> > +++ b/include/net/netmem.h
> > @@ -10,6 +10,7 @@
> >
> > #include <linux/mm.h>
> > #include <net/net_debug.h>
> > +#include <net/netmem_type.h>
>
> ... which I think means you don't need the separate header file.
Agree if I don't use the place holder in mm_types.h.
> > /* net_iov */
> >
> > @@ -20,15 +21,6 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
> > */
> > #define NET_IOV 0x01UL
> >
> > -struct netmem_desc {
> > - unsigned long __unused_padding;
> > - unsigned long pp_magic;
> > - struct page_pool *pp;
> > - struct net_iov_area *owner;
> > - unsigned long dma_addr;
> > - atomic_long_t pp_ref_count;
> > -};
> > -
> > struct net_iov_area {
> > /* Array of net_iovs for this area. */
> > struct netmem_desc *niovs;
> > @@ -38,31 +30,6 @@ struct net_iov_area {
> > unsigned long base_virtual;
> > };
> >
> > -/* These fields in struct page are used by the page_pool and net stack:
> > - *
> > - * struct {
> > - * unsigned long pp_magic;
> > - * struct page_pool *pp;
> > - * unsigned long _pp_mapping_pad;
> > - * unsigned long dma_addr;
> > - * atomic_long_t pp_ref_count;
> > - * };
> > - *
> > - * We mirror the page_pool fields here so the page_pool can access these fields
> > - * without worrying whether the underlying fields belong to a page or net_iov.
> > - *
> > - * The non-net stack fields of struct page are private to the mm stack and must
> > - * never be mirrored to net_iov.
> > - */
> > -#define NET_IOV_ASSERT_OFFSET(pg, iov) \
> > - static_assert(offsetof(struct page, pg) == \
> > - offsetof(struct netmem_desc, iov))
> > -NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
> > -NET_IOV_ASSERT_OFFSET(pp, pp);
> > -NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
> > -NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
> > -#undef NET_IOV_ASSERT_OFFSET
>
> ... but you do want to keep asserting that netmem_desc and
> net_iov have the same offsets. And you want to assert that struct page
> is big enough to hold everything in netmem_desc, like we do for slab:
>
> static_assert(sizeof(struct slab) <= sizeof(struct page));
I will. However, as I mentioned above, the total size doesn't matter
but the layout change of struct page might matter, I think.
Byungchul
On Mon, May 12, 2025 at 09:51:03PM +0900, Byungchul Park wrote:
> On Fri, May 09, 2025 at 07:02:30PM +0100, Matthew Wilcox wrote:
> > > + struct __netmem_desc place_holder_1; /* for page pool */
> > > struct { /* Tail pages of compound page */
> > > unsigned long compound_head; /* Bit zero is set */
> > > };
> >
> > The include and the place holder aren't needed.
>
> Or netmem_desc overlaying struct page might be conflict with other
> fields of sturct page e.g. _mapcount, _refcount and the like, once the
> layout of struct page *extremly changes* in the future before
> netmem_desc has its own instance.
That's not how it'll happen. When the layout of struct page changes,
it'll shrink substantially (cut in half). That means that dynamically
allocating netmem_desc must happen first (along with slab, folio, etc,
etc).
On Mon, May 12, 2025 at 03:42:54PM +0100, Matthew Wilcox wrote:
> On Mon, May 12, 2025 at 09:51:03PM +0900, Byungchul Park wrote:
> > On Fri, May 09, 2025 at 07:02:30PM +0100, Matthew Wilcox wrote:
> > > > + struct __netmem_desc place_holder_1; /* for page pool */
> > > > struct { /* Tail pages of compound page */
> > > > unsigned long compound_head; /* Bit zero is set */
> > > > };
> > >
> > > The include and the place holder aren't needed.
> >
> > Or netmem_desc overlaying struct page might be conflict with other
> > fields of sturct page e.g. _mapcount, _refcount and the like, once the
> > layout of struct page *extremly changes* in the future before
> > netmem_desc has its own instance.
>
> That's not how it'll happen. When the layout of struct page changes,
> it'll shrink substantially (cut in half). That means that dynamically
> allocating netmem_desc must happen first (along with slab, folio, etc,
> etc).
Just in case, lemme explain what I meant, for *example*:
struct page {
unsigned long flags;
union {
struct A { /* 24 bytes */ };
struct B { /* 40 bytes */ };
struct page_pool { /* 40 bytes */ };
};
atomic_t _mapcount;
atomic_t _refcount;
unsigend long memcf_data;
...
};
/* overlayed on struct page */
struct netmem_desc { /* 8 + 40 bytes */ };
After removing page_pool fields:
struct page {
unsigned long flags;
union {
struct A { /* 24 bytes */ };
struct B { /* 40 bytes */ };
};
atomic_t _mapcount;
atomic_t _refcount;
unsigend long memcf_data;
...
};
/* overlayed on struct page */
struct netmem_desc { /* 8 + 40 bytes */ };
The above still looks okay cuz operating on struct netmem_desc is not
touching _mapcount or _refcount in struct page.
However, either the size of struct B gets reduced to 32 bytes or struct B
gets away out of struct page for some reason, it will look like:
struct page {
unsigned long flags;
union {
struct A { /* 24 bytes */ };
struct B { /* 32 bytes */ };
};
atomic_t _mapcount;
atomic_t _refcount;
unsigend long memcf_data;
...
};
/* overlayed on struct page */
struct netmem_desc { /* 8 + 40 bytes */ };
In here, operating on struct netmem_desc can smash _mapcount and
_refcount in struct page unexpectedly, even though sizeof(struct
netmem_desc) <= sizeof(struct page). That's why I think the place holder
is necessary until it completely gets separated so as to have its own
instance.
If you believe it's still okay, I will remove the place holder, I still
concern it tho.
Byungchul
On Tue, May 13, 2025 at 10:42:00AM +0900, Byungchul Park wrote: > Just in case, lemme explain what I meant, for *example*: I understood what you meant. > In here, operating on struct netmem_desc can smash _mapcount and > _refcount in struct page unexpectedly, even though sizeof(struct > netmem_desc) <= sizeof(struct page). That's why I think the place holder > is necessary until it completely gets separated so as to have its own > instance. We could tighten up the assert a bit. eg static_assert(sizeof(struct netmem_desc) <= offsetof(struct page, _refcount)); We _can't_ shrink struct page until struct folio is dynamically allocated. The same patch series that dynamically allocates folio will do the same for netmem and slab and ptdesc and ...
On Tue, May 13, 2025 at 04:19:03AM +0100, Matthew Wilcox wrote: > On Tue, May 13, 2025 at 10:42:00AM +0900, Byungchul Park wrote: > > Just in case, lemme explain what I meant, for *example*: > > I understood what you meant. > > > In here, operating on struct netmem_desc can smash _mapcount and > > _refcount in struct page unexpectedly, even though sizeof(struct > > netmem_desc) <= sizeof(struct page). That's why I think the place holder > > is necessary until it completely gets separated so as to have its own > > instance. > > We could tighten up the assert a bit. eg > > static_assert(sizeof(struct netmem_desc) <= offsetof(struct page, _refcount)); This mitigates what I concern. I will replace the place holder with this (but it must never happen to relocate the fields in struct page by any chance for any reason until the day. I trust you :). Byungchul > We _can't_ shrink struct page until struct folio is dynamically > allocated. The same patch series that dynamically allocates folio will > do the same for netmem and slab and ptdesc and ...
On Fri, May 9, 2025 at 4:51 AM Byungchul Park <byungchul@sk.com> wrote:
>
> Now that all the users of the page pool members in struct page have been
> gone, the members can be removed from struct page. However, the space
> in struct page needs to be kept using a place holder with the same size,
> until struct netmem_desc has its own instance, not overlayed onto struct
> page, to avoid conficting with other members within struct page.
>
> Remove the page pool members in struct page and replace with a place
> holder. The place holder should be removed once struct netmem_desc has
> its own instance.
>
> Signed-off-by: Byungchul Park <byungchul@sk.com>
> ---
> include/linux/mm_types.h | 13 ++-----------
> include/net/netmem.h | 35 +----------------------------------
> include/net/netmem_type.h | 22 ++++++++++++++++++++++
> 3 files changed, 25 insertions(+), 45 deletions(-)
> create mode 100644 include/net/netmem_type.h
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index e76bade9ebb12..69904a0855358 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -20,6 +20,7 @@
> #include <linux/seqlock.h>
> #include <linux/percpu_counter.h>
> #include <linux/types.h>
> +#include <net/netmem_type.h> /* for page pool */
>
> #include <asm/mmu.h>
>
> @@ -118,17 +119,7 @@ struct page {
> */
> unsigned long private;
> };
> - struct { /* page_pool used by netstack */
> - /**
> - * @pp_magic: magic value to avoid recycling non
> - * page_pool allocated pages.
> - */
> - unsigned long pp_magic;
> - struct page_pool *pp;
> - unsigned long _pp_mapping_pad;
> - unsigned long dma_addr;
> - atomic_long_t pp_ref_count;
> - };
> + struct __netmem_desc place_holder_1; /* for page pool */
> struct { /* Tail pages of compound page */
> unsigned long compound_head; /* Bit zero is set */
> };
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index 00064e766b889..c414de6c6ab0d 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -10,6 +10,7 @@
>
> #include <linux/mm.h>
> #include <net/net_debug.h>
> +#include <net/netmem_type.h>
>
> /* net_iov */
>
> @@ -20,15 +21,6 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
> */
> #define NET_IOV 0x01UL
>
> -struct netmem_desc {
> - unsigned long __unused_padding;
> - unsigned long pp_magic;
> - struct page_pool *pp;
> - struct net_iov_area *owner;
> - unsigned long dma_addr;
> - atomic_long_t pp_ref_count;
> -};
> -
> struct net_iov_area {
> /* Array of net_iovs for this area. */
> struct netmem_desc *niovs;
> @@ -38,31 +30,6 @@ struct net_iov_area {
> unsigned long base_virtual;
> };
>
> -/* These fields in struct page are used by the page_pool and net stack:
> - *
> - * struct {
> - * unsigned long pp_magic;
> - * struct page_pool *pp;
> - * unsigned long _pp_mapping_pad;
> - * unsigned long dma_addr;
> - * atomic_long_t pp_ref_count;
> - * };
> - *
> - * We mirror the page_pool fields here so the page_pool can access these fields
> - * without worrying whether the underlying fields belong to a page or net_iov.
> - *
> - * The non-net stack fields of struct page are private to the mm stack and must
> - * never be mirrored to net_iov.
> - */
> -#define NET_IOV_ASSERT_OFFSET(pg, iov) \
> - static_assert(offsetof(struct page, pg) == \
> - offsetof(struct netmem_desc, iov))
> -NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
> -NET_IOV_ASSERT_OFFSET(pp, pp);
> -NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
> -NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
> -#undef NET_IOV_ASSERT_OFFSET
> -
> static inline struct net_iov_area *net_iov_owner(const struct netmem_desc *niov)
> {
> return niov->owner;
> diff --git a/include/net/netmem_type.h b/include/net/netmem_type.h
> new file mode 100644
> index 0000000000000..6a3ac8e908515
> --- /dev/null
> +++ b/include/net/netmem_type.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0
> + *
> + * Author: Byungchul Park <max.byungchul.park@gmail.com>
> + */
> +
> +#ifndef _NET_NETMEM_TYPE_H
> +#define _NET_NETMEM_TYPE_H
> +
> +#include <linux/stddef.h>
> +
> +struct netmem_desc {
> + unsigned long __unused_padding;
> + struct_group_tagged(__netmem_desc, actual_data,
> + unsigned long pp_magic;
> + struct page_pool *pp;
> + struct net_iov_area *owner;
> + unsigned long dma_addr;
> + atomic_long_t pp_ref_count;
> + );
> +};
> +
> +#endif /* _NET_NETMEM_TYPE_H */
> --
> 2.17.1
>
Currently the only restriction on net_iov is that some of its fields
need to be cache aligned with some of the fields of struct page, but
there is no restriction on new fields added to net_iov. We already
have fields in net_iov that have nothing to do with struct page and
shouldn't be part of struct page. Like net_iov_area *owner. I don't
think net_iov_area should be part of struct page and I don't think we
should add restrictions of net_iov.
What I would suggest here is, roughly:
1. Add a new struct:
struct netmem_desc {
unsigned long pp_magic;
struct page_pool *pp;
unsigned long _pp_mapping_pad;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
};
2. Then update struct page to include this entry instead of the definitions:
struct page {
...
struct netmem_desc place_holder_1; /* for page pool */
...
}
3. And update struct net_iov to also include netmem_desc:
struct net_iov {
struct netmem_desc desc;
struct net_iov_area *owner;
/* More net_iov specific fields in the future */
};
And drop patch 1 which does a rename.
Essentially netmem_desc can be an encapsulation of the shared fields
between struct page and struct net_iov.
--
Thanks,
Mina
On Fri, May 09, 2025 at 10:32:08AM -0700, Mina Almasry wrote:
> Currently the only restriction on net_iov is that some of its fields
> need to be cache aligned with some of the fields of struct page, but
Cache aligned? Do you mean alias (ie be at the same offset)?
> What I would suggest here is, roughly:
>
> 1. Add a new struct:
>
> struct netmem_desc {
> unsigned long pp_magic;
> struct page_pool *pp;
> unsigned long _pp_mapping_pad;
> unsigned long dma_addr;
> atomic_long_t pp_ref_count;
> };
>
> 2. Then update struct page to include this entry instead of the definitions:
>
> struct page {
> ...
> struct netmem_desc place_holder_1; /* for page pool */
> ...
> }
No, the point is to move these fields out of struct page entirely.
At some point (probably this year), we'll actually kmalloc the netmem_desc
(and shrink struct page), but for now, it'll overlap the other fields
in struct page.
> 3. And update struct net_iov to also include netmem_desc:
>
> struct net_iov {
> struct netmem_desc desc;
> struct net_iov_area *owner;
> /* More net_iov specific fields in the future */
> };
>
> And drop patch 1 which does a rename.
>
> Essentially netmem_desc can be an encapsulation of the shared fields
> between struct page and struct net_iov.
That is not the goal.
On Fri, May 9, 2025 at 11:11 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Fri, May 09, 2025 at 10:32:08AM -0700, Mina Almasry wrote:
> > Currently the only restriction on net_iov is that some of its fields
> > need to be cache aligned with some of the fields of struct page, but
>
> Cache aligned? Do you mean alias (ie be at the same offset)?
>
> > What I would suggest here is, roughly:
> >
> > 1. Add a new struct:
> >
> > struct netmem_desc {
> > unsigned long pp_magic;
> > struct page_pool *pp;
> > unsigned long _pp_mapping_pad;
> > unsigned long dma_addr;
> > atomic_long_t pp_ref_count;
> > };
> >
> > 2. Then update struct page to include this entry instead of the definitions:
> >
> > struct page {
> > ...
> > struct netmem_desc place_holder_1; /* for page pool */
> > ...
> > }
>
> No, the point is to move these fields out of struct page entirely.
>
> At some point (probably this year), we'll actually kmalloc the netmem_desc
> (and shrink struct page), but for now, it'll overlap the other fields
> in struct page.
>
Right, all I'm saying is that if it's at all possible to keep net_iov
something that can be extended with fields unrelated to struct page,
lets do that. net_iov already has fields that should not belong in
struct page like net_iov_owner and I think more will be added.
I'm thinking netmem_desc can be the fields that are shared between
struct net_iov and struct page (but both can have more specific to the
different memory types). As you say, for now netmem_desc can currently
overlap fields in struct page and struct net_iov, and a follow up
change can replace it with something that gets kmalloced and (I
guess?) there is a pointer in struct page or struct net_iov that
refers to the netmem_desc that contains the shared fields.
--
Thanks,
Mina
On Fri, May 09, 2025 at 12:04:37PM -0700, Mina Almasry wrote: > Right, all I'm saying is that if it's at all possible to keep net_iov > something that can be extended with fields unrelated to struct page, > lets do that. net_iov already has fields that should not belong in > struct page like net_iov_owner and I think more will be added. Sure, that's fine. > I'm thinking netmem_desc can be the fields that are shared between > struct net_iov and struct page (but both can have more specific to the > different memory types). As you say, for now netmem_desc can currently > overlap fields in struct page and struct net_iov, and a follow up > change can replace it with something that gets kmalloced and (I > guess?) there is a pointer in struct page or struct net_iov that > refers to the netmem_desc that contains the shared fields. I'm sure I've pointed you at https://kernelnewbies.org/MatthewWilcox/Memdescs before. But I wouldn't expect to have net_iov contain a pointer to netmem_desc, rather it would embed a netmem_desc. Unless there's a good reason to separate them. Actually, I'd hope to do away with net_iov entirely. Networking should handle memory-on-PCI-devices the same way everybody else does (as hotplugged memory) rather than with its own special structures.
On Fri, May 9, 2025 at 12:48 PM Matthew Wilcox <willy@infradead.org> wrote: > > On Fri, May 09, 2025 at 12:04:37PM -0700, Mina Almasry wrote: > > Right, all I'm saying is that if it's at all possible to keep net_iov > > something that can be extended with fields unrelated to struct page, > > lets do that. net_iov already has fields that should not belong in > > struct page like net_iov_owner and I think more will be added. > > Sure, that's fine. > Excellent! > > I'm thinking netmem_desc can be the fields that are shared between > > struct net_iov and struct page (but both can have more specific to the > > different memory types). As you say, for now netmem_desc can currently > > overlap fields in struct page and struct net_iov, and a follow up > > change can replace it with something that gets kmalloced and (I > > guess?) there is a pointer in struct page or struct net_iov that > > refers to the netmem_desc that contains the shared fields. > > I'm sure I've pointed you at > https://kernelnewbies.org/MatthewWilcox/Memdescs before. > I've gone through that again. Some of it is a bit over my head (sorry), but this page does say that page->compound_head will have a link to memdesc: https://kernelnewbies.org/MatthewWilcox/Memdescs/Path That's an approach that sounds fine to me. We can have net_iov follow that pattern if necessary (have in it a field that points to the memdesc). > But I wouldn't expect to have net_iov contain a pointer to netmem_desc, > rather it would embed a netmem_desc. Unless there's a good reason to > separate them. > net_iov embedding netmem_desc sounds fine as well to me. > Actually, I'd hope to do away with net_iov entirely. Networking should > handle memory-on-PCI-devices the same way everybody else does (as > hotplugged memory) rather than with its own special structures. > Doing away with net_iov entirely is a different conversation. From the devmem TCP side, you're much more of an expert than me but my experience is that the GPU devices we initially net_iovs for, dmabuf is the standard way of sharing memory, and the dma-buf importer just gets a scatterlist, and has to either work with the scatterlist directly or create descriptors (like net_iov) to handle chunks of the scatterlist. I think we discussed this before and you said to me you have long term plans to get rid of scatterlists. Once that is done we may be able to do away with the dma-buf use case for net_iovs, but the conversation about migrating scatterlists to something new is well over my head and probably needs discussion with the dma-buf maintainers. Note also that the users of net_iov have expanded and io_uring has a dependency on it as well. The good news (I think) is that Byungchul's effort does not require the removal of net_iov. From looking at this patchset I think what he's trying to do is very compatible with net_iovs with minor modifications. -- Thanks, Mina
© 2016 - 2025 Red Hat, Inc.