To simplify struct page, the page pool members of struct page should be
moved to other, allowing these members to be removed from struct page.
Introduce a network memory descriptor to store the members, struct
netmem_desc, reusing struct net_iov that already mirrored struct page.
While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
Signed-off-by: Byungchul Park <byungchul@sk.com>
---
include/linux/mm_types.h | 2 +-
include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
2 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 56d07edd01f9..873e820e1521 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -120,13 +120,13 @@ struct page {
unsigned long private;
};
struct { /* page_pool used by netstack */
+ unsigned long _pp_mapping_pad;
/**
* @pp_magic: magic value to avoid recycling non
* page_pool allocated pages.
*/
unsigned long pp_magic;
struct page_pool *pp;
- unsigned long _pp_mapping_pad;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
};
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 386164fb9c18..08e9d76cdf14 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -31,12 +31,41 @@ enum net_iov_type {
};
struct net_iov {
- enum net_iov_type type;
- unsigned long pp_magic;
- struct page_pool *pp;
- struct net_iov_area *owner;
- unsigned long dma_addr;
- atomic_long_t pp_ref_count;
+ /*
+ * XXX: Now that struct netmem_desc overlays on struct page,
+ * struct_group_tagged() should cover all of them. However,
+ * a separate struct netmem_desc should be declared and embedded,
+ * once struct netmem_desc is no longer overlayed but it has its
+ * own instance from slab. The final form should be:
+ *
+ * struct netmem_desc {
+ * unsigned long pp_magic;
+ * struct page_pool *pp;
+ * unsigned long dma_addr;
+ * atomic_long_t pp_ref_count;
+ * };
+ *
+ * struct net_iov {
+ * enum net_iov_type type;
+ * struct net_iov_area *owner;
+ * struct netmem_desc;
+ * };
+ */
+ struct_group_tagged(netmem_desc, desc,
+ /*
+ * only for struct net_iov
+ */
+ enum net_iov_type type;
+ struct net_iov_area *owner;
+
+ /*
+ * actually for struct netmem_desc
+ */
+ unsigned long pp_magic;
+ struct page_pool *pp;
+ unsigned long dma_addr;
+ atomic_long_t pp_ref_count;
+ );
};
struct net_iov_area {
@@ -51,9 +80,9 @@ struct net_iov_area {
/* These fields in struct page are used by the page_pool and net stack:
*
* struct {
+ * unsigned long _pp_mapping_pad;
* unsigned long pp_magic;
* struct page_pool *pp;
- * unsigned long _pp_mapping_pad;
* unsigned long dma_addr;
* atomic_long_t pp_ref_count;
* };
--
2.17.1
On Fri, May 23, 2025 at 12:25:52PM +0900, Byungchul Park wrote:
> To simplify struct page, the page pool members of struct page should be
> moved to other, allowing these members to be removed from struct page.
>
> Introduce a network memory descriptor to store the members, struct
> netmem_desc, reusing struct net_iov that already mirrored struct page.
>
> While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
>
> Signed-off-by: Byungchul Park <byungchul@sk.com>
> ---
> include/linux/mm_types.h | 2 +-
> include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
> 2 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 56d07edd01f9..873e820e1521 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -120,13 +120,13 @@ struct page {
> unsigned long private;
> };
> struct { /* page_pool used by netstack */
> + unsigned long _pp_mapping_pad;
> /**
> * @pp_magic: magic value to avoid recycling non
> * page_pool allocated pages.
> */
> unsigned long pp_magic;
> struct page_pool *pp;
> - unsigned long _pp_mapping_pad;
> unsigned long dma_addr;
> atomic_long_t pp_ref_count;
> };
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index 386164fb9c18..08e9d76cdf14 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -31,12 +31,41 @@ enum net_iov_type {
> };
>
> struct net_iov {
> - enum net_iov_type type;
> - unsigned long pp_magic;
> - struct page_pool *pp;
> - struct net_iov_area *owner;
> - unsigned long dma_addr;
> - atomic_long_t pp_ref_count;
> + /*
> + * XXX: Now that struct netmem_desc overlays on struct page,
> + * struct_group_tagged() should cover all of them. However,
> + * a separate struct netmem_desc should be declared and embedded,
> + * once struct netmem_desc is no longer overlayed but it has its
> + * own instance from slab. The final form should be:
> + *
> + * struct netmem_desc {
> + * unsigned long pp_magic;
> + * struct page_pool *pp;
> + * unsigned long dma_addr;
> + * atomic_long_t pp_ref_count;
> + * };
> + *
> + * struct net_iov {
> + * enum net_iov_type type;
> + * struct net_iov_area *owner;
> + * struct netmem_desc;
> + * };
> + */
> + struct_group_tagged(netmem_desc, desc,
So.. For now, this is the best option we can pick. We can do all that
you told me once struct netmem_desc has it own instance from slab.
Again, it's because the page pool fields (or netmem things) from struct
page will be gone by this series.
Mina, thoughts?
Byungchul
> + /*
> + * only for struct net_iov
> + */
> + enum net_iov_type type;
> + struct net_iov_area *owner;
> +
> + /*
> + * actually for struct netmem_desc
> + */
> + unsigned long pp_magic;
> + struct page_pool *pp;
> + unsigned long dma_addr;
> + atomic_long_t pp_ref_count;
> + );
> };
>
> struct net_iov_area {
> @@ -51,9 +80,9 @@ struct net_iov_area {
> /* These fields in struct page are used by the page_pool and net stack:
> *
> * struct {
> + * unsigned long _pp_mapping_pad;
> * unsigned long pp_magic;
> * struct page_pool *pp;
> - * unsigned long _pp_mapping_pad;
> * unsigned long dma_addr;
> * atomic_long_t pp_ref_count;
> * };
> --
> 2.17.1
On Mon, May 26, 2025 at 7:50 PM Byungchul Park <byungchul@sk.com> wrote:
>
> On Fri, May 23, 2025 at 12:25:52PM +0900, Byungchul Park wrote:
> > To simplify struct page, the page pool members of struct page should be
> > moved to other, allowing these members to be removed from struct page.
> >
> > Introduce a network memory descriptor to store the members, struct
> > netmem_desc, reusing struct net_iov that already mirrored struct page.
> >
> > While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
> >
> > Signed-off-by: Byungchul Park <byungchul@sk.com>
> > ---
> > include/linux/mm_types.h | 2 +-
> > include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
> > 2 files changed, 37 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > index 56d07edd01f9..873e820e1521 100644
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -120,13 +120,13 @@ struct page {
> > unsigned long private;
> > };
> > struct { /* page_pool used by netstack */
> > + unsigned long _pp_mapping_pad;
> > /**
> > * @pp_magic: magic value to avoid recycling non
> > * page_pool allocated pages.
> > */
> > unsigned long pp_magic;
> > struct page_pool *pp;
> > - unsigned long _pp_mapping_pad;
> > unsigned long dma_addr;
> > atomic_long_t pp_ref_count;
> > };
> > diff --git a/include/net/netmem.h b/include/net/netmem.h
> > index 386164fb9c18..08e9d76cdf14 100644
> > --- a/include/net/netmem.h
> > +++ b/include/net/netmem.h
> > @@ -31,12 +31,41 @@ enum net_iov_type {
> > };
> >
> > struct net_iov {
> > - enum net_iov_type type;
> > - unsigned long pp_magic;
> > - struct page_pool *pp;
> > - struct net_iov_area *owner;
> > - unsigned long dma_addr;
> > - atomic_long_t pp_ref_count;
> > + /*
> > + * XXX: Now that struct netmem_desc overlays on struct page,
> > + * struct_group_tagged() should cover all of them. However,
> > + * a separate struct netmem_desc should be declared and embedded,
> > + * once struct netmem_desc is no longer overlayed but it has its
> > + * own instance from slab. The final form should be:
> > + *
> > + * struct netmem_desc {
> > + * unsigned long pp_magic;
> > + * struct page_pool *pp;
> > + * unsigned long dma_addr;
> > + * atomic_long_t pp_ref_count;
> > + * };
> > + *
> > + * struct net_iov {
> > + * enum net_iov_type type;
> > + * struct net_iov_area *owner;
> > + * struct netmem_desc;
> > + * };
> > + */
> > + struct_group_tagged(netmem_desc, desc,
>
> So.. For now, this is the best option we can pick. We can do all that
> you told me once struct netmem_desc has it own instance from slab.
>
> Again, it's because the page pool fields (or netmem things) from struct
> page will be gone by this series.
>
> Mina, thoughts?
>
Can you please post an updated series with the approach you have in
mind? I think this series as-is seems broken vis-a-vie the
_pp_padding_map param move that looks incorrect. Pavel and I have also
commented on patch 18 that removing the ASSERTS seems incorrect as
it's breaking the symmetry between struct page and struct net_iov.
It's not clear to me if the fields are being removed from struct page,
where are they going... the approach ptdesc for example has taken is
to create a mirror of struct page, then show via asserts that the
mirror is equivalent to struct page, AFAIU:
https://elixir.bootlin.com/linux/v6.14.3/source/include/linux/mm_types.h#L437
Also the same approach for zpdesc:
https://elixir.bootlin.com/linux/v6.14.3/source/mm/zpdesc.h#L29
In this series you're removing the entries from struct page, I'm not
really sure where they went, and you're removing the asserts that we
have between net_iov and struct page so we're not even sure that those
are in sync anymore. I would suggest for me at least reposting with
the new types you have in mind and with clear asserts showing what is
meant to be in sync (and overlay) what.
--
Thanks,
Mina
On Tue, May 27, 2025 at 01:03:32PM -0700, Mina Almasry wrote:
> On Mon, May 26, 2025 at 7:50 PM Byungchul Park <byungchul@sk.com> wrote:
> >
> > On Fri, May 23, 2025 at 12:25:52PM +0900, Byungchul Park wrote:
> > > To simplify struct page, the page pool members of struct page should be
> > > moved to other, allowing these members to be removed from struct page.
> > >
> > > Introduce a network memory descriptor to store the members, struct
> > > netmem_desc, reusing struct net_iov that already mirrored struct page.
> > >
> > > While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
> > >
> > > Signed-off-by: Byungchul Park <byungchul@sk.com>
> > > ---
> > > include/linux/mm_types.h | 2 +-
> > > include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
> > > 2 files changed, 37 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > > index 56d07edd01f9..873e820e1521 100644
> > > --- a/include/linux/mm_types.h
> > > +++ b/include/linux/mm_types.h
> > > @@ -120,13 +120,13 @@ struct page {
> > > unsigned long private;
> > > };
> > > struct { /* page_pool used by netstack */
> > > + unsigned long _pp_mapping_pad;
> > > /**
> > > * @pp_magic: magic value to avoid recycling non
> > > * page_pool allocated pages.
> > > */
> > > unsigned long pp_magic;
> > > struct page_pool *pp;
> > > - unsigned long _pp_mapping_pad;
> > > unsigned long dma_addr;
> > > atomic_long_t pp_ref_count;
> > > };
> > > diff --git a/include/net/netmem.h b/include/net/netmem.h
> > > index 386164fb9c18..08e9d76cdf14 100644
> > > --- a/include/net/netmem.h
> > > +++ b/include/net/netmem.h
> > > @@ -31,12 +31,41 @@ enum net_iov_type {
> > > };
> > >
> > > struct net_iov {
> > > - enum net_iov_type type;
> > > - unsigned long pp_magic;
> > > - struct page_pool *pp;
> > > - struct net_iov_area *owner;
> > > - unsigned long dma_addr;
> > > - atomic_long_t pp_ref_count;
> > > + /*
> > > + * XXX: Now that struct netmem_desc overlays on struct page,
> > > + * struct_group_tagged() should cover all of them. However,
> > > + * a separate struct netmem_desc should be declared and embedded,
> > > + * once struct netmem_desc is no longer overlayed but it has its
> > > + * own instance from slab. The final form should be:
> > > + *
> > > + * struct netmem_desc {
> > > + * unsigned long pp_magic;
> > > + * struct page_pool *pp;
> > > + * unsigned long dma_addr;
> > > + * atomic_long_t pp_ref_count;
> > > + * };
> > > + *
> > > + * struct net_iov {
> > > + * enum net_iov_type type;
> > > + * struct net_iov_area *owner;
> > > + * struct netmem_desc;
> > > + * };
> > > + */
> > > + struct_group_tagged(netmem_desc, desc,
> >
> > So.. For now, this is the best option we can pick. We can do all that
> > you told me once struct netmem_desc has it own instance from slab.
> >
> > Again, it's because the page pool fields (or netmem things) from struct
> > page will be gone by this series.
> >
> > Mina, thoughts?
> >
>
> Can you please post an updated series with the approach you have in
> mind? I think this series as-is seems broken vis-a-vie the
> _pp_padding_map param move that looks incorrect. Pavel and I have also
> commented on patch 18 that removing the ASSERTS seems incorrect as
> it's breaking the symmetry between struct page and struct net_iov.
I told you I will fix it. I will send the updated series shortly for
*review*. However, it will be for review since we know this work can be
completed once the next works have been done:
https://lore.kernel.org/all/20250520205920.2134829-2-anthony.l.nguyen@intel.com/
https://lore.kernel.org/all/1747950086-1246773-9-git-send-email-tariqt@nvidia.com/
> It's not clear to me if the fields are being removed from struct page,
> where are they going... the approach ptdesc for example has taken is
They are going to struct net_iov. Or I should introduce another struct
mirroring struct page as ptdesc did, that would be the exact same as
struct net_iov. Do you think I should do that?
> to create a mirror of struct page, then show via asserts that the
> mirror is equivalent to struct page, AFAIU:
>
> https://elixir.bootlin.com/linux/v6.14.3/source/include/linux/mm_types.h#L437
>
> Also the same approach for zpdesc:
>
> https://elixir.bootlin.com/linux/v6.14.3/source/mm/zpdesc.h#L29
Okay, again. Thanks.
Byungchul
> In this series you're removing the entries from struct page, I'm not
> really sure where they went, and you're removing the asserts that we
> have between net_iov and struct page so we're not even sure that those
> are in sync anymore. I would suggest for me at least reposting with
> the new types you have in mind and with clear asserts showing what is
> meant to be in sync (and overlay) what.
>
> --
> Thanks,
> Mina
On 5/28/25 02:21, Byungchul Park wrote: >>> So.. For now, this is the best option we can pick. We can do all that >>> you told me once struct netmem_desc has it own instance from slab. >>> >>> Again, it's because the page pool fields (or netmem things) from struct >>> page will be gone by this series. >>> >>> Mina, thoughts? >>> >> >> Can you please post an updated series with the approach you have in >> mind? I think this series as-is seems broken vis-a-vie the >> _pp_padding_map param move that looks incorrect. Pavel and I have also >> commented on patch 18 that removing the ASSERTS seems incorrect as >> it's breaking the symmetry between struct page and struct net_iov. > > I told you I will fix it. I will send the updated series shortly for > *review*. However, it will be for review since we know this work can be > completed once the next works have been done: Please don't forget to tag it with "RFC", otherwise nobody will assume it's for for review only. -- Pavel Begunkov
On Tue, May 27, 2025 at 6:22 PM Byungchul Park <byungchul@sk.com> wrote:
>
> On Tue, May 27, 2025 at 01:03:32PM -0700, Mina Almasry wrote:
> > On Mon, May 26, 2025 at 7:50 PM Byungchul Park <byungchul@sk.com> wrote:
> > >
> > > On Fri, May 23, 2025 at 12:25:52PM +0900, Byungchul Park wrote:
> > > > To simplify struct page, the page pool members of struct page should be
> > > > moved to other, allowing these members to be removed from struct page.
> > > >
> > > > Introduce a network memory descriptor to store the members, struct
> > > > netmem_desc, reusing struct net_iov that already mirrored struct page.
> > > >
> > > > While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
> > > >
> > > > Signed-off-by: Byungchul Park <byungchul@sk.com>
> > > > ---
> > > > include/linux/mm_types.h | 2 +-
> > > > include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
> > > > 2 files changed, 37 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > > > index 56d07edd01f9..873e820e1521 100644
> > > > --- a/include/linux/mm_types.h
> > > > +++ b/include/linux/mm_types.h
> > > > @@ -120,13 +120,13 @@ struct page {
> > > > unsigned long private;
> > > > };
> > > > struct { /* page_pool used by netstack */
> > > > + unsigned long _pp_mapping_pad;
> > > > /**
> > > > * @pp_magic: magic value to avoid recycling non
> > > > * page_pool allocated pages.
> > > > */
> > > > unsigned long pp_magic;
> > > > struct page_pool *pp;
> > > > - unsigned long _pp_mapping_pad;
> > > > unsigned long dma_addr;
> > > > atomic_long_t pp_ref_count;
> > > > };
> > > > diff --git a/include/net/netmem.h b/include/net/netmem.h
> > > > index 386164fb9c18..08e9d76cdf14 100644
> > > > --- a/include/net/netmem.h
> > > > +++ b/include/net/netmem.h
> > > > @@ -31,12 +31,41 @@ enum net_iov_type {
> > > > };
> > > >
> > > > struct net_iov {
> > > > - enum net_iov_type type;
> > > > - unsigned long pp_magic;
> > > > - struct page_pool *pp;
> > > > - struct net_iov_area *owner;
> > > > - unsigned long dma_addr;
> > > > - atomic_long_t pp_ref_count;
> > > > + /*
> > > > + * XXX: Now that struct netmem_desc overlays on struct page,
> > > > + * struct_group_tagged() should cover all of them. However,
> > > > + * a separate struct netmem_desc should be declared and embedded,
> > > > + * once struct netmem_desc is no longer overlayed but it has its
> > > > + * own instance from slab. The final form should be:
> > > > + *
> > > > + * struct netmem_desc {
> > > > + * unsigned long pp_magic;
> > > > + * struct page_pool *pp;
> > > > + * unsigned long dma_addr;
> > > > + * atomic_long_t pp_ref_count;
> > > > + * };
> > > > + *
> > > > + * struct net_iov {
> > > > + * enum net_iov_type type;
> > > > + * struct net_iov_area *owner;
> > > > + * struct netmem_desc;
> > > > + * };
> > > > + */
> > > > + struct_group_tagged(netmem_desc, desc,
> > >
> > > So.. For now, this is the best option we can pick. We can do all that
> > > you told me once struct netmem_desc has it own instance from slab.
> > >
> > > Again, it's because the page pool fields (or netmem things) from struct
> > > page will be gone by this series.
> > >
> > > Mina, thoughts?
> > >
> >
> > Can you please post an updated series with the approach you have in
> > mind? I think this series as-is seems broken vis-a-vie the
> > _pp_padding_map param move that looks incorrect. Pavel and I have also
> > commented on patch 18 that removing the ASSERTS seems incorrect as
> > it's breaking the symmetry between struct page and struct net_iov.
>
> I told you I will fix it. I will send the updated series shortly for
> *review*. However, it will be for review since we know this work can be
> completed once the next works have been done:
>
> https://lore.kernel.org/all/20250520205920.2134829-2-anthony.l.nguyen@intel.com/
> https://lore.kernel.org/all/1747950086-1246773-9-git-send-email-tariqt@nvidia.com/
>
> > It's not clear to me if the fields are being removed from struct page,
> > where are they going... the approach ptdesc for example has taken is
>
> They are going to struct net_iov.
Oh. I see. My gut reaction is I'm not sure moving the page_pool fields
to struct net_iov will work.
struct net_iov shares some fields with struct page, but abstractly
it's very different.
struct page is allocated by the mm stack via things like alloc_pages
and can be passed to mm apis such as put_page() (called from
skb_frag_ref) and vm_insert_batch (called from
tcp_zerocopy_vm_insert_batch_error).
struct net_iov is kvmalloced by networking code (see
net_devmem_bind_dmabuf for example), and *must not* be passed to any
mm apis as it's not a struct page at all. Accidentally calling
vm_insert_batch on a struct net_iov will cause a kernel crash or some
memory corruption.
Thus abstractly different things maybe should not share the same
in-kernel struct.
One thing that maybe could work is if struct net_iov has a field in it
which tells us whether it's actually a struct page that can be passed
to mm apis, or not a struct page which cannot be passed to mm apis.
> Or I should introduce another struct
maybe introducing another struct is the answer. I'm not sure. The net
stack today already supports struct page and struct net_iov, with
netmem_ref acting as an abstraction over both. Adding a 3rd struct and
adding more checks to test if page or net_iov or something new will
add overhead.
An additional problem is that there are probably hundreds or thousands
of references to 'page' in the net stack and drivers. I'm not sure
what you're going to do about those. Are you converting all those to
netmem or netmem_desc?
--
Thanks,
Mina
On Tue, May 27, 2025 at 08:47:54PM -0700, Mina Almasry wrote:
> On Tue, May 27, 2025 at 6:22 PM Byungchul Park <byungchul@sk.com> wrote:
> >
> > On Tue, May 27, 2025 at 01:03:32PM -0700, Mina Almasry wrote:
> > > On Mon, May 26, 2025 at 7:50 PM Byungchul Park <byungchul@sk.com> wrote:
> > > >
> > > > On Fri, May 23, 2025 at 12:25:52PM +0900, Byungchul Park wrote:
> > > > > To simplify struct page, the page pool members of struct page should be
> > > > > moved to other, allowing these members to be removed from struct page.
> > > > >
> > > > > Introduce a network memory descriptor to store the members, struct
> > > > > netmem_desc, reusing struct net_iov that already mirrored struct page.
> > > > >
> > > > > While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
> > > > >
> > > > > Signed-off-by: Byungchul Park <byungchul@sk.com>
> > > > > ---
> > > > > include/linux/mm_types.h | 2 +-
> > > > > include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
> > > > > 2 files changed, 37 insertions(+), 8 deletions(-)
> > > > >
> > > > > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > > > > index 56d07edd01f9..873e820e1521 100644
> > > > > --- a/include/linux/mm_types.h
> > > > > +++ b/include/linux/mm_types.h
> > > > > @@ -120,13 +120,13 @@ struct page {
> > > > > unsigned long private;
> > > > > };
> > > > > struct { /* page_pool used by netstack */
> > > > > + unsigned long _pp_mapping_pad;
> > > > > /**
> > > > > * @pp_magic: magic value to avoid recycling non
> > > > > * page_pool allocated pages.
> > > > > */
> > > > > unsigned long pp_magic;
> > > > > struct page_pool *pp;
> > > > > - unsigned long _pp_mapping_pad;
> > > > > unsigned long dma_addr;
> > > > > atomic_long_t pp_ref_count;
> > > > > };
> > > > > diff --git a/include/net/netmem.h b/include/net/netmem.h
> > > > > index 386164fb9c18..08e9d76cdf14 100644
> > > > > --- a/include/net/netmem.h
> > > > > +++ b/include/net/netmem.h
> > > > > @@ -31,12 +31,41 @@ enum net_iov_type {
> > > > > };
> > > > >
> > > > > struct net_iov {
> > > > > - enum net_iov_type type;
> > > > > - unsigned long pp_magic;
> > > > > - struct page_pool *pp;
> > > > > - struct net_iov_area *owner;
> > > > > - unsigned long dma_addr;
> > > > > - atomic_long_t pp_ref_count;
> > > > > + /*
> > > > > + * XXX: Now that struct netmem_desc overlays on struct page,
> > > > > + * struct_group_tagged() should cover all of them. However,
> > > > > + * a separate struct netmem_desc should be declared and embedded,
> > > > > + * once struct netmem_desc is no longer overlayed but it has its
> > > > > + * own instance from slab. The final form should be:
> > > > > + *
> > > > > + * struct netmem_desc {
> > > > > + * unsigned long pp_magic;
> > > > > + * struct page_pool *pp;
> > > > > + * unsigned long dma_addr;
> > > > > + * atomic_long_t pp_ref_count;
> > > > > + * };
> > > > > + *
> > > > > + * struct net_iov {
> > > > > + * enum net_iov_type type;
> > > > > + * struct net_iov_area *owner;
> > > > > + * struct netmem_desc;
> > > > > + * };
> > > > > + */
> > > > > + struct_group_tagged(netmem_desc, desc,
> > > >
> > > > So.. For now, this is the best option we can pick. We can do all that
> > > > you told me once struct netmem_desc has it own instance from slab.
> > > >
> > > > Again, it's because the page pool fields (or netmem things) from struct
> > > > page will be gone by this series.
> > > >
> > > > Mina, thoughts?
> > > >
> > >
> > > Can you please post an updated series with the approach you have in
> > > mind? I think this series as-is seems broken vis-a-vie the
> > > _pp_padding_map param move that looks incorrect. Pavel and I have also
> > > commented on patch 18 that removing the ASSERTS seems incorrect as
> > > it's breaking the symmetry between struct page and struct net_iov.
> >
> > I told you I will fix it. I will send the updated series shortly for
> > *review*. However, it will be for review since we know this work can be
> > completed once the next works have been done:
> >
> > https://lore.kernel.org/all/20250520205920.2134829-2-anthony.l.nguyen@intel.com/
> > https://lore.kernel.org/all/1747950086-1246773-9-git-send-email-tariqt@nvidia.com/
> >
> > > It's not clear to me if the fields are being removed from struct page,
> > > where are they going... the approach ptdesc for example has taken is
> >
> > They are going to struct net_iov.
Precisely speaking, to 'struct netmem_desc'.
> Oh. I see. My gut reaction is I'm not sure moving the page_pool fields
> to struct net_iov will work.
>
> struct net_iov shares some fields with struct page, but abstractly
> it's very different.
>
> struct page is allocated by the mm stack via things like alloc_pages
> and can be passed to mm apis such as put_page() (called from
> skb_frag_ref) and vm_insert_batch (called from
> tcp_zerocopy_vm_insert_batch_error).
>
> struct net_iov is kvmalloced by networking code (see
> net_devmem_bind_dmabuf for example), and *must not* be passed to any
> mm apis as it's not a struct page at all. Accidentally calling
> vm_insert_batch on a struct net_iov will cause a kernel crash or some
> memory corruption.
>
> Thus abstractly different things maybe should not share the same
> in-kernel struct.
>
> One thing that maybe could work is if struct net_iov has a field in it
> which tells us whether it's actually a struct page that can be passed
> to mm apis, or not a struct page which cannot be passed to mm apis.
>
> > Or I should introduce another struct
>
> maybe introducing another struct is the answer. I'm not sure. The net
The final form should be like:
struct netmem_desc {
struct page_pool *pp;
unsigned long dma_addr;
atomic_long_t ref_count;
};
struct net_iov {
struct netmem_desc;
enum net_iov_type type;
struct net_iov_area *owner;
...
};
However, now that overlaying on struct page is required, struct
netmem_desc should be almost same as struct net_iov. So I'm not sure if
we should introduce struct netmem_desc as a new struct along with struct
net_iov.
> stack today already supports struct page and struct net_iov, with
> netmem_ref acting as an abstraction over both. Adding a 3rd struct and
> adding more checks to test if page or net_iov or something new will
> add overhead.
So I think the current form in this patch is a good option we can take
for now.
> An additional problem is that there are probably hundreds or thousands
> of references to 'page' in the net stack and drivers. I'm not sure
> what you're going to do about those. Are you converting all those to
> netmem or netmem_desc?
No. I will convert only the references for page pool.
Byungchul
>
> --
> Thanks,
> Mina
On 5/28/25 06:03, Byungchul Park wrote:
...>> Thus abstractly different things maybe should not share the same
>> in-kernel struct.
>>
>> One thing that maybe could work is if struct net_iov has a field in it
>> which tells us whether it's actually a struct page that can be passed
>> to mm apis, or not a struct page which cannot be passed to mm apis.
>>
>>> Or I should introduce another struct
>>
>> maybe introducing another struct is the answer. I'm not sure. The net
>
> The final form should be like:
>
> struct netmem_desc {
> struct page_pool *pp;
> unsigned long dma_addr;
> atomic_long_t ref_count;
> };
>
> struct net_iov {
> struct netmem_desc;
> enum net_iov_type type;
> struct net_iov_area *owner;
> ...
> };
>
> However, now that overlaying on struct page is required, struct
> netmem_desc should be almost same as struct net_iov. So I'm not sure if
> we should introduce struct netmem_desc as a new struct along with struct
> net_iov.
Yes, you should. Mina already explained that net_iov is not the same
thing as the net specific sub-struct of the page. They have common
fields, but there are also net_iov (memory provider) specific fields
as well.
--
Pavel Begunkov
On Wed, May 28, 2025 at 08:43:34AM +0100, Pavel Begunkov wrote:
> On 5/28/25 06:03, Byungchul Park wrote:
> ...>> Thus abstractly different things maybe should not share the same
> > > in-kernel struct.
> > >
> > > One thing that maybe could work is if struct net_iov has a field in it
> > > which tells us whether it's actually a struct page that can be passed
> > > to mm apis, or not a struct page which cannot be passed to mm apis.
> > >
> > > > Or I should introduce another struct
> > >
> > > maybe introducing another struct is the answer. I'm not sure. The net
> >
> > The final form should be like:
> >
> > struct netmem_desc {
> > struct page_pool *pp;
> > unsigned long dma_addr;
> > atomic_long_t ref_count;
> > };
> >
> > struct net_iov {
> > struct netmem_desc;
> > enum net_iov_type type;
> > struct net_iov_area *owner;
> > ...
> > };
> >
> > However, now that overlaying on struct page is required, struct
> > netmem_desc should be almost same as struct net_iov. So I'm not sure if
> > we should introduce struct netmem_desc as a new struct along with struct
> > net_iov.
>
> Yes, you should. Mina already explained that net_iov is not the same
> thing as the net specific sub-struct of the page. They have common
> fields, but there are also net_iov (memory provider) specific fields
> as well.
Okay then. I will introduce a separate struct, netmem_desc, that has
similar fields to net_iov, and related static assert for the offsets.
Byungchul
>
> --
> Pavel Begunkov
On Thu, May 22, 2025 at 8:26 PM Byungchul Park <byungchul@sk.com> wrote:
>
> To simplify struct page, the page pool members of struct page should be
> moved to other, allowing these members to be removed from struct page.
>
> Introduce a network memory descriptor to store the members, struct
> netmem_desc, reusing struct net_iov that already mirrored struct page.
>
> While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
>
> Signed-off-by: Byungchul Park <byungchul@sk.com>
> ---
> include/linux/mm_types.h | 2 +-
> include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
> 2 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 56d07edd01f9..873e820e1521 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -120,13 +120,13 @@ struct page {
> unsigned long private;
> };
> struct { /* page_pool used by netstack */
> + unsigned long _pp_mapping_pad;
> /**
> * @pp_magic: magic value to avoid recycling non
> * page_pool allocated pages.
> */
> unsigned long pp_magic;
> struct page_pool *pp;
> - unsigned long _pp_mapping_pad;
Like Toke says, moving this to the beginning of this struct is not
allowed. The first 3 bits of pp_magic are overlaid with page->lru so
the pp makes sure not to use them. _pp_mapping_pad is overlaid with
page->mapping, so the pp makes sure not to use it. AFAICT, this moving
of _pp_mapping_pad is not necessary for this patch. I think just drop
it.
> unsigned long dma_addr;
> atomic_long_t pp_ref_count;
> };
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index 386164fb9c18..08e9d76cdf14 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -31,12 +31,41 @@ enum net_iov_type {
> };
>
> struct net_iov {
> - enum net_iov_type type;
> - unsigned long pp_magic;
> - struct page_pool *pp;
> - struct net_iov_area *owner;
> - unsigned long dma_addr;
> - atomic_long_t pp_ref_count;
> + /*
> + * XXX: Now that struct netmem_desc overlays on struct page,
> + * struct_group_tagged() should cover all of them. However,
> + * a separate struct netmem_desc should be declared and embedded,
> + * once struct netmem_desc is no longer overlayed but it has its
> + * own instance from slab. The final form should be:
> + *
> + * struct netmem_desc {
> + * unsigned long pp_magic;
> + * struct page_pool *pp;
> + * unsigned long dma_addr;
> + * atomic_long_t pp_ref_count;
> + * };
> + *
> + * struct net_iov {
> + * enum net_iov_type type;
> + * struct net_iov_area *owner;
> + * struct netmem_desc;
> + * };
> + */
I'm unclear on why moving to this format is a TODO for the future. Why
isn't this state in the comment the state in the code? I think I gave
the same code snippet on the RFC, but here again:
struct netmem_desc {
/**
* @pp_magic: magic value to avoid recycling non
* page_pool allocated pages.
*/
unsigned long pp_magic;
struct page_pool *pp;
unsigned long _pp_mapping_pad;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
};
(Roughly):
struct page {
...
struct { /* page_pool used by netstack */
struct netmem_desc;
};
...
};
struct net_iov {
enum net_iov_type type;
struct netmem_desc;
struct net_iov_area *owner;
}
AFAICT, this should work..?
--
Thanks,
Mina
On Fri, May 23, 2025 at 10:00:55AM -0700, Mina Almasry wrote:
> On Thu, May 22, 2025 at 8:26 PM Byungchul Park <byungchul@sk.com> wrote:
> >
> > To simplify struct page, the page pool members of struct page should be
> > moved to other, allowing these members to be removed from struct page.
> >
> > Introduce a network memory descriptor to store the members, struct
> > netmem_desc, reusing struct net_iov that already mirrored struct page.
> >
> > While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
> >
> > Signed-off-by: Byungchul Park <byungchul@sk.com>
> > ---
> > include/linux/mm_types.h | 2 +-
> > include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
> > 2 files changed, 37 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > index 56d07edd01f9..873e820e1521 100644
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -120,13 +120,13 @@ struct page {
> > unsigned long private;
> > };
> > struct { /* page_pool used by netstack */
> > + unsigned long _pp_mapping_pad;
> > /**
> > * @pp_magic: magic value to avoid recycling non
> > * page_pool allocated pages.
> > */
> > unsigned long pp_magic;
> > struct page_pool *pp;
> > - unsigned long _pp_mapping_pad;
>
> Like Toke says, moving this to the beginning of this struct is not
> allowed. The first 3 bits of pp_magic are overlaid with page->lru so
> the pp makes sure not to use them. _pp_mapping_pad is overlaid with
> page->mapping, so the pp makes sure not to use it. AFAICT, this moving
> of _pp_mapping_pad is not necessary for this patch. I think just drop
> it.
Sure, I will. Thanks.
> > unsigned long dma_addr;
> > atomic_long_t pp_ref_count;
> > };
> > diff --git a/include/net/netmem.h b/include/net/netmem.h
> > index 386164fb9c18..08e9d76cdf14 100644
> > --- a/include/net/netmem.h
> > +++ b/include/net/netmem.h
> > @@ -31,12 +31,41 @@ enum net_iov_type {
> > };
> >
> > struct net_iov {
> > - enum net_iov_type type;
> > - unsigned long pp_magic;
> > - struct page_pool *pp;
> > - struct net_iov_area *owner;
> > - unsigned long dma_addr;
> > - atomic_long_t pp_ref_count;
> > + /*
> > + * XXX: Now that struct netmem_desc overlays on struct page,
> > + * struct_group_tagged() should cover all of them. However,
> > + * a separate struct netmem_desc should be declared and embedded,
> > + * once struct netmem_desc is no longer overlayed but it has its
> > + * own instance from slab. The final form should be:
> > + *
> > + * struct netmem_desc {
> > + * unsigned long pp_magic;
> > + * struct page_pool *pp;
> > + * unsigned long dma_addr;
> > + * atomic_long_t pp_ref_count;
> > + * };
> > + *
> > + * struct net_iov {
> > + * enum net_iov_type type;
> > + * struct net_iov_area *owner;
> > + * struct netmem_desc;
> > + * };
> > + */
>
> I'm unclear on why moving to this format is a TODO for the future. Why
> isn't this state in the comment the state in the code? I think I gave
> the same code snippet on the RFC, but here again:
>
> struct netmem_desc {
> /**
> * @pp_magic: magic value to avoid recycling non
> * page_pool allocated pages.
> */
> unsigned long pp_magic;
> struct page_pool *pp;
> unsigned long _pp_mapping_pad;
> unsigned long dma_addr;
> atomic_long_t pp_ref_count;
> };
>
> (Roughly):
>
> struct page {
> ...
> struct { /* page_pool used by netstack */
> struct netmem_desc;
This is unnecessary since it will be removed shortly.
> };
> ...
> };
>
> struct net_iov {
> enum net_iov_type type;
> struct netmem_desc;
This requires a huge change in a single commit since all the code
referring to any of the page pool fields, struct net_iov, and maybe
io_uring(?) should be altered at once.
Plus, much more changes are required since struct netmem_desc would not
overlay on struct page with what you suggest, which breaks the
assumption that struct netmem_desc overlays on struct page in the
current code.
So at least, the work should be started once the code doesn't need the
assumption.
Thoughts?
Byungchul
> struct net_iov_area *owner;
> }
>
> AFAICT, this should work..?
>
> --
> Thanks,
> Mina
Byungchul Park <byungchul@sk.com> writes:
> To simplify struct page, the page pool members of struct page should be
> moved to other, allowing these members to be removed from struct page.
>
> Introduce a network memory descriptor to store the members, struct
> netmem_desc, reusing struct net_iov that already mirrored struct page.
>
> While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
>
> Signed-off-by: Byungchul Park <byungchul@sk.com>
> ---
> include/linux/mm_types.h | 2 +-
> include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
> 2 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 56d07edd01f9..873e820e1521 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -120,13 +120,13 @@ struct page {
> unsigned long private;
> };
> struct { /* page_pool used by netstack */
> + unsigned long _pp_mapping_pad;
> /**
> * @pp_magic: magic value to avoid recycling non
> * page_pool allocated pages.
> */
> unsigned long pp_magic;
> struct page_pool *pp;
> - unsigned long _pp_mapping_pad;
> unsigned long dma_addr;
> atomic_long_t pp_ref_count;
> };
The reason that field is called "_pp_mapping_pad" is that it's supposed
to overlay the page->mapping field, so that none of the page_pool uses
set a value here. Moving it breaks that assumption. Once struct
netmem_desc is completely decoupled from struct page this obviously
doesn't matter, but I think it does today? At least, trying to use that
field for the DMA index broke things, which is why we ended up with the
bit-stuffing in pp_magic...
-Toke
On Fri, May 23, 2025 at 11:01:01AM +0200, Toke Høiland-Jørgensen wrote:
> Byungchul Park <byungchul@sk.com> writes:
>
> > To simplify struct page, the page pool members of struct page should be
> > moved to other, allowing these members to be removed from struct page.
> >
> > Introduce a network memory descriptor to store the members, struct
> > netmem_desc, reusing struct net_iov that already mirrored struct page.
> >
> > While at it, relocate _pp_mapping_pad to group struct net_iov's fields.
> >
> > Signed-off-by: Byungchul Park <byungchul@sk.com>
> > ---
> > include/linux/mm_types.h | 2 +-
> > include/net/netmem.h | 43 +++++++++++++++++++++++++++++++++-------
> > 2 files changed, 37 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > index 56d07edd01f9..873e820e1521 100644
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -120,13 +120,13 @@ struct page {
> > unsigned long private;
> > };
> > struct { /* page_pool used by netstack */
> > + unsigned long _pp_mapping_pad;
> > /**
> > * @pp_magic: magic value to avoid recycling non
> > * page_pool allocated pages.
> > */
> > unsigned long pp_magic;
> > struct page_pool *pp;
> > - unsigned long _pp_mapping_pad;
> > unsigned long dma_addr;
> > atomic_long_t pp_ref_count;
> > };
>
> The reason that field is called "_pp_mapping_pad" is that it's supposed
> to overlay the page->mapping field, so that none of the page_pool uses
> set a value here. Moving it breaks that assumption. Once struct
Right. I will fix it. Thanks.
Byungchul
> netmem_desc is completely decoupled from struct page this obviously
> doesn't matter, but I think it does today? At least, trying to use that
> field for the DMA index broke things, which is why we ended up with the
> bit-stuffing in pp_magic...
>
> -Toke
>
© 2016 - 2025 Red Hat, Inc.