Currently page_pool_dev_alloc_pages() can not be called
when PP_FLAG_PAGE_FRAG is set, because it does not use
the frag reference counting.
As we are already doing an optimization by not updating
page->pp_frag_count in page_pool_defrag_page() for the
last frag user, and a non-frag page only has one user,
we utilize that to unify frag page and non-frag page
handling, so that page_pool_dev_alloc_pages() can also
be called with PP_FLAG_PAGE_FRAG set.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
CC: Lorenzo Bianconi <lorenzo@kernel.org>
CC: Alexander Duyck <alexander.duyck@gmail.com>
---
include/net/page_pool.h | 38 +++++++++++++++++++++++++++++++-------
net/core/page_pool.c | 1 +
2 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index c8ec2f34722b..ea7a0c0592a5 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -50,6 +50,9 @@
PP_FLAG_DMA_SYNC_DEV |\
PP_FLAG_PAGE_FRAG)
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
+ (sizeof(dma_addr_t) > sizeof(unsigned long))
+
/*
* Fast allocation side cache array/stack
*
@@ -295,13 +298,20 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
*/
static inline void page_pool_fragment_page(struct page *page, long nr)
{
- atomic_long_set(&page->pp_frag_count, nr);
+ if (!PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ atomic_long_set(&page->pp_frag_count, nr);
}
+/* We need to reset frag_count back to 1 for the last user to allow
+ * only one user in case the page is recycled and allocated as non-frag
+ * page.
+ */
static inline long page_pool_defrag_page(struct page *page, long nr)
{
long ret;
+ BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
+
/* If nr == pp_frag_count then we have cleared all remaining
* references to the page. No need to actually overwrite it, instead
* we can leave this to be overwritten by the calling function.
@@ -311,19 +321,36 @@ static inline long page_pool_defrag_page(struct page *page, long nr)
* especially when dealing with a page that may be partitioned
* into only 2 or 3 pieces.
*/
- if (atomic_long_read(&page->pp_frag_count) == nr)
+ if (atomic_long_read(&page->pp_frag_count) == nr) {
+ /* As we have ensured nr is always one for constant case
+ * using the BUILD_BUG_ON() as above, only need to handle
+ * the non-constant case here for frag count draining.
+ */
+ if (!__builtin_constant_p(nr))
+ atomic_long_set(&page->pp_frag_count, 1);
+
return 0;
+ }
ret = atomic_long_sub_return(nr, &page->pp_frag_count);
WARN_ON(ret < 0);
+
+ /* Reset frag count back to 1, this should be the rare case when
+ * two users call page_pool_defrag_page() concurrently.
+ */
+ if (!ret)
+ atomic_long_set(&page->pp_frag_count, 1);
+
return ret;
}
static inline bool page_pool_is_last_frag(struct page_pool *pool,
struct page *page)
{
- /* If fragments aren't enabled or count is 0 we were the last user */
- return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+ /* When dma_addr_upper is overlapped with pp_frag_count
+ * or we were the last page frag user.
+ */
+ return PAGE_POOL_DMA_USE_PP_FRAG_COUNT ||
(page_pool_defrag_page(page, 1) == 0);
}
@@ -357,9 +384,6 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
page_pool_put_full_page(pool, page, true);
}
-#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
- (sizeof(dma_addr_t) > sizeof(unsigned long))
-
static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
dma_addr_t ret = page->dma_addr;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index e212e9d7edcb..0868aa8f6323 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -334,6 +334,7 @@ static void page_pool_set_pp_info(struct page_pool *pool,
{
page->pp = pool;
page->pp_magic |= PP_SIGNATURE;
+ page_pool_fragment_page(page, 1);
if (pool->p.init_callback)
pool->p.init_callback(page, pool->p.init_arg);
}
--
2.33.0
Hi Yunsheng Apologies for not replying to the RFC, I was pretty busy with internal stuff On Fri, May 26, 2023 at 05:26:14PM +0800, Yunsheng Lin wrote: > Currently page_pool_dev_alloc_pages() can not be called > when PP_FLAG_PAGE_FRAG is set, because it does not use > the frag reference counting. > > As we are already doing a optimization by not updating > page->pp_frag_count in page_pool_defrag_page() for the > last frag user, and non-frag page only have one user, > so we utilize that to unify frag page and non-frag page > handling, so that page_pool_dev_alloc_pages() can also > be called with PP_FLAG_PAGE_FRAG set. What happens here is clear. But why do we need this? Do you have a specific use case in mind where a driver will call page_pool_dev_alloc_pages() and the PP_FLAG_PAGE_FRAG will be set? If that's the case isn't it a better idea to unify the functions entirely? Thanks /Ilias > > Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> > CC: Lorenzo Bianconi <lorenzo@kernel.org> > CC: Alexander Duyck <alexander.duyck@gmail.com> > --- > include/net/page_pool.h | 38 +++++++++++++++++++++++++++++++------- > net/core/page_pool.c | 1 + > 2 files changed, 32 insertions(+), 7 deletions(-) > > diff --git a/include/net/page_pool.h b/include/net/page_pool.h > index c8ec2f34722b..ea7a0c0592a5 100644 > --- a/include/net/page_pool.h > +++ b/include/net/page_pool.h > @@ -50,6 +50,9 @@ > PP_FLAG_DMA_SYNC_DEV |\ > PP_FLAG_PAGE_FRAG) > > +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ > + (sizeof(dma_addr_t) > sizeof(unsigned long)) > + > /* > * Fast allocation side cache array/stack > * > @@ -295,13 +298,20 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, > */ > static inline void page_pool_fragment_page(struct page *page, long nr) > { > - atomic_long_set(&page->pp_frag_count, nr); > + if (!PAGE_POOL_DMA_USE_PP_FRAG_COUNT) > + atomic_long_set(&page->pp_frag_count, nr); > } > > +/* We need to reset frag_count back to 1 for the last user to allow > + * 
only one user in case the page is recycled and allocated as non-frag > + * page. > + */ > static inline long page_pool_defrag_page(struct page *page, long nr) > { > long ret; > > + BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); > + > /* If nr == pp_frag_count then we have cleared all remaining > * references to the page. No need to actually overwrite it, instead > * we can leave this to be overwritten by the calling function. > @@ -311,19 +321,36 @@ static inline long page_pool_defrag_page(struct page *page, long nr) > * especially when dealing with a page that may be partitioned > * into only 2 or 3 pieces. > */ > - if (atomic_long_read(&page->pp_frag_count) == nr) > + if (atomic_long_read(&page->pp_frag_count) == nr) { > + /* As we have ensured nr is always one for constant case > + * using the BUILD_BUG_ON() as above, only need to handle > + * the non-constant case here for frag count draining. > + */ > + if (!__builtin_constant_p(nr)) > + atomic_long_set(&page->pp_frag_count, 1); > + > return 0; > + } > > ret = atomic_long_sub_return(nr, &page->pp_frag_count); > WARN_ON(ret < 0); > + > + /* Reset frag count back to 1, this should be the rare case when > + * two users call page_pool_defrag_page() currently. > + */ > + if (!ret) > + atomic_long_set(&page->pp_frag_count, 1); > + > return ret; > } > > static inline bool page_pool_is_last_frag(struct page_pool *pool, > struct page *page) > { > - /* If fragments aren't enabled or count is 0 we were the last user */ > - return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || > + /* When dma_addr_upper is overlapped with pp_frag_count > + * or we were the last page frag user. 
> + */ > + return PAGE_POOL_DMA_USE_PP_FRAG_COUNT || > (page_pool_defrag_page(page, 1) == 0); > } > > @@ -357,9 +384,6 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, > page_pool_put_full_page(pool, page, true); > } > > -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ > - (sizeof(dma_addr_t) > sizeof(unsigned long)) > - > static inline dma_addr_t page_pool_get_dma_addr(struct page *page) > { > dma_addr_t ret = page->dma_addr; > diff --git a/net/core/page_pool.c b/net/core/page_pool.c > index e212e9d7edcb..0868aa8f6323 100644 > --- a/net/core/page_pool.c > +++ b/net/core/page_pool.c > @@ -334,6 +334,7 @@ static void page_pool_set_pp_info(struct page_pool *pool, > { > page->pp = pool; > page->pp_magic |= PP_SIGNATURE; > + page_pool_fragment_page(page, 1); > if (pool->p.init_callback) > pool->p.init_callback(page, pool->p.init_arg); > } > -- > 2.33.0 >
On 2023/5/26 20:03, Ilias Apalodimas wrote: > Hi Yunsheng > > Apologies for not replying to the RFC, I was pretty busy with internal > stuff > > On Fri, May 26, 2023 at 05:26:14PM +0800, Yunsheng Lin wrote: >> Currently page_pool_dev_alloc_pages() can not be called >> when PP_FLAG_PAGE_FRAG is set, because it does not use >> the frag reference counting. >> >> As we are already doing a optimization by not updating >> page->pp_frag_count in page_pool_defrag_page() for the >> last frag user, and non-frag page only have one user, >> so we utilize that to unify frag page and non-frag page >> handling, so that page_pool_dev_alloc_pages() can also >> be called with PP_FLAG_PAGE_FRAG set. > > What happens here is clear. But why do we need this? Do you have a > specific use case in mind where a driver will call > page_pool_dev_alloc_pages() and the PP_FLAG_PAGE_FRAG will be set? Actually it is about calling page_pool_alloc_pages() in page_pool_alloc_frag() in patch 2; the use case is veth using page frag support. See: https://patchwork.kernel.org/project/netdevbpf/patch/d3ae6bd3537fbce379382ac6a42f67e22f27ece2.1683896626.git.lorenzo@kernel.org/ > If that's the case isn't it a better idea to unify the functions entirely? As above, page_pool_alloc_frag() does seem to be a superset of page_pool_alloc_pages() after this patchset, as I understand it. If the page_pool_alloc_frag() API turns out to be a good API for the driver, maybe we can phase out *page_pool_alloc_pages() as time goes by?
On Fri, May 26, 2023 at 08:35:24PM +0800, Yunsheng Lin wrote: > On 2023/5/26 20:03, Ilias Apalodimas wrote: > > Hi Yunsheng > > > > Apologies for not replying to the RFC, I was pretty busy with internal > > stuff > > > > On Fri, May 26, 2023 at 05:26:14PM +0800, Yunsheng Lin wrote: > >> Currently page_pool_dev_alloc_pages() can not be called > >> when PP_FLAG_PAGE_FRAG is set, because it does not use > >> the frag reference counting. > >> > >> As we are already doing a optimization by not updating > >> page->pp_frag_count in page_pool_defrag_page() for the > >> last frag user, and non-frag page only have one user, > >> so we utilize that to unify frag page and non-frag page > >> handling, so that page_pool_dev_alloc_pages() can also > >> be called with PP_FLAG_PAGE_FRAG set. > > > > What happens here is clear. But why do we need this? Do you have a > > specific use case in mind where a driver will call > > page_pool_dev_alloc_pages() and the PP_FLAG_PAGE_FRAG will be set? > > Actually it is about calling page_pool_alloc_pages() in > page_pool_alloc_frag() in patch 2, the use case is the > veth using page frag support. see: > > https://patchwork.kernel.org/project/netdevbpf/patch/d3ae6bd3537fbce379382ac6a42f67e22f27ece2.1683896626.git.lorenzo@kernel.org/ Ok I missed that patch. > > > If that's the case isn't it a better idea to unify the functions entirely? > > As about, page_pool_alloc_frag() does seems to be a superset of > page_pool_alloc_pages() after this patchset as my understanding. > If the page_pool_alloc_frag() API turns out to be a good API for > the driver, maybe we can phase out *page_pool_alloc_pages() as > time goes by? Looking at patch 2/2 it seems a bit wasteful. At the moment only hns3 uses fragments and the length of the allocation seems static. But if someone else chooses to allocate a > 2048 packet why should it allocate a page? 
I just think it's a bit confusing since we have a flag on the pool for page fragments, but then we violate it when it suits us. Thanks /Ilias
On 2023/5/26 23:38, Ilias Apalodimas wrote: >> >>> If that's the case isn't it a better idea to unify the functions entirely? >> >> As about, page_pool_alloc_frag() does seems to be a superset of >> page_pool_alloc_pages() after this patchset as my understanding. >> If the page_pool_alloc_frag() API turns out to be a good API for >> the driver, maybe we can phase out *page_pool_alloc_pages() as >> time goes by? > > Looking at patch 2/2 it seems a bit wasteful. At the moment only hns3 uses > fragments and the length of the allocation seems static. But if someone > else chooses to allocate a > 2048 packet why should it allocate a page? It is based on the fact that if a user requests a > 2048 frag, then it will most likely request a > 2048 frag again. For example, when the MTU is changed or XDP is enabled/disabled, at least for the veth case, the frag size is likely to change. Allocating a page for the above case avoids the frag count draining overhead, and unifies the interface for the driver so that the driver doesn't need to choose which API to use. > > I just think it's a bit confusing since we have a flag on the pool for page > fragments, but then we violate it when it suits us. Yes, we can remove it as mentioned in the cover letter: "PP_FLAG_PAGE_FRAG may be removed after this patchset, and the extra benefit is that driver does not need to handle the case for arch with PAGE_POOL_DMA_USE_PP_FRAG_COUNT when using page_pool_alloc_frag() API." > > Thanks > /Ilias > . >
© 2016 - 2024 Red Hat, Inc.