Patchew
Login
In [1], there is a use case to use frag support in page pool to reduce memory usage, and it may request different frag size depending on the head/tail room space for xdp_frame/shinfo and mtu/packet size. When the requested frag size is large enough that a single page cannot be split into more than one frag, using frag support only has a performance penalty because of the extra frag count handling for frag support. So this patchset provides a way for the user to fall back to a non-frag page depending on the user's request. 1. https://patchwork.kernel.org/project/netdevbpf/patch/d3ae6bd3537fbce379382ac6a42f67e22f27ece2.1683896626.git.lorenzo@kernel.org/ Yunsheng Lin (3): page_pool: unify frag page and non-frag page handling page_pool: support non-frag page for page_pool_alloc_frag() page_pool: introduce 'struct page_pool_frag' .../net/ethernet/hisilicon/hns3/hns3_enet.c | 16 +++--- drivers/net/wireless/mediatek/mt76/mt76.h | 9 ++-- include/net/page_pool.h | 51 ++++++++++++++----- net/core/page_pool.c | 45 +++++++++++----- 4 files changed, 86 insertions(+), 35 deletions(-) -- 2.33.0
Currently page_pool_dev_alloc_pages() can not be called when PP_FLAG_PAGE_FRAG is set, because it does not use the frag reference counting. As we are already doing a optimization by not updating page->pp_frag_count in page_pool_defrag_page() for the last frag user, and non-frag page only have one user, so we utilize that to unify frag page and non-frag page handling, so that page_pool_dev_alloc_pages() can also be called with PP_FLAG_PAGE_FRAG set. Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> CC: Lorenzo Bianconi <lorenzo@kernel.org> CC: Alexander Duyck <alexander.duyck@gmail.com> --- include/net/page_pool.h | 24 ++++++++++++++++++------ net/core/page_pool.c | 3 ++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -XXX,XX +XXX,XX @@ PP_FLAG_DMA_SYNC_DEV |\ PP_FLAG_PAGE_FRAG) +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ + (sizeof(dma_addr_t) > sizeof(unsigned long)) + /* * Fast allocation side cache array/stack * @@ -XXX,XX +XXX,XX @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, */ static inline void page_pool_fragment_page(struct page *page, long nr) { - atomic_long_set(&page->pp_frag_count, nr); + if (!PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + atomic_long_set(&page->pp_frag_count, nr); } static inline long page_pool_defrag_page(struct page *page, long nr) @@ -XXX,XX +XXX,XX @@ static inline long page_pool_defrag_page(struct page *page, long nr) ret = atomic_long_sub_return(nr, &page->pp_frag_count); WARN_ON(ret < 0); + + /* Reset it to 1 to allow only one user in case the page is + * recycled and allocated as non-frag page if it is the last + * user, this should be the rare case as it only happen when + * two users call page_pool_defrag_page() currently. 
+ */ + if (!ret) + atomic_long_set(&page->pp_frag_count, 1); + return ret; } static inline bool page_pool_is_last_frag(struct page_pool *pool, struct page *page) { - /* If fragments aren't enabled or count is 0 we were the last user */ - return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || + /* When dma_addr_upper is overlapped with pp_frag_count + * or we were the last page frag user. + */ + return PAGE_POOL_DMA_USE_PP_FRAG_COUNT || (page_pool_defrag_page(page, 1) == 0); } @@ -XXX,XX +XXX,XX @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ - (sizeof(dma_addr_t) > sizeof(unsigned long)) - static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { dma_addr_t ret = page->dma_addr; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index XXXXXXX..XXXXXXX 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -XXX,XX +XXX,XX @@ static void page_pool_set_pp_info(struct page_pool *pool, { page->pp = pool; page->pp_magic |= PP_SIGNATURE; + page_pool_fragment_page(page, 1); if (pool->p.init_callback) pool->p.init_callback(page, pool->p.init_arg); } @@ -XXX,XX +XXX,XX @@ struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int max_size = PAGE_SIZE << pool->p.order; struct page *page = pool->frag_page; - if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || + if (WARN_ON(PAGE_POOL_DMA_USE_PP_FRAG_COUNT || size > max_size)) return NULL; -- 2.33.0
There is performance penalty with using page frag support when user requests a larger frag size and a page only supports one frag user, see [1]. It seems like user may request different frag size depending on the mtu and packet size, provide an option to allocate non-frag page when user has requested a frag size larger than a specific size, so that user has a unified interface for the memory allocation with least memory utilization and performance penalty. 1. https://lore.kernel.org/netdev/ZEU+vospFdm08IeE@localhost.localdomain/ Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> CC: Lorenzo Bianconi <lorenzo@kernel.org> CC: Alexander Duyck <alexander.duyck@gmail.com> --- include/net/page_pool.h | 9 +++++++++ net/core/page_pool.c | 10 ++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -XXX,XX +XXX,XX @@ struct page_pool { unsigned int frag_offset; struct page *frag_page; long frag_users; + unsigned int max_frag_size; #ifdef CONFIG_PAGE_POOL_STATS /* these stats are incremented while in softirq context */ @@ -XXX,XX +XXX,XX @@ struct page_pool { u64 destroy_cnt; }; +/* Called after page_pool_create() */ +static inline void page_pool_set_max_frag_size(struct page_pool *pool, + unsigned int max_frag_size) +{ + pool->max_frag_size = min_t(unsigned int, max_frag_size, + PAGE_SIZE << pool->p.order); +} + struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index XXXXXXX..XXXXXXX 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -XXX,XX +XXX,XX @@ static int page_pool_init(struct page_pool *pool, if (pool->p.flags & PP_FLAG_DMA_MAP) get_device(pool->p.dev); + page_pool_set_max_frag_size(pool, PAGE_SIZE << pool->p.order); + return 0; } @@ -XXX,XX 
+XXX,XX @@ struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int max_size = PAGE_SIZE << pool->p.order; struct page *page = pool->frag_page; - if (WARN_ON(PAGE_POOL_DMA_USE_PP_FRAG_COUNT || - size > max_size)) + if (WARN_ON(PAGE_POOL_DMA_USE_PP_FRAG_COUNT)) return NULL; + if (unlikely(size > pool->max_frag_size)) { + *offset = 0; + return page_pool_alloc_pages(pool, gfp); + } + size = ALIGN(size, dma_get_cache_alignment()); *offset = pool->frag_offset; -- 2.33.0
As page_pool_alloc_frag() can return both frag and non-frag page now, the true size may be different for them, so introduce 'struct page_pool_frag' to return the frag info instead of adding more function parameters and adjust the interface accordingly. Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> CC: Lorenzo Bianconi <lorenzo@kernel.org> CC: Alexander Duyck <alexander.duyck@gmail.com> --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 16 ++++---- drivers/net/wireless/mediatek/mt76/mt76.h | 9 +++-- include/net/page_pool.h | 18 ++++++--- net/core/page_pool.c | 38 +++++++++++++------ 4 files changed, 52 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index XXXXXXX..XXXXXXX 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -XXX,XX +XXX,XX @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring, struct page *p; if (ring->page_pool) { - p = page_pool_dev_alloc_frag(ring->page_pool, - &cb->page_offset, - hns3_buf_size(ring)); - if (unlikely(!p)) + struct page_pool_frag *pp_frag; + + pp_frag = page_pool_dev_alloc_frag(ring->page_pool, + hns3_buf_size(ring)); + if (unlikely(!pp_frag)) return -ENOMEM; - cb->priv = p; - cb->buf = page_address(p); - cb->dma = page_pool_get_dma_addr(p); + cb->priv = pp_frag->page; + cb->page_offset = pp_frag->offset; + cb->buf = page_address(pp_frag->page); + cb->dma = page_pool_get_dma_addr(pp_frag->page); cb->type = DESC_TYPE_PP_FRAG; cb->reuse_flag = 0; return 0; diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index XXXXXXX..XXXXXXX 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -XXX,XX +XXX,XX @@ static inline void mt76_put_page_pool_buf(void *buf, bool allow_direct) static inline void * mt76_get_page_pool_buf(struct mt76_queue *q, u32 *offset, u32 size) { - 
struct page *page; + struct page_pool_frag *pp_frag; - page = page_pool_dev_alloc_frag(q->page_pool, offset, size); - if (!page) + pp_frag = page_pool_dev_alloc_frag(q->page_pool, size); + if (!pp_frag) return NULL; - return page_address(page) + *offset; + *offset = pp_frag->offset; + return page_address(pp_frag->page) + *offset; } static inline void mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -XXX,XX +XXX,XX @@ static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) #endif +struct page_pool_frag { + struct page *page; + unsigned int offset; + unsigned int truesize; +}; + struct page_pool { struct page_pool_params p; @@ -XXX,XX +XXX,XX @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } -struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, - unsigned int size, gfp_t gfp); +struct page_pool_frag *page_pool_alloc_frag(struct page_pool *pool, + unsigned int size, gfp_t gfp); -static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, - unsigned int *offset, - unsigned int size) +static inline +struct page_pool_frag *page_pool_dev_alloc_frag(struct page_pool *pool, + unsigned int size) { gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); - return page_pool_alloc_frag(pool, offset, size, gfp); + return page_pool_alloc_frag(pool, size, gfp); } /* get the stored dma direction. 
A driver might decide to treat this locally and diff --git a/net/core/page_pool.c b/net/core/page_pool.c index XXXXXXX..XXXXXXX 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -XXX,XX +XXX,XX @@ #define BIAS_MAX LONG_MAX +static DEFINE_PER_CPU(struct page_pool_frag, pp_frag); + #ifdef CONFIG_PAGE_POOL_STATS /* alloc_stat_inc is intended to be used in softirq context */ #define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++) @@ -XXX,XX +XXX,XX @@ static void page_pool_free_frag(struct page_pool *pool) page_pool_return_page(pool, page); } -struct page *page_pool_alloc_frag(struct page_pool *pool, - unsigned int *offset, - unsigned int size, gfp_t gfp) +struct page_pool_frag *page_pool_alloc_frag(struct page_pool *pool, + unsigned int size, + gfp_t gfp) { + struct page_pool_frag *frag = this_cpu_ptr(&pp_frag); unsigned int max_size = PAGE_SIZE << pool->p.order; - struct page *page = pool->frag_page; + struct page *page; if (WARN_ON(PAGE_POOL_DMA_USE_PP_FRAG_COUNT)) return NULL; if (unlikely(size > pool->max_frag_size)) { - *offset = 0; - return page_pool_alloc_pages(pool, gfp); + frag->page = page_pool_alloc_pages(pool, gfp); + if (unlikely(!frag->page)) + return NULL; + + frag->offset = 0; + frag->truesize = max_size; + return frag; } + page = pool->frag_page; size = ALIGN(size, dma_get_cache_alignment()); - *offset = pool->frag_offset; - if (page && *offset + size > max_size) { + if (page && pool->frag_offset + size > max_size) { page = page_pool_drain_frag(pool, page); if (page) { alloc_stat_inc(pool, fast); @@ -XXX,XX +XXX,XX @@ struct page *page_pool_alloc_frag(struct page_pool *pool, frag_reset: pool->frag_users = 1; - *offset = 0; pool->frag_offset = size; page_pool_fragment_page(page, BIAS_MAX); - return page; + frag->page = page; + frag->offset = 0; + frag->truesize = size; + return frag; } + frag->page = page; + frag->truesize = size; + frag->offset = pool->frag_offset; + pool->frag_users++; - pool->frag_offset = *offset + size; + 
pool->frag_offset += size; alloc_stat_inc(pool, fast); - return page; + return frag; } EXPORT_SYMBOL(page_pool_alloc_frag); -- 2.33.0
In [1], there is a use case to use frag support in page pool to reduce memory usage, and it may request different frag size depending on the head/tail room space for xdp_frame/shinfo and mtu/packet size. When the requested frag size is large enough that a single page cannot be split into more than one frag, using frag support only has a performance penalty because of the extra frag count handling for frag support. So this patchset provides a way for the user to fall back to a non-frag page when a page is not able to hold two frags. PP_FLAG_PAGE_FRAG may be removed after this patchset, and the extra benefit is that the driver does not need to handle the case for arch with PAGE_POOL_DMA_USE_PP_FRAG_COUNT when using the page_pool_alloc_frag() API. 1. https://patchwork.kernel.org/project/netdevbpf/patch/d3ae6bd3537fbce379382ac6a42f67e22f27ece2.1683896626.git.lorenzo@kernel.org/ V1: Drop RFC tag and page_pool_frag patch Yunsheng Lin (2): page_pool: unify frag page and non-frag page handling page_pool: support non-frag page for page_pool_alloc_frag() include/net/page_pool.h | 38 ++++++++++++++++++++++++++------ net/core/page_pool.c | 48 +++++++++++++++++++++++++---------------- 2 files changed, 61 insertions(+), 25 deletions(-) -- 2.33.0
Currently page_pool_dev_alloc_pages() can not be called when PP_FLAG_PAGE_FRAG is set, because it does not use the frag reference counting. As we are already doing a optimization by not updating page->pp_frag_count in page_pool_defrag_page() for the last frag user, and non-frag page only have one user, so we utilize that to unify frag page and non-frag page handling, so that page_pool_dev_alloc_pages() can also be called with PP_FLAG_PAGE_FRAG set. Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> CC: Lorenzo Bianconi <lorenzo@kernel.org> CC: Alexander Duyck <alexander.duyck@gmail.com> --- include/net/page_pool.h | 38 +++++++++++++++++++++++++++++++------- net/core/page_pool.c | 1 + 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -XXX,XX +XXX,XX @@ PP_FLAG_DMA_SYNC_DEV |\ PP_FLAG_PAGE_FRAG) +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ + (sizeof(dma_addr_t) > sizeof(unsigned long)) + /* * Fast allocation side cache array/stack * @@ -XXX,XX +XXX,XX @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, */ static inline void page_pool_fragment_page(struct page *page, long nr) { - atomic_long_set(&page->pp_frag_count, nr); + if (!PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + atomic_long_set(&page->pp_frag_count, nr); } +/* We need to reset frag_count back to 1 for the last user to allow + * only one user in case the page is recycled and allocated as non-frag + * page. + */ static inline long page_pool_defrag_page(struct page *page, long nr) { long ret; + BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); + /* If nr == pp_frag_count then we have cleared all remaining * references to the page. No need to actually overwrite it, instead * we can leave this to be overwritten by the calling function. 
@@ -XXX,XX +XXX,XX @@ static inline long page_pool_defrag_page(struct page *page, long nr) * especially when dealing with a page that may be partitioned * into only 2 or 3 pieces. */ - if (atomic_long_read(&page->pp_frag_count) == nr) + if (atomic_long_read(&page->pp_frag_count) == nr) { + /* As we have ensured nr is always one for constant case + * using the BUILD_BUG_ON() as above, only need to handle + * the non-constant case here for frag count draining. + */ + if (!__builtin_constant_p(nr)) + atomic_long_set(&page->pp_frag_count, 1); + return 0; + } ret = atomic_long_sub_return(nr, &page->pp_frag_count); WARN_ON(ret < 0); + + /* Reset frag count back to 1, this should be the rare case when + * two users call page_pool_defrag_page() currently. + */ + if (!ret) + atomic_long_set(&page->pp_frag_count, 1); + return ret; } static inline bool page_pool_is_last_frag(struct page_pool *pool, struct page *page) { - /* If fragments aren't enabled or count is 0 we were the last user */ - return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || + /* When dma_addr_upper is overlapped with pp_frag_count + * or we were the last page frag user. + */ + return PAGE_POOL_DMA_USE_PP_FRAG_COUNT || (page_pool_defrag_page(page, 1) == 0); } @@ -XXX,XX +XXX,XX @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ - (sizeof(dma_addr_t) > sizeof(unsigned long)) - static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { dma_addr_t ret = page->dma_addr; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index XXXXXXX..XXXXXXX 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -XXX,XX +XXX,XX @@ static void page_pool_set_pp_info(struct page_pool *pool, { page->pp = pool; page->pp_magic |= PP_SIGNATURE; + page_pool_fragment_page(page, 1); if (pool->p.init_callback) pool->p.init_callback(page, pool->p.init_arg); } -- 2.33.0
There is performance penalty with using page frag support when user requests a larger frag size and a page only supports one frag user, see [1]. It seems like user may request different frag size depending on the mtu and packet size, provide an option to allocate non-frag page when a whole page is not able to hold two frags, so that user has a unified interface for the memory allocation with least memory utilization and performance penalty. 1. https://lore.kernel.org/netdev/ZEU+vospFdm08IeE@localhost.localdomain/ Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> CC: Lorenzo Bianconi <lorenzo@kernel.org> CC: Alexander Duyck <alexander.duyck@gmail.com> --- net/core/page_pool.c | 47 +++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index XXXXXXX..XXXXXXX 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -XXX,XX +XXX,XX @@ struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int max_size = PAGE_SIZE << pool->p.order; struct page *page = pool->frag_page; - if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || - size > max_size)) + if (unlikely(size > max_size)) return NULL; + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) { + *offset = 0; + return page_pool_alloc_pages(pool, gfp); + } + size = ALIGN(size, dma_get_cache_alignment()); - *offset = pool->frag_offset; - if (page && *offset + size > max_size) { + if (page) { + *offset = pool->frag_offset; + + if (*offset + size <= max_size) { + pool->frag_users++; + pool->frag_offset = *offset + size; + alloc_stat_inc(pool, fast); + return page; + } + + pool->frag_page = NULL; page = page_pool_drain_frag(pool, page); if (page) { alloc_stat_inc(pool, fast); @@ -XXX,XX +XXX,XX @@ struct page *page_pool_alloc_frag(struct page_pool *pool, } } - if (!page) { - page = page_pool_alloc_pages(pool, gfp); - if (unlikely(!page)) { - pool->frag_page = NULL; - return NULL; - } - - pool->frag_page = page; + page = 
page_pool_alloc_pages(pool, gfp); + if (unlikely(!page)) + return NULL; frag_reset: - pool->frag_users = 1; + /* return page as non-frag page if a page is not able to + * hold two frags for the current requested size. + */ + if (unlikely(size << 1 > max_size)) { *offset = 0; - pool->frag_offset = size; - page_pool_fragment_page(page, BIAS_MAX); return page; } - pool->frag_users++; - pool->frag_offset = *offset + size; - alloc_stat_inc(pool, fast); + pool->frag_page = page; + pool->frag_users = 1; + *offset = 0; + pool->frag_offset = size; + page_pool_fragment_page(page, BIAS_MAX); return page; } EXPORT_SYMBOL(page_pool_alloc_frag); -- 2.33.0